[llvm] c4f32b4 - [AArch64] Tests for postinc scheduling write operands. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 7 13:31:20 PDT 2023
Author: David Green
Date: 2023-10-07T21:31:08+01:00
New Revision: c4f32b4bda8e536bdd539762785805dbe42f0852
URL: https://github.com/llvm/llvm-project/commit/c4f32b4bda8e536bdd539762785805dbe42f0852
DIFF: https://github.com/llvm/llvm-project/commit/c4f32b4bda8e536bdd539762785805dbe42f0852.diff
LOG: [AArch64] Tests for postinc scheduling write operands. NFC
Add similar tests to D159254 for other CPU targets.
Added:
llvm/test/tools/llvm-mca/AArch64/Cortex/A510-writeback.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A53-writeback.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-writeback.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A57-writeback.s
llvm/test/tools/llvm-mca/AArch64/Neoverse/N1-writeback.s
llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
Modified:
Removed:
################################################################################
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-writeback.s
new file mode 100644
index 000000000000000..f9b4509531d35cb
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-writeback.s
@@ -0,0 +1,5285 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a510 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1 { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1 { v1.2d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2s }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4h }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1 { v1.8b }, [x27], #8
+add x0, x27, 1
+ld1 { v1.8h }, [x27], #16
+add x0, x27, 1
+ld1 { v1.16b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1 { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1 { v1.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+ld1r { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.4h }, [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r { v1.4s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.8b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.8h }, [x27], #2
+add x0, x27, 1
+ld1r { v1.16b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r { v1.2d }, [x27], x28
+add x0, x27, 1
+ld1r { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r { v1.8h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+ld2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r { v1.2s, v2.2s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], #4
+add x0, x27, 1
+ld2r { v1.4s, v2.4s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r { v1.16b, v2.16b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld2r { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G34
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G35
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G36
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G37
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G38
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G39
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G40
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G41
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G42
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+add x0, x27, 1
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G43
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G44
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ldp s1, s2, [x27], #248
+add x0, x27, 1
+ldp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G45
+ldp q1, q2, [x27], #992
+add x0, x27, 1
+ldp s1, s2, [x27, #248]!
+add x0, x27, 1
+ldp d1, d2, [x27, #496]!
+add x0, x27, 1
+ldp q1, q2, [x27, #992]!
+add x0, x27, 1
+ldp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G46
+ldp x1, x2, [x27], #496
+add x0, x27, 1
+ldp w1, w2, [x27, #248]!
+add x0, x27, 1
+ldp x1, x2, [x27, #496]!
+add x0, x27, 1
+ldpsw x1, x2, [x27], #248
+add x0, x27, 1
+ldpsw x1, x2, [x27, #248]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G47
+ldr b1, [x27], #254
+add x0, x27, 1
+ldr h1, [x27], #254
+add x0, x27, 1
+ldr s1, [x27], #254
+add x0, x27, 1
+ldr d1, [x27], #254
+add x0, x27, 1
+ldr q1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G48
+ldr b1, [x27, #254]!
+add x0, x27, 1
+ldr h1, [x27, #254]!
+add x0, x27, 1
+ldr s1, [x27, #254]!
+add x0, x27, 1
+ldr d1, [x27, #254]!
+add x0, x27, 1
+ldr q1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G49
+ldr w1, [x27], #254
+add x0, x27, 1
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr w1, [x27, #254]!
+add x0, x27, 1
+ldr x1, [x27, #254]!
+add x0, x27, 1
+ldrb w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G50
+ldrb w1, [x27, #254]!
+add x0, x27, 1
+ldrh w1, [x27], #254
+add x0, x27, 1
+ldrh w1, [x27, #254]!
+add x0, x27, 1
+ldrsb w1, [x27], #254
+add x0, x27, 1
+ldrsb x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G51
+ldrsb w1, [x27, #254]!
+add x0, x27, 1
+ldrsb x1, [x27, #254]!
+add x0, x27, 1
+ldrsh w1, [x27], #254
+add x0, x27, 1
+ldrsh x1, [x27], #254
+add x0, x27, 1
+ldrsh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G52
+ldrsh x1, [x27, #254]!
+add x0, x27, 1
+ldrsw x1, [x27], #254
+add x0, x27, 1
+ldrsw x1, [x27, #254]!
+add x0, x27, 1
+st1 { v1.1d }, [x27], #8
+add x0, x27, 1
+st1 { v1.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G53
+st1 { v1.2s }, [x27], #8
+add x0, x27, 1
+st1 { v1.4h }, [x27], #8
+add x0, x27, 1
+st1 { v1.4s }, [x27], #16
+add x0, x27, 1
+st1 { v1.8b }, [x27], #8
+add x0, x27, 1
+st1 { v1.8h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G54
+st1 { v1.16b }, [x27], #16
+add x0, x27, 1
+st1 { v1.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G55
+st1 { v1.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G56
+st1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G57
+st1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G58
+st1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G59
+st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G60
+st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G61
+st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G62
+st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G63
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G64
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G65
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G66
+st1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G67
+st1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+st1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+st1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G68
+st2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G69
+st2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G70
+st2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G71
+st2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G72
+st2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G73
+st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G74
+st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G75
+st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G76
+st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G77
+st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G78
+st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G79
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G80
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G81
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G82
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G83
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G84
+stp s1, s2, [x27], #248
+add x0, x27, 1
+stp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G85
+stp q1, q2, [x27], #992
+add x0, x27, 1
+stp s1, s2, [x27, #248]!
+add x0, x27, 1
+stp d1, d2, [x27, #496]!
+add x0, x27, 1
+stp q1, q2, [x27, #992]!
+add x0, x27, 1
+stp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G86
+stp x1, x2, [x27], #496
+add x0, x27, 1
+stp w1, w2, [x27, #248]!
+add x0, x27, 1
+stp x1, x2, [x27, #496]!
+add x0, x27, 1
+str b1, [x27], #254
+add x0, x27, 1
+str h1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G87
+str s1, [x27], #254
+add x0, x27, 1
+str d1, [x27], #254
+add x0, x27, 1
+str q1, [x27], #254
+add x0, x27, 1
+str b1, [x27, #254]!
+add x0, x27, 1
+str h1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G88
+str s1, [x27, #254]!
+add x0, x27, 1
+str d1, [x27, #254]!
+add x0, x27, 1
+str q1, [x27, #254]!
+add x0, x27, 1
+str w1, [x27], #254
+add x0, x27, 1
+str x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G89
+str w1, [x27, #254]!
+add x0, x27, 1
+str x1, [x27, #254]!
+add x0, x27, 1
+strb w1, [x27], #254
+add x0, x27, 1
+strb w1, [x27, #254]!
+add x0, x27, 1
+strh w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G90
+strh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G91
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr x2, [x1], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - G01
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2201
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeE . . . . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] . DE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,3] . . DE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,5] . . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . . ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,7] . . . .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . .DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [1] Code Region - G02
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2301
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.65
+# CHECK-NEXT: IPC: 0.43
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeE . . . . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] . DE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,3] . . DE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,5] . . . DE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . . ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [2] Code Region - G03
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2201
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeE . . . . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . . ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . .DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [3] Code Region - G04
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [4] Code Region - G05
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [5] Code Region - G06
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [6] Code Region - G07
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [7] Code Region - G08
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [8] Code Region - G09
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [9] Code Region - G10
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [10] Code Region - G11
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [11] Code Region - G12
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [12] Code Region - G13
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2401
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.42
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeE. ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [13] Code Region - G14
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeE . . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeE. ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [14] Code Region - G15
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeE . . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeE. ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [15] Code Region - G16
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeE . . . . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeE. ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [16] Code Region - G17
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeE . . . . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeE. ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [17] Code Region - G18
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeE . . . . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeE. ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [18] Code Region - G19
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeE . . . . . ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . . ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeeE. . . ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [19] Code Region - G20
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3301
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.45
+# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: Block RThroughput: 9.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] . .DE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . . .DE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . DE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeE . . ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . .DeeeeeE. ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [20] Code Region - G21
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2901
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.52
+# CHECK-NEXT: IPC: 0.34
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeeE . . . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . .DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeE . . ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeeE. ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [21] Code Region - G22
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2701
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.56
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeE . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . .DE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,3] . . .DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,5] . . . .DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeE . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeE. ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . .DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [22] Code Region - G23
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [23] Code Region - G24
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [24] Code Region - G25
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [25] Code Region - G26
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [26] Code Region - G27
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2601
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.58
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 5.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeE. . . . .. ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . .. ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . .. ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. .. ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [27] Code Region - G28
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] . DE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,3] . . .DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,7] . . . . . DE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [28] Code Region - G29
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] . DE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [29] Code Region - G30
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2801
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.54
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeE . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeE . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,7] . . . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeE. ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,9] . . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [30] Code Region - G31
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [31] Code Region - G32
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [32] Code Region - G33
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [33] Code Region - G34
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [34] Code Region - G35
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [35] Code Region - G36
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3301
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.45
+# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: Block RThroughput: 9.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeeE . . . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] . . .DE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . DE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeE . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . .DeeeeeE. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [36] Code Region - G37
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] . .DE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] . . . DE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeE . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeE. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [37] Code Region - G38
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] . .DE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeE . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeE. ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [38] Code Region - G39
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [39] Code Region - G40
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [40] Code Region - G41
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [41] Code Region - G42
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [42] Code Region - G43
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [43] Code Region - G44
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2101
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.81
+# CHECK-NEXT: IPC: 0.48
+# CHECK-NEXT: Block RThroughput: 5.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeE. . . .. ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . .. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. .. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE .. ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeE ldp d1, d2, [x27], #496
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldp s1, s2, [x27], #248
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldp d1, d2, [x27], #496
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [44] Code Region - G45
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1501
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.33
+# CHECK-NEXT: IPC: 0.67
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeE . . . ldp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . .DeeE. . ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] . . DE. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . DeeE . ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] . . .DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . DeeE ldp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [45] Code Region - G46
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1501
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.33
+# CHECK-NEXT: IPC: 0.67
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeE . . . ldp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . .DeeE. . ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] . . DE. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . DeeE . ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: [0,7] . . .DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . DeeE ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: [0,9] . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [46] Code Region - G47
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeE . . ldr b1, [x27], #254
+# CHECK-NEXT: [0,1] .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeE. . ldr h1, [x27], #254
+# CHECK-NEXT: [0,3] . DE. . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeE . ldr s1, [x27], #254
+# CHECK-NEXT: [0,5] . DE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DeE . ldr d1, [x27], #254
+# CHECK-NEXT: [0,7] . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DeE ldr q1, [x27], #254
+# CHECK-NEXT: [0,9] . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr b1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr h1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldr s1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldr d1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldr q1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [47] Code Region - G48
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeE . . ldr b1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeE. . ldr h1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DE. . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeE . ldr s1, [x27, #254]!
+# CHECK-NEXT: [0,5] . DE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DeE . ldr d1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DeE ldr q1, [x27, #254]!
+# CHECK-NEXT: [0,9] . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr b1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr h1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldr s1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldr d1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldr q1, [x27, #254]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [48] Code Region - G49
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeE . . ldr w1, [x27], #254
+# CHECK-NEXT: [0,1] .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeE. . ldr x1, [x27], #254
+# CHECK-NEXT: [0,3] . DE. . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeE . ldr w1, [x27, #254]!
+# CHECK-NEXT: [0,5] . DE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DeE . ldr x1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DeE ldrb w1, [x27], #254
+# CHECK-NEXT: [0,9] . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr w1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldr w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldr x1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldrb w1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [49] Code Region - G50
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeE . . ldrb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeE. . ldrh w1, [x27], #254
+# CHECK-NEXT: [0,3] . DE. . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeE . ldrh w1, [x27, #254]!
+# CHECK-NEXT: [0,5] . DE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DeE . ldrsb w1, [x27], #254
+# CHECK-NEXT: [0,7] . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DeE ldrsb x1, [x27], #254
+# CHECK-NEXT: [0,9] . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldrb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldrh w1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldrh w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldrsb w1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldrsb x1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [50] Code Region - G51
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeE . . ldrsb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeE. . ldrsb x1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DE. . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeE . ldrsh w1, [x27], #254
+# CHECK-NEXT: [0,5] . DE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DeE . ldrsh x1, [x27], #254
+# CHECK-NEXT: [0,7] . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DeE ldrsh w1, [x27, #254]!
+# CHECK-NEXT: [0,9] . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldrsb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldrsb x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldrsh w1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldrsh x1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldrsh w1, [x27, #254]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [51] Code Region - G52
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1401
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.07
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeE . . . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeE. . . ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,3] . DE. . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeE . . ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,5] . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DeeeE. . st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,7] . . DE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DeeeE. st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldrsh x1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldrsw x1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldrsw x1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [52] Code Region - G53
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2002
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeE. . . .. st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [53] Code Region - G54
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2002
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeE. . . .. st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [54] Code Region - G55
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2002
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeE. . . .. st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [55] Code Region - G56
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2002
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeE. . . .. st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [56] Code Region - G57
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2002
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeE. . . .. st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [57] Code Region - G58
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2002
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeE. . . .. st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [58] Code Region - G59
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [59] Code Region - G60
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [60] Code Region - G61
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [61] Code Region - G62
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 18.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [62] Code Region - G63
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [63] Code Region - G64
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [64] Code Region - G65
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2202
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 11.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,5] . . . DE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . . st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,7] . . . . DE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [65] Code Region - G66
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2002
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeE. . . .. st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,3] . . DE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . .DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE. st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [66] Code Region - G67
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2102
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.71
+# CHECK-NEXT: IPC: 0.48
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeE. . . . . st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . . st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . . st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,5] . . . DE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . . st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . .DE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeeE. st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . .DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [67] Code Region - G68
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [68] Code Region - G69
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 16.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [69] Code Region - G70
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [70] Code Region - G71
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [71] Code Region - G72
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [72] Code Region - G73
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 1502
+# CHECK-NEXT: Total uOps: 900
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeE . .. st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,1] . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE .. st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,3] . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE. st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,5] . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [73] Code Region - G74
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [74] Code Region - G75
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [75] Code Region - G76
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [76] Code Region - G77
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [77] Code Region - G78
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [78] Code Region - G79
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [79] Code Region - G80
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [80] Code Region - G81
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [81] Code Region - G82
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2502
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] . DE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,3] . . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,5] . . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE. st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [82] Code Region - G83
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 2002
+# CHECK-NEXT: Total uOps: 1200
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeeE . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] . DE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DE . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,5] . . . DE .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [83] Code Region - G84
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 201
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 1.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012
+
+# CHECK: [0,0] DE. stp s1, s2, [x27], #248
+# CHECK-NEXT: [0,1] DE. add x0, x27, #1
+# CHECK-NEXT: [0,2] .DE stp d1, d2, [x27], #496
+# CHECK-NEXT: [0,3] .DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 stp s1, s2, [x27], #248
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 stp d1, d2, [x27], #496
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [84] Code Region - G85
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 2.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345
+
+# CHECK: [0,0] DE . stp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] DE . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DE . stp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] .DE . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DE . stp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] . DE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DE. stp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] . DE. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DE stp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 stp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 stp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 stp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 stp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 stp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [85] Code Region - G86
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 2.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345
+
+# CHECK: [0,0] DE . stp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] DE . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DE . stp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] .DE . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DE . stp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] . DE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DE. str b1, [x27], #254
+# CHECK-NEXT: [0,7] . DE. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DE str h1, [x27], #254
+# CHECK-NEXT: [0,9] . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 stp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 stp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 stp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 str b1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 str h1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [86] Code Region - G87
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 2.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345
+
+# CHECK: [0,0] DE . str s1, [x27], #254
+# CHECK-NEXT: [0,1] DE . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DE . str d1, [x27], #254
+# CHECK-NEXT: [0,3] .DE . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DE . str q1, [x27], #254
+# CHECK-NEXT: [0,5] . DE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DE. str b1, [x27, #254]!
+# CHECK-NEXT: [0,7] . DE. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DE str h1, [x27, #254]!
+# CHECK-NEXT: [0,9] . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 str s1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 str d1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 str q1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 str b1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 str h1, [x27, #254]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [87] Code Region - G88
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 2.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345
+
+# CHECK: [0,0] DE . str s1, [x27, #254]!
+# CHECK-NEXT: [0,1] DE . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DE . str d1, [x27, #254]!
+# CHECK-NEXT: [0,3] .DE . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DE . str q1, [x27, #254]!
+# CHECK-NEXT: [0,5] . DE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DE. str w1, [x27], #254
+# CHECK-NEXT: [0,7] . DE. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DE str x1, [x27], #254
+# CHECK-NEXT: [0,9] . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 str s1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 str d1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 str q1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 str w1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 str x1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [88] Code Region - G89
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 2.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345
+
+# CHECK: [0,0] DE . str w1, [x27, #254]!
+# CHECK-NEXT: [0,1] DE . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DE . str x1, [x27, #254]!
+# CHECK-NEXT: [0,3] .DE . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DE . strb w1, [x27], #254
+# CHECK-NEXT: [0,5] . DE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DE. strb w1, [x27, #254]!
+# CHECK-NEXT: [0,7] . DE. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DE strh w1, [x27], #254
+# CHECK-NEXT: [0,9] . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 str w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 str x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 strb w1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 strb w1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 strh w1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [89] Code Region - G90
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 101
+# CHECK-NEXT: Total uOps: 300
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.97
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01
+
+# CHECK: [0,0] DE strh w1, [x27, #254]!
+# CHECK-NEXT: [0,1] DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 strh w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [90] Code Region - G91
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 401
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234
+
+# CHECK: [0,0] DeE . ldr x1, [x27], #254
+# CHECK-NEXT: [0,1] .DE . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeE ldr x2, [x1], #254
+# CHECK-NEXT: [0,3] . DE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr x2, [x1], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A53-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A53-writeback.s
new file mode 100644
index 000000000000000..c5ca6f9f1764aa0
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A53-writeback.s
@@ -0,0 +1,5290 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a53 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1 { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1 { v1.2d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2s }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4h }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1 { v1.8b }, [x27], #8
+add x0, x27, 1
+ld1 { v1.8h }, [x27], #16
+add x0, x27, 1
+ld1 { v1.16b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1 { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1 { v1.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+ld1r { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.4h }, [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r { v1.4s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.8b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.8h }, [x27], #2
+add x0, x27, 1
+ld1r { v1.16b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r { v1.2d }, [x27], x28
+add x0, x27, 1
+ld1r { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r { v1.8h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+ld2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r { v1.2s, v2.2s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], #4
+add x0, x27, 1
+ld2r { v1.4s, v2.4s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r { v1.16b, v2.16b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld2r { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G34
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G35
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G36
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G37
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G38
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G39
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G40
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G41
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G42
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+add x0, x27, 1
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G43
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G44
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ldp s1, s2, [x27], #248
+add x0, x27, 1
+ldp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G45
+ldp q1, q2, [x27], #992
+add x0, x27, 1
+ldp s1, s2, [x27, #248]!
+add x0, x27, 1
+ldp d1, d2, [x27, #496]!
+add x0, x27, 1
+ldp q1, q2, [x27, #992]!
+add x0, x27, 1
+ldp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G46
+ldp x1, x2, [x27], #496
+add x0, x27, 1
+ldp w1, w2, [x27, #248]!
+add x0, x27, 1
+ldp x1, x2, [x27, #496]!
+add x0, x27, 1
+ldpsw x1, x2, [x27], #248
+add x0, x27, 1
+ldpsw x1, x2, [x27, #248]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G47
+ldr b1, [x27], #254
+add x0, x27, 1
+ldr h1, [x27], #254
+add x0, x27, 1
+ldr s1, [x27], #254
+add x0, x27, 1
+ldr d1, [x27], #254
+add x0, x27, 1
+ldr q1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G48
+ldr b1, [x27, #254]!
+add x0, x27, 1
+ldr h1, [x27, #254]!
+add x0, x27, 1
+ldr s1, [x27, #254]!
+add x0, x27, 1
+ldr d1, [x27, #254]!
+add x0, x27, 1
+ldr q1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G49
+ldr w1, [x27], #254
+add x0, x27, 1
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr w1, [x27, #254]!
+add x0, x27, 1
+ldr x1, [x27, #254]!
+add x0, x27, 1
+ldrb w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G50
+ldrb w1, [x27, #254]!
+add x0, x27, 1
+ldrh w1, [x27], #254
+add x0, x27, 1
+ldrh w1, [x27, #254]!
+add x0, x27, 1
+ldrsb w1, [x27], #254
+add x0, x27, 1
+ldrsb x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G51
+ldrsb w1, [x27, #254]!
+add x0, x27, 1
+ldrsb x1, [x27, #254]!
+add x0, x27, 1
+ldrsh w1, [x27], #254
+add x0, x27, 1
+ldrsh x1, [x27], #254
+add x0, x27, 1
+ldrsh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G52
+ldrsh x1, [x27, #254]!
+add x0, x27, 1
+ldrsw x1, [x27], #254
+add x0, x27, 1
+ldrsw x1, [x27, #254]!
+add x0, x27, 1
+st1 { v1.1d }, [x27], #8
+add x0, x27, 1
+st1 { v1.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G53
+st1 { v1.2s }, [x27], #8
+add x0, x27, 1
+st1 { v1.4h }, [x27], #8
+add x0, x27, 1
+st1 { v1.4s }, [x27], #16
+add x0, x27, 1
+st1 { v1.8b }, [x27], #8
+add x0, x27, 1
+st1 { v1.8h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G54
+st1 { v1.16b }, [x27], #16
+add x0, x27, 1
+st1 { v1.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G55
+st1 { v1.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G56
+st1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G57
+st1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G58
+st1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G59
+st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G60
+st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G61
+st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G62
+st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G63
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G64
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G65
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G66
+st1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G67
+st1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+st1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+st1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G68
+st2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G69
+st2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G70
+st2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G71
+st2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G72
+st2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G73
+st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G74
+st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G75
+st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G76
+st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G77
+st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G78
+st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G79
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G80
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G81
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G82
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G83
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G84
+stp s1, s2, [x27], #248
+add x0, x27, 1
+stp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G85
+stp q1, q2, [x27], #992
+add x0, x27, 1
+stp s1, s2, [x27, #248]!
+add x0, x27, 1
+stp d1, d2, [x27, #496]!
+add x0, x27, 1
+stp q1, q2, [x27, #992]!
+add x0, x27, 1
+stp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G86
+stp x1, x2, [x27], #496
+add x0, x27, 1
+stp w1, w2, [x27, #248]!
+add x0, x27, 1
+stp x1, x2, [x27, #496]!
+add x0, x27, 1
+str b1, [x27], #254
+add x0, x27, 1
+str h1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G87
+str s1, [x27], #254
+add x0, x27, 1
+str d1, [x27], #254
+add x0, x27, 1
+str q1, [x27], #254
+add x0, x27, 1
+str b1, [x27, #254]!
+add x0, x27, 1
+str h1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G88
+str s1, [x27, #254]!
+add x0, x27, 1
+str d1, [x27, #254]!
+add x0, x27, 1
+str q1, [x27, #254]!
+add x0, x27, 1
+str w1, [x27], #254
+add x0, x27, 1
+str x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G89
+str w1, [x27, #254]!
+add x0, x27, 1
+str x1, [x27, #254]!
+add x0, x27, 1
+strb w1, [x27], #254
+add x0, x27, 1
+strb w1, [x27, #254]!
+add x0, x27, 1
+strh w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G90
+strh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G91
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr x2, [x1], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - G01
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [1] Code Region - G02
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [2] Code Region - G03
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [3] Code Region - G04
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3901
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.38
+# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: Block RThroughput: 9.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE . . . ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . .DeeeeE . ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [4] Code Region - G05
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [5] Code Region - G06
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [6] Code Region - G07
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4301
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.35
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 13.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeE . . . . . . . . ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeeE . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeeE . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeeE . ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [7] Code Region - G08
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 15.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012345
+
+# CHECK: [0,0] DeeeeeE . . . . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeE. . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeE . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . .DeeeeeE . ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [8] Code Region - G09
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 15.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012345
+
+# CHECK: [0,0] DeeeeeE . . . . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeE. . . . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeE . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . .DeeeeeE . ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [9] Code Region - G10
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4701
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.32
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 17.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeE . . . . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeE. . . . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE. . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE. . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [10] Code Region - G11
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.30
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [11] Code Region - G12
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.30
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [12] Code Region - G13
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4701
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.32
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 17.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeE. . ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [13] Code Region - G14
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [14] Code Region - G15
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [15] Code Region - G16
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [16] Code Region - G17
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [17] Code Region - G18
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [18] Code Region - G19
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . . . . . ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . . ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeE . . . . ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [19] Code Region - G20
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4801
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 18.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [20] Code Region - G21
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4401
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.34
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234
+
+# CHECK: [0,0] DeeeeE . . . . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeeeE . . . . . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . .DeeeeE . . . ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeeeE . ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [21] Code Region - G22
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3801
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.39
+# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE. . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeE . . . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeE . . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . .DeeeE . ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [22] Code Region - G23
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [23] Code Region - G24
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [24] Code Region - G25
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [25] Code Region - G26
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [26] Code Region - G27
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3701
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.41
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . . ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . . ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . . ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . . ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeE. . ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [27] Code Region - G28
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.30
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [28] Code Region - G29
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4901
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 19.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . .DeeE. . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [29] Code Region - G30
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4601
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 16.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . .. ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . .. ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . .. ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeE . .. ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,7] . . . . . . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeE .. ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [30] Code Region - G31
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [31] Code Region - G32
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [32] Code Region - G33
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [33] Code Region - G34
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [34] Code Region - G35
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [35] Code Region - G36
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5101
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.29
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 21.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . . . .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeeE . . . . .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . . .DeeE. . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeeE . . .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeeE .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [36] Code Region - G37
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5401
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.28
+# CHECK-NEXT: IPC: 0.19
+# CHECK-NEXT: Block RThroughput: 24.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeE . . . . . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . .DeeeeeeeE. . . . . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . . DeeeeeeeE . . . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeE . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeE . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [37] Code Region - G38
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: Block RThroughput: 25.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeE . . . . . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . .DeeeeeeeE. . . . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . . DeeeeeeeE . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeeE . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeE . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [38] Code Region - G39
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [39] Code Region - G40
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [40] Code Region - G41
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [41] Code Region - G42
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [42] Code Region - G43
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [43] Code Region - G44
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3201
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.53
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeE . . ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE ldp d1, d2, [x27], #496
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldp s1, s2, [x27], #248
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldp d1, d2, [x27], #496
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [44] Code Region - G45
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . ldp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ldp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [45] Code Region - G46
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . ldp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [46] Code Region - G47
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . ldr b1, [x27], #254
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ldr h1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ldr s1, [x27], #254
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ldr d1, [x27], #254
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ldr q1, [x27], #254
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr b1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr h1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldr s1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldr d1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldr q1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [47] Code Region - G48
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . ldr b1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ldr h1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ldr s1, [x27, #254]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ldr d1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ldr q1, [x27, #254]!
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr b1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr h1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldr s1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldr d1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldr q1, [x27, #254]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [48] Code Region - G49
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . ldr w1, [x27], #254
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ldr x1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ldr w1, [x27, #254]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ldr x1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ldrb w1, [x27], #254
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr w1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldr w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldr x1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldrb w1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [49] Code Region - G50
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . ldrb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ldrh w1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ldrh w1, [x27, #254]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ldrsb w1, [x27], #254
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ldrsb x1, [x27], #254
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldrb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldrh w1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldrh w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldrsb w1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldrsb x1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [50] Code Region - G51
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . ldrsb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ldrsb x1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ldrsh w1, [x27], #254
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ldrsh x1, [x27], #254
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ldrsh w1, [x27, #254]!
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldrsb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldrsb x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldrsh w1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldrsh x1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldrsh w1, [x27, #254]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [51] Code Region - G52
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2201
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeE. . . . . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . . ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . . ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,5] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . DeeeE. . . st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . DeeeE. . st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldrsh x1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldrsw x1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldrsw x1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [52] Code Region - G53
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [53] Code Region - G54
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [54] Code Region - G55
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [55] Code Region - G56
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [56] Code Region - G57
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [57] Code Region - G58
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [58] Code Region - G59
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [59] Code Region - G60
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [60] Code Region - G61
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [61] Code Region - G62
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [62] Code Region - G63
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [63] Code Region - G64
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [64] Code Region - G65
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2703
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.55
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,5] . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeE . . st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeE . st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [65] Code Region - G66
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [66] Code Region - G67
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2603
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.58
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE . st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [67] Code Region - G68
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2703
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.55
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . . st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . .DeeeE . . st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . .DeeeeE . st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [68] Code Region - G69
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2803
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.54
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeE . . st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE . st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [69] Code Region - G70
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2703
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.55
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . . st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . .DeeeeE . . . st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeE . . st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeE . st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [70] Code Region - G71
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [71] Code Region - G72
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [72] Code Region - G73
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 900
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.45
+# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeE . . . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeE . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,3] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE. . st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,5] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [73] Code Region - G74
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3403
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.44
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . .. st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] . .DeeE. . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . .. st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] . . . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . .. st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,5] . . . . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeE . .. st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . . . DeeE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeE .. st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [74] Code Region - G75
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 15.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeE . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeE. . st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [75] Code Region - G76
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3103
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.48
+# CHECK-NEXT: IPC: 0.32
+# CHECK-NEXT: Block RThroughput: 11.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,3] . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE. . . st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE . st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [76] Code Region - G77
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [77] Code Region - G78
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3103
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.48
+# CHECK-NEXT: IPC: 0.32
+# CHECK-NEXT: Block RThroughput: 11.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeeE . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [78] Code Region - G79
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 15.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeE . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeE. . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [79] Code Region - G80
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3403
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.44
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeE . . . . .. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE. . . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeE . .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . .DeeE. .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeE .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [80] Code Region - G81
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3203
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.47
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE. . . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . .DeeeeE . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [81] Code Region - G82
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [82] Code Region - G83
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 2403
+# CHECK-NEXT: Total uOps: 1200
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,5] . . . . DeeE .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [83] Code Region - G84
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 801
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeE. . stp s1, s2, [x27], #248
+# CHECK-NEXT: [0,1] .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE stp d1, d2, [x27], #496
+# CHECK-NEXT: [0,3] . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 stp s1, s2, [x27], #248
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 stp d1, d2, [x27], #496
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [84] Code Region - G85
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . stp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . stp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . stp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . stp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE stp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 stp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 stp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 stp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 stp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 stp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [85] Code Region - G86
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . stp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . stp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . stp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . str b1, [x27], #254
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE str h1, [x27], #254
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 stp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 stp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 stp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 str b1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 str h1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [86] Code Region - G87
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . str s1, [x27], #254
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . str d1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . str q1, [x27], #254
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . str b1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE str h1, [x27, #254]!
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 str s1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 str d1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 str q1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 str b1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 str h1, [x27, #254]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [87] Code Region - G88
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . str s1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . str d1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . str q1, [x27, #254]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . str w1, [x27], #254
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE str x1, [x27], #254
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 str s1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 str d1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 str q1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 str w1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 str x1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [88] Code Region - G89
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . str w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . str x1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . strb w1, [x27], #254
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . strb w1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE strh w1, [x27], #254
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 str w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 str x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 strb w1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 strb w1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 strh w1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [89] Code Region - G90
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 401
+# CHECK-NEXT: Total uOps: 300
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234
+
+# CHECK: [0,0] DeeeE strh w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 strh w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [90] Code Region - G91
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 801
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeE. . ldr x1, [x27], #254
+# CHECK-NEXT: [0,1] .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE ldr x2, [x1], #254
+# CHECK-NEXT: [0,3] . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr x2, [x1], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-writeback.s
new file mode 100644
index 000000000000000..76f46ccf0c5cb4a
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-writeback.s
@@ -0,0 +1,5290 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1 { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1 { v1.2d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2s }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4h }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1 { v1.8b }, [x27], #8
+add x0, x27, 1
+ld1 { v1.8h }, [x27], #16
+add x0, x27, 1
+ld1 { v1.16b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1 { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1 { v1.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+ld1r { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.4h }, [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r { v1.4s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.8b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.8h }, [x27], #2
+add x0, x27, 1
+ld1r { v1.16b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r { v1.2d }, [x27], x28
+add x0, x27, 1
+ld1r { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r { v1.8h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+ld2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r { v1.2s, v2.2s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], #4
+add x0, x27, 1
+ld2r { v1.4s, v2.4s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r { v1.16b, v2.16b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld2r { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+# LD3 .16b with immediate post-index, then LD3 (.2d, .2s, .4h, .4s) post-indexed by x28; each add reads the updated x27.
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+# LD3 (.8b, .8h, .16b) post-indexed by x28, then LD3 byte-lane loads (lanes 0 and 8) with immediate post-index; each add reads the updated x27.
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+# LD3 single-lane loads: .b lanes post-indexed by x28, .h lanes by immediate, .h lane 0 by x28; each add reads the updated x27.
+ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+# LD3 single-lane loads: .h lane 4 by x28, then .s and .d lane 0 with immediate and x28 post-index; each add reads the updated x27.
+ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+# LD3R (.1d, .2d, .2s, .4h, .4s) with immediate post-index; each add reads the base x27 the load just wrote back.
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G34
+# LD3R (.8b, .8h, .16b) with immediate post-index, then LD3R (.1d, .2d) post-indexed by x28; each add reads the updated x27.
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G35
+# LD3R (.2s, .4h, .4s, .8b, .8h) post-indexed by x28; each add reads the base x27 the load just wrote back.
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G36
+# LD3R .16b post-indexed by x28, then LD4 (.2d, .2s, .4h, .4s) with immediate post-index; each add reads the updated x27.
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G37
+# LD4 (.8b, .8h, .16b) with immediate post-index, then LD4 (.2d, .2s) post-indexed by x28; each add reads the updated x27.
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G38
+# LD4 multiple-structure loads (.4h, .4s, .8b, .8h, .16b) post-indexed by x28; each add reads the updated x27.
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G39
+# LD4 single-lane loads: .b lanes 0 and 8 with immediate and x28 post-index, then .h lane 0 by immediate; each add reads the updated x27.
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G40
+# LD4 single-lane loads: .h lane 4 by immediate, .h lanes 0 and 4 by x28, .s lane 0 by immediate and x28; each add reads the updated x27.
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G41
+# LD4 .d lane 0 with immediate and x28 post-index, then LD4R (.1d, .2d, .2s) with immediate post-index; each add reads the updated x27.
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G42
+# LD4R (.4h, .4s, .8b, .8h, .16b) with immediate post-index; each add reads the base x27 the load just wrote back.
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+add x0, x27, 1
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G43
+# LD4R (.1d, .2d, .2s, .4h, .4s) post-indexed by x28; each add reads the base x27 the load just wrote back.
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G44
+# LD4R (.8b, .8h, .16b) post-indexed by x28, then post-indexed LDP of s and d pairs; each add reads the updated x27.
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ldp s1, s2, [x27], #248
+add x0, x27, 1
+ldp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G45
+# LDP: q pair post-indexed, s/d/q pairs pre-indexed, w pair post-indexed; each add reads the base x27 the load just wrote back.
+ldp q1, q2, [x27], #992
+add x0, x27, 1
+ldp s1, s2, [x27, #248]!
+add x0, x27, 1
+ldp d1, d2, [x27, #496]!
+add x0, x27, 1
+ldp q1, q2, [x27, #992]!
+add x0, x27, 1
+ldp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G46
+# LDP x pair post-indexed, LDP w/x pairs pre-indexed, LDPSW post- and pre-indexed; each add reads the updated x27.
+ldp x1, x2, [x27], #496
+add x0, x27, 1
+ldp w1, w2, [x27, #248]!
+add x0, x27, 1
+ldp x1, x2, [x27, #496]!
+add x0, x27, 1
+ldpsw x1, x2, [x27], #248
+add x0, x27, 1
+ldpsw x1, x2, [x27, #248]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G47
+# Post-indexed LDR into FP/SIMD registers b, h, s, d and q; each add reads the base x27 the load just wrote back.
+ldr b1, [x27], #254
+add x0, x27, 1
+ldr h1, [x27], #254
+add x0, x27, 1
+ldr s1, [x27], #254
+add x0, x27, 1
+ldr d1, [x27], #254
+add x0, x27, 1
+ldr q1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G48
+# Pre-indexed LDR into FP/SIMD registers b, h, s, d and q; each add reads the base x27 the load just wrote back.
+ldr b1, [x27, #254]!
+add x0, x27, 1
+ldr h1, [x27, #254]!
+add x0, x27, 1
+ldr s1, [x27, #254]!
+add x0, x27, 1
+ldr d1, [x27, #254]!
+add x0, x27, 1
+ldr q1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G49
+# LDR w/x post-indexed and pre-indexed, then LDRB post-indexed; each add reads the base x27 the load just wrote back.
+ldr w1, [x27], #254
+add x0, x27, 1
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr w1, [x27, #254]!
+add x0, x27, 1
+ldr x1, [x27, #254]!
+add x0, x27, 1
+ldrb w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G50
+# LDRB pre-indexed, LDRH post- and pre-indexed, LDRSB (w and x destinations) post-indexed; each add reads the updated x27.
+ldrb w1, [x27, #254]!
+add x0, x27, 1
+ldrh w1, [x27], #254
+add x0, x27, 1
+ldrh w1, [x27, #254]!
+add x0, x27, 1
+ldrsb w1, [x27], #254
+add x0, x27, 1
+ldrsb x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G51
+# LDRSB (w and x) pre-indexed, LDRSH (w and x) post-indexed, LDRSH w pre-indexed; each add reads the updated x27.
+ldrsb w1, [x27, #254]!
+add x0, x27, 1
+ldrsb x1, [x27, #254]!
+add x0, x27, 1
+ldrsh w1, [x27], #254
+add x0, x27, 1
+ldrsh x1, [x27], #254
+add x0, x27, 1
+ldrsh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G52
+# LDRSH x pre-indexed, LDRSW post- and pre-indexed, then one-register ST1 (.1d, .2d) with immediate post-index; each add reads the updated x27.
+ldrsh x1, [x27, #254]!
+add x0, x27, 1
+ldrsw x1, [x27], #254
+add x0, x27, 1
+ldrsw x1, [x27, #254]!
+add x0, x27, 1
+st1 { v1.1d }, [x27], #8
+add x0, x27, 1
+st1 { v1.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G53
+# One-register ST1 (.2s, .4h, .4s, .8b, .8h) with immediate post-index; each add reads the base x27 the store just wrote back.
+st1 { v1.2s }, [x27], #8
+add x0, x27, 1
+st1 { v1.4h }, [x27], #8
+add x0, x27, 1
+st1 { v1.4s }, [x27], #16
+add x0, x27, 1
+st1 { v1.8b }, [x27], #8
+add x0, x27, 1
+st1 { v1.8h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G54
+# One-register ST1: .16b with immediate post-index, then .1d, .2d, .2s, .4h post-indexed by x28; each add reads the updated x27.
+st1 { v1.16b }, [x27], #16
+add x0, x27, 1
+st1 { v1.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G55
+# One-register ST1 (.4s, .8b, .8h, .16b) post-indexed by x28, then two-register ST1 .1d with immediate post-index; each add reads the updated x27.
+st1 { v1.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G56
+# Two-register ST1 (.2d, .2s, .4h, .4s, .8b) with immediate post-index; each add reads the base x27 the store just wrote back.
+st1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G57
+# Two-register ST1: .8h and .16b with immediate post-index, then .1d, .2d, .2s post-indexed by x28; each add reads the updated x27.
+st1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G58
+# Two-register ST1 (.4h, .4s, .8b, .8h, .16b) post-indexed by x28; each add reads the base x27 the store just wrote back.
+st1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G59
+# Three-register ST1 (.1d, .2d, .2s, .4h, .4s) with immediate post-index; each add reads the base x27 the store just wrote back.
+st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G60
+# Three-register ST1: .8b, .8h, .16b with immediate post-index, then .1d and .2d post-indexed by x28; each add reads the updated x27.
+st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G61
+# Three-register ST1 (.2s, .4h, .4s, .8b, .8h) post-indexed by x28; each add reads the base x27 the store just wrote back.
+st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G62
+# Three-register ST1 .16b post-indexed by x28, then four-register ST1 (.1d, .2d, .2s, .4h) with immediate post-index; each add reads the updated x27.
+st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G63
+# Four-register ST1: .4s, .8b, .8h, .16b with immediate post-index, then .1d post-indexed by x28; each add reads the updated x27.
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G64
+# Four-register ST1 (.2d, .2s, .4h, .4s, .8b) post-indexed by x28; each add reads the base x27 the store just wrote back.
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G65
+# Four-register ST1 (.8h, .16b) post-indexed by x28, then ST1 byte-lane stores: lanes 0 and 8 by immediate, lane 0 by x28; each add reads the updated x27.
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G66
+# ST1 single-lane stores: .b lane 8 by x28, then .h lanes 0 and 4 by immediate and by x28; each add reads the updated x27.
+st1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G67
+# ST1 .s and .d lane 0 with immediate and x28 post-index, then ST2 .2d with immediate post-index; each add reads the updated x27.
+st1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+st1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+st1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G68
+# ST2 multiple-structure stores (.2s, .4h, .4s, .8b, .8h) with immediate post-index; each add reads the updated x27.
+st2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G69
+# ST2 .16b with immediate post-index, then ST2 (.2d, .2s, .4h, .4s) post-indexed by x28; each add reads the updated x27.
+st2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G70
+# ST2 (.8b, .8h, .16b) post-indexed by x28, then ST2 byte-lane stores (lanes 0 and 8) with immediate post-index; each add reads the updated x27.
+st2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G71
+# ST2 single-lane stores: .b lanes by x28, .h lanes 0 and 4 by immediate, .h lane 0 by x28; each add reads the updated x27.
+st2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G72
+# ST2 single-lane stores: .h lane 4 by x28, then .s and .d lane 0 with immediate and x28 post-index; each add reads the updated x27.
+st2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G73
+# ST3 multiple-structure stores (.2d, .2s, .4h) with immediate post-index; this region holds three store/add pairs. Each add reads the updated x27.
+st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G74
+# ST3 (.4s, .8b, .8h, .16b) with immediate post-index, then ST3 .2d post-indexed by x28; each add reads the updated x27.
+st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G75
+# ST3 multiple-structure stores (.2s, .4h, .4s, .8b, .8h) post-indexed by x28; each add reads the updated x27.
+st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G76
+# ST3 .16b post-indexed by x28, then ST3 byte-lane stores (lanes 0 and 8) with immediate and x28 post-index; each add reads the updated x27.
+st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G77
+# ST3 single-lane stores: .h lanes 0 and 4 by immediate and by x28, then .s lane 0 by immediate; each add reads the updated x27.
+st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G78
+# ST3 .s lane 0 by x28 and .d lane 0 by immediate and x28, then ST4 (.2d, .2s) with immediate post-index; each add reads the updated x27.
+st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G79
+# ST4 multiple-structure stores (.4h, .4s, .8b, .8h, .16b) with immediate post-index; each add reads the updated x27.
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G80
+# ST4 multiple-structure stores (.2d, .2s, .4h, .4s, .8b) post-indexed by x28; each add reads the updated x27.
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G81
+# ST4 (.8h, .16b) post-indexed by x28, then ST4 byte-lane stores: lanes 0 and 8 by immediate, lane 0 by x28; each add reads the updated x27.
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G82
+# ST4 single-lane stores: .b lane 8 by x28, then .h lanes 0 and 4 by immediate and by x28; each add reads the updated x27.
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G83
+# ST4 .s and .d lane 0 with immediate and x28 post-index; this region holds four store/add pairs. Each add reads the updated x27.
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G84
+# Post-indexed STP of s and d register pairs (two store/add pairs); each add reads the base x27 the store just wrote back.
+stp s1, s2, [x27], #248
+add x0, x27, 1
+stp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G85
+# STP: q pair post-indexed, s/d/q pairs pre-indexed, w pair post-indexed; each add reads the base x27 the store just wrote back.
+stp q1, q2, [x27], #992
+add x0, x27, 1
+stp s1, s2, [x27, #248]!
+add x0, x27, 1
+stp d1, d2, [x27, #496]!
+add x0, x27, 1
+stp q1, q2, [x27, #992]!
+add x0, x27, 1
+stp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G86
+# STP x pair post-indexed, STP w/x pairs pre-indexed, then STR b and h post-indexed; each add reads the updated x27.
+stp x1, x2, [x27], #496
+add x0, x27, 1
+stp w1, w2, [x27, #248]!
+add x0, x27, 1
+stp x1, x2, [x27, #496]!
+add x0, x27, 1
+str b1, [x27], #254
+add x0, x27, 1
+str h1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G87
+# STR s, d and q post-indexed, then STR b and h pre-indexed; each add reads the base x27 the store just wrote back.
+str s1, [x27], #254
+add x0, x27, 1
+str d1, [x27], #254
+add x0, x27, 1
+str q1, [x27], #254
+add x0, x27, 1
+str b1, [x27, #254]!
+add x0, x27, 1
+str h1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G88
+# STR s, d and q pre-indexed, then STR w and x post-indexed; each add reads the base x27 the store just wrote back.
+str s1, [x27, #254]!
+add x0, x27, 1
+str d1, [x27, #254]!
+add x0, x27, 1
+str q1, [x27, #254]!
+add x0, x27, 1
+str w1, [x27], #254
+add x0, x27, 1
+str x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G89
+# STR w and x pre-indexed, STRB post- and pre-indexed, STRH post-indexed; each add reads the base x27 the store just wrote back.
+str w1, [x27, #254]!
+add x0, x27, 1
+str x1, [x27, #254]!
+add x0, x27, 1
+strb w1, [x27], #254
+add x0, x27, 1
+strb w1, [x27, #254]!
+add x0, x27, 1
+strh w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G90
+# Single pre-indexed STRH; the add reads the base x27 the store just wrote back.
+strh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G91
+# Unlike the other regions, the second ldr uses x1 (the value loaded by the first ldr) as its base and writes back to x1,
+# so its address depends on loaded data and not on the x27 writeback. Both adds still read x27.
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr x2, [x1], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - G01
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3701
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.41
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE. . . . . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeE . . . ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,7] . . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeE. . ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [1] Code Region - G02
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3801
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.39
+# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeE . . . ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeE . ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [2] Code Region - G03
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3701
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.41
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE. . . . . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeE . . . ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeE. . ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [3] Code Region - G04
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4201
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeE . . . . . . . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeeeE . . . . . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . .DeeeeE . . . ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE. . ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [4] Code Region - G05
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4601
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 16.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . .. ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . .. ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . .. ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . .. ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeE .. ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [5] Code Region - G06
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4401
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.34
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeE . . . . . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . .DeeeeeeE . . . ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . .DeeeeE . ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [6] Code Region - G07
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.30
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeE . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,5] . . . . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeeeE . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . .DeeeeeE . ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [7] Code Region - G08
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5401
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.28
+# CHECK-NEXT: IPC: 0.19
+# CHECK-NEXT: Block RThroughput: 24.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeeE . . . . . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . .DeeeeeE . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeeeE. . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeeE . ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [8] Code Region - G09
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5101
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.29
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 21.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . . . .. ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeeE . . . . . . .. ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . .DeeeeeE . . . . .. ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeE . . .. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . .DeeE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeeeE .. ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [9] Code Region - G10
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5701
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.26
+# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: Block RThroughput: 27.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeeE . . . . . . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . .DeeeeeeeeE . . . . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeE . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeeeeE. . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [10] Code Region - G11
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5801
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.26
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 28.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeeeeeE . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . . . .DeeE. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeE . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeeeeE . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [11] Code Region - G12
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5801
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.26
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 28.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeE . . . . . . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] . . .DeeE. . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . . DeeeeeeE . . . . . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . .DeeE. . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . . DeeeeeeeeeeE . . . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . . . DeeeeeeE . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . . . DeeeeeeE . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [12] Code Region - G13
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5901
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 29.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeE . . . . . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . . .DeeE. . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . . DeeeeeeE . . . . . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . .DeeE. . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . . DeeeeeeeeeeE . . . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . . . DeeeeeeeeeeE. . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . . . . DeeeE . ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,9] . . . . . . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [13] Code Region - G14
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [14] Code Region - G15
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [15] Code Region - G16
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [16] Code Region - G17
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [17] Code Region - G18
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeE. . . . . . . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [18] Code Region - G19
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . . . . . ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . . . . ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeE . . . . ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [19] Code Region - G20
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4801
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 18.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [20] Code Region - G21
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4401
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.34
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234
+
+# CHECK: [0,0] DeeeeE . . . . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeeeE . . . . . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . .DeeeeE . . . ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeeeE . ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [21] Code Region - G22
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4201
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeE . . . . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . .DeeeeE . . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE. . ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [22] Code Region - G23
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [23] Code Region - G24
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [24] Code Region - G25
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [25] Code Region - G26
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [26] Code Region - G27
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4401
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.34
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234
+
+# CHECK: [0,0] DeeeeE . . . . . . . . ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . . ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . . ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeeeeeE . ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [27] Code Region - G28
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5101
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.29
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 21.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . . . .. ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . .. ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeeeE . . . . .. ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeE . . .. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,7] . . . . . . . .DeeE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeeeE .. ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [28] Code Region - G29
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5401
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.28
+# CHECK-NEXT: IPC: 0.19
+# CHECK-NEXT: Block RThroughput: 24.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeE. . . . . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . . DeeeeeeeeE . . . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . .DeeE. . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . . DeeeeeE . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . . DeeeeeE. . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeeE . ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [29] Code Region - G30
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4901
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 19.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeeE . . . . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . .DeeeeeeeeE . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . . DeeeeE . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . .DeeeeE . ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,9] . . . . . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [30] Code Region - G31
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [31] Code Region - G32
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [32] Code Region - G33
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [33] Code Region - G34
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [34] Code Region - G35
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [35] Code Region - G36
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5601
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: Block RThroughput: 26.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . . . . .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeeeeE. . . . . . . .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . . DeeeeeeE. . . . . .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeE. . . .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeeeeE .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [36] Code Region - G37
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 6201
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.24
+# CHECK-NEXT: IPC: 0.16
+# CHECK-NEXT: Block RThroughput: 32.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeeeeE . . . . . . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] . . . . .DeeE. . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . . DeeeeeeeeeeE . . . . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . . . DeeeeeeeeeeE. . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . . . . DeeeeeeE. . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [37] Code Region - G38
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 6201
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.24
+# CHECK-NEXT: IPC: 0.16
+# CHECK-NEXT: Block RThroughput: 32.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeeeeE . . . . . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . . . . .DeeE. . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . . DeeeeeeE . . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . . .DeeE. . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeeeeeE . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . . . DeeeeeeeeeeE. . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [38] Code Region - G39
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [39] Code Region - G40
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [40] Code Region - G41
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [41] Code Region - G42
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [42] Code Region - G43
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [43] Code Region - G44
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3301
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.51
+# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: Block RThroughput: 9.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeE . . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeE . . ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeE ldp d1, d2, [x27], #496
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldp s1, s2, [x27], #248
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldp d1, d2, [x27], #496
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [44] Code Region - G45
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2601
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.77
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 16.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeE . . . .. ldp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeE . . .. ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] . . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] . . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeeE .. ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] . . . . DeeE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . .DeeeE. ldp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [45] Code Region - G46
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2501
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ldp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [46] Code Region - G47
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeE . . . . ldr b1, [x27], #254
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . ldr h1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . ldr s1, [x27], #254
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . ldr d1, [x27], #254
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeE. ldr q1, [x27], #254
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr b1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr h1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldr s1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldr d1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldr q1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [47] Code Region - G48
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeE . . . . ldr b1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . ldr h1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . ldr s1, [x27, #254]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . ldr d1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeE. ldr q1, [x27, #254]!
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr b1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr h1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldr s1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldr d1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldr q1, [x27, #254]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [48] Code Region - G49
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeE . . . . ldr w1, [x27], #254
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . ldr x1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . ldr w1, [x27, #254]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . ldr x1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeE. ldrb w1, [x27], #254
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr w1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldr w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldr x1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldrb w1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [49] Code Region - G50
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeE . . . . ldrb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . ldrh w1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . ldrh w1, [x27, #254]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . ldrsb w1, [x27], #254
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeE. ldrsb x1, [x27], #254
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldrb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldrh w1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldrh w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldrsb w1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldrsb x1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [50] Code Region - G51
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2001
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeE . . . . ldrsb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . ldrsb x1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . ldrsh w1, [x27], #254
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeE . ldrsh x1, [x27], #254
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeE. ldrsh w1, [x27, #254]!
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldrsb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldrsb x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldrsh w1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 ldrsh x1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 ldrsh w1, [x27, #254]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [51] Code Region - G52
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2201
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeE . . . . . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . . ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . . ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,5] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . DeeeE. . . st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . DeeeE. . st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldrsh x1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldrsw x1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 ldrsw x1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [52] Code Region - G53
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [53] Code Region - G54
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [54] Code Region - G55
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [55] Code Region - G56
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [56] Code Region - G57
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [57] Code Region - G58
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [58] Code Region - G59
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [59] Code Region - G60
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [60] Code Region - G61
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [61] Code Region - G62
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 18.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [62] Code Region - G63
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [63] Code Region - G64
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [64] Code Region - G65
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2703
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.55
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 11.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeE . . . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,5] . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeE . . st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeE . st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [65] Code Region - G66
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [66] Code Region - G67
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2603
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.58
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeE. . . . . . st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE . st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [67] Code Region - G68
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [68] Code Region - G69
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 16.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [69] Code Region - G70
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [70] Code Region - G71
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [71] Code Region - G72
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [72] Code Region - G73
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 1803
+# CHECK-NEXT: Total uOps: 900
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeeE . . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,1] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,3] . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,5] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [73] Code Region - G74
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [74] Code Region - G75
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [75] Code Region - G76
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [76] Code Region - G77
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [77] Code Region - G78
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [78] Code Region - G79
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [79] Code Region - G80
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [80] Code Region - G81
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [81] Code Region - G82
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [82] Code Region - G83
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 2403
+# CHECK-NEXT: Total uOps: 1200
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeE . . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,5] . . . . DeeE .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [83] Code Region - G84
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.49
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DE .. stp s1, s2, [x27], #248
+# CHECK-NEXT: [0,1] .DeeE.. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DE .. stp d1, d2, [x27], #496
+# CHECK-NEXT: [0,3] . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 stp s1, s2, [x27], #248
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 stp d1, d2, [x27], #496
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [84] Code Region - G85
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DE . . . stp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DE . . . stp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DE . . stp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DE . . stp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DE. . stp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 stp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 stp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 stp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 stp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 stp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [85] Code Region - G86
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DE . . . stp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DE . . . stp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DE . . stp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DE . . str b1, [x27], #254
+# CHECK-NEXT: [0,7] . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DE. . str h1, [x27], #254
+# CHECK-NEXT: [0,9] . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 stp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 stp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 stp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 str b1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 str h1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [86] Code Region - G87
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DE . . . str s1, [x27], #254
+# CHECK-NEXT: [0,1] .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DE . . . str d1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DE . . str q1, [x27], #254
+# CHECK-NEXT: [0,5] . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DE . . str b1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DE. . str h1, [x27, #254]!
+# CHECK-NEXT: [0,9] . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 str s1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 str d1, [x27], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 str q1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 str b1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 str h1, [x27, #254]!
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [87] Code Region - G88
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DE . . . str s1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DE . . . str d1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DE . . str q1, [x27, #254]!
+# CHECK-NEXT: [0,5] . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DE . . str w1, [x27], #254
+# CHECK-NEXT: [0,7] . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DE. . str x1, [x27], #254
+# CHECK-NEXT: [0,9] . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 str s1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 str d1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 str q1, [x27, #254]!
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 str w1, [x27], #254
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 str x1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [88] Code Region - G89
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DE . . . str w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DE . . . str x1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DE . . strb w1, [x27], #254
+# CHECK-NEXT: [0,5] . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DE . . strb w1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DE. . strh w1, [x27], #254
+# CHECK-NEXT: [0,9] . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 str w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 str x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 strb w1, [x27], #254
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 strb w1, [x27, #254]!
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 strh w1, [x27], #254
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [89] Code Region - G90
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 203
+# CHECK-NEXT: Total uOps: 300
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.48
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234
+
+# CHECK: [0,0] DE . strh w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 strh w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
+
+# CHECK: [90] Code Region - G91
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 801
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeE . . ldr x1, [x27], #254
+# CHECK-NEXT: [0,1] .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE. ldr x2, [x1], #254
+# CHECK-NEXT: [0,3] . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 ldr x2, [x1], #254
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A57-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A57-writeback.s
new file mode 100644
index 000000000000000..6993401ffa259a2
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A57-writeback.s
@@ -0,0 +1,5289 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a57 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1 { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1 { v1.2d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2s }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4h }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1 { v1.8b }, [x27], #8
+add x0, x27, 1
+ld1 { v1.8h }, [x27], #16
+add x0, x27, 1
+ld1 { v1.16b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1 { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1 { v1.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+ld1r { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.4h }, [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r { v1.4s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.8b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.8h }, [x27], #2
+add x0, x27, 1
+ld1r { v1.16b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r { v1.2d }, [x27], x28
+add x0, x27, 1
+ld1r { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r { v1.8h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+ld2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r { v1.2s, v2.2s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], #4
+add x0, x27, 1
+ld2r { v1.4s, v2.4s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r { v1.16b, v2.16b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld2r { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G34
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G35
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G36
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G37
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G38
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G39
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G40
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G41
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G42
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+add x0, x27, 1
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G43
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G44
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ldp s1, s2, [x27], #248
+add x0, x27, 1
+ldp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G45
+ldp q1, q2, [x27], #992
+add x0, x27, 1
+ldp s1, s2, [x27, #248]!
+add x0, x27, 1
+ldp d1, d2, [x27, #496]!
+add x0, x27, 1
+ldp q1, q2, [x27, #992]!
+add x0, x27, 1
+ldp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G46
+ldp x1, x2, [x27], #496
+add x0, x27, 1
+ldp w1, w2, [x27, #248]!
+add x0, x27, 1
+ldp x1, x2, [x27, #496]!
+add x0, x27, 1
+ldpsw x1, x2, [x27], #248
+add x0, x27, 1
+ldpsw x1, x2, [x27, #248]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G47
+ldr b1, [x27], #254
+add x0, x27, 1
+ldr h1, [x27], #254
+add x0, x27, 1
+ldr s1, [x27], #254
+add x0, x27, 1
+ldr d1, [x27], #254
+add x0, x27, 1
+ldr q1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G48
+ldr b1, [x27, #254]!
+add x0, x27, 1
+ldr h1, [x27, #254]!
+add x0, x27, 1
+ldr s1, [x27, #254]!
+add x0, x27, 1
+ldr d1, [x27, #254]!
+add x0, x27, 1
+ldr q1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G49
+ldr w1, [x27], #254
+add x0, x27, 1
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr w1, [x27, #254]!
+add x0, x27, 1
+ldr x1, [x27, #254]!
+add x0, x27, 1
+ldrb w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G50
+ldrb w1, [x27, #254]!
+add x0, x27, 1
+ldrh w1, [x27], #254
+add x0, x27, 1
+ldrh w1, [x27, #254]!
+add x0, x27, 1
+ldrsb w1, [x27], #254
+add x0, x27, 1
+ldrsb x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G51
+ldrsb w1, [x27, #254]!
+add x0, x27, 1
+ldrsb x1, [x27, #254]!
+add x0, x27, 1
+ldrsh w1, [x27], #254
+add x0, x27, 1
+ldrsh x1, [x27], #254
+add x0, x27, 1
+ldrsh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G52
+ldrsh x1, [x27, #254]!
+add x0, x27, 1
+ldrsw x1, [x27], #254
+add x0, x27, 1
+ldrsw x1, [x27, #254]!
+add x0, x27, 1
+st1 { v1.1d }, [x27], #8
+add x0, x27, 1
+st1 { v1.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G53
+st1 { v1.2s }, [x27], #8
+add x0, x27, 1
+st1 { v1.4h }, [x27], #8
+add x0, x27, 1
+st1 { v1.4s }, [x27], #16
+add x0, x27, 1
+st1 { v1.8b }, [x27], #8
+add x0, x27, 1
+st1 { v1.8h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G54
+st1 { v1.16b }, [x27], #16
+add x0, x27, 1
+st1 { v1.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G55
+st1 { v1.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G56
+st1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G57
+st1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G58
+st1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G59
+st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G60
+st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G61
+st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G62
+st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G63
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G64
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G65
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G66
+st1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G67
+st1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+st1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+st1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G68
+st2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G69
+st2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G70
+st2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G71
+st2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G72
+st2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G73
+st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G74
+st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G75
+st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G76
+st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G77
+st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G78
+st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G79
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G80
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G81
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G82
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G83
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G84
+stp s1, s2, [x27], #248
+add x0, x27, 1
+stp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G85
+stp q1, q2, [x27], #992  // store pair of Q registers, post-indexed by immediate: base x27 is written back
+add x0, x27, 1  // reads x27, the base register the preceding store writes back
+stp s1, s2, [x27, #248]!  // store pair of S registers, pre-indexed ('!' requests base writeback)
+add x0, x27, 1
+stp d1, d2, [x27, #496]!  // store pair of D registers, pre-indexed with writeback
+add x0, x27, 1
+stp q1, q2, [x27, #992]!  // store pair of Q registers, pre-indexed with writeback
+add x0, x27, 1
+stp w1, w2, [x27], #248  // store pair of W registers, post-indexed by immediate
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G86
+stp x1, x2, [x27], #496  // store pair of X registers, post-indexed by immediate: base x27 is written back
+add x0, x27, 1  // reads x27, the base register the preceding store writes back
+stp w1, w2, [x27, #248]!  // store pair of W registers, pre-indexed ('!' requests base writeback)
+add x0, x27, 1
+stp x1, x2, [x27, #496]!  // store pair of X registers, pre-indexed with writeback
+add x0, x27, 1
+str b1, [x27], #254  // store B register, post-indexed by immediate
+add x0, x27, 1
+str h1, [x27], #254  // store H register, post-indexed by immediate
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G87
+str s1, [x27], #254  // store S register, post-indexed by immediate: base x27 is written back
+add x0, x27, 1  // reads x27, the base register the preceding store writes back
+str d1, [x27], #254  // store D register, post-indexed by immediate
+add x0, x27, 1
+str q1, [x27], #254  // store Q register, post-indexed by immediate
+add x0, x27, 1
+str b1, [x27, #254]!  // store B register, pre-indexed ('!' requests base writeback)
+add x0, x27, 1
+str h1, [x27, #254]!  // store H register, pre-indexed with writeback
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G88
+str s1, [x27, #254]!  // store S register, pre-indexed ('!' requests writeback of base x27)
+add x0, x27, 1  // reads x27, the base register the preceding store writes back
+str d1, [x27, #254]!  // store D register, pre-indexed with writeback
+add x0, x27, 1
+str q1, [x27, #254]!  // store Q register, pre-indexed with writeback
+add x0, x27, 1
+str w1, [x27], #254  // store W register, post-indexed by immediate
+add x0, x27, 1
+str x1, [x27], #254  // store X register, post-indexed by immediate
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G89
+str w1, [x27, #254]!  // store W register, pre-indexed ('!' requests writeback of base x27)
+add x0, x27, 1  // reads x27, the base register the preceding store writes back
+str x1, [x27, #254]!  // store X register, pre-indexed with writeback
+add x0, x27, 1
+strb w1, [x27], #254  // store byte from w1, post-indexed by immediate
+add x0, x27, 1
+strb w1, [x27, #254]!  // store byte from w1, pre-indexed with writeback
+add x0, x27, 1
+strh w1, [x27], #254  // store halfword from w1, post-indexed by immediate
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G90
+strh w1, [x27, #254]!  // store halfword from w1, pre-indexed ('!' requests writeback of base x27)
+add x0, x27, 1  // reads x27, the base register the preceding store writes back
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G91
+ldr x1, [x27], #254  // post-indexed load: writes both the loaded value (x1) and the updated base (x27)
+add x0, x27, 1  // reads x27, the base register the preceding load writes back
+ldr x2, [x1], #254  // uses x1, the value loaded by the first ldr, as its base and writes x1 back
+add x0, x27, 1  // NOTE(review): reads x27, not the x1 written back by the preceding ldr -- confirm this is intended
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - G01
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . . . . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,3] .D=========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeER . . . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,5] . D=============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D============eeeeeER . . ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,7] . D=================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D================eeeeeER. ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,9] . D=====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 9. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.5 0.1 0.0 <total>
+
+# CHECK: [1] Code Region - G02
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . . . . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,3] .D=========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeER . . . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,5] . D=============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D============eeeeeER . . ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . D=================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D================eeeeeER. ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D=====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.5 0.1 0.0 <total>
+
+# CHECK: [2] Code Region - G03
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . . . . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D=========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeER . . . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . D=============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D============eeeeeER . . ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . D=================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D================eeeeeER. ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D=====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.5 0.1 0.0 <total>
+
+# CHECK: [3] Code Region - G04
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2604
+# CHECK-NEXT: Total uOps: 1600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.61
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . . . . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,3] .D=========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeeER . . . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . D=============eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=============eeeeeER . . ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . D=================eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=================eeeeeER. ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . D=====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 14.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 18.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.7 0.1 0.0 <total>
+
+# CHECK: [4] Code Region - G05
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2804
+# CHECK-NEXT: Total uOps: 1800
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.64
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . .. ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] .D=====eER. . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeER . . . .. ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . D=========eER. . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeeER. . .. ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . D=============eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D============eeeeeeER .. ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . .D=================eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . .D=================eeeeeER. ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . D=====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 18.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.7 0.1 0.0 <total>
+
+# CHECK: [5] Code Region - G06
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2704
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.63
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] .D=====eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeER . . . . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . D=========eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=========eeeeeER . . . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . D=============eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D============eeeeeeER. . ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . D=================eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=================eeeeeER. ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . .D=====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 18.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.8 0.1 0.0 <total>
+
+# CHECK: [6] Code Region - G07
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3104
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.32
+# CHECK-NEXT: Block RThroughput: 11.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] .D=====eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeeeER. . . . . ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . D=========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeeER . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,5] . D=============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D============eeeeeeeER . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,7] . . D==================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=================eeeeeeER. ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,9] . . D======================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 7. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 18.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 9. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.8 0.1 0.0 <total>
+
+# CHECK: [7] Code Region - G08
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3304
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.70
+# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: Block RThroughput: 13.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . .. ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] .D=====eER. . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeeeeER . . . .. ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] . D==========eER . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=========eeeeeeER . . .. ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,5] . D==============eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D=============eeeeeeeER . .. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,7] . . D===================eER. .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D==================eeeeeeeER. ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,9] . . D========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 14.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 19.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 9. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 12.6 0.1 0.0 <total>
+
+# CHECK: [8] Code Region - G09
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3204
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.69
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] .D=====eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeeeeER . . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . D==========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=========eeeeeeER . . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . D==============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D=============eeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . D==================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=================eeeeeeeER. ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . D=======================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 14.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 18.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 12.3 0.1 0.0 <total>
+
+# CHECK: [9] Code Region - G10
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3404
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.71
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .D=====eER. . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeeeeER . . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . D==========eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=========eeeeeeeER . . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . D===============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==============eeeeeeER . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,7] . . D===================eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D==================eeeeeeeeER. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,9] . . D=========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 19.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 9. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 12.9 0.1 0.0 <total>
+
+# CHECK: [10] Code Region - G11
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3404
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.71
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] .D=====eER. . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeeeER. . . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,3] . D=========eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeeeeER . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,5] . D===============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==============eeeeeeER . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,7] . . D===================eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D==================eeeeeeeeER. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,9] . . D=========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 5. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 19.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 9. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 12.7 0.1 0.0 <total>
+
+# CHECK: [11] Code Region - G12
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3404
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.71
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeER . . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . D===========eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeeeER. . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D================eeeeeeER . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D====================eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . D=========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.3 0.1 0.0 <total>
+
+# CHECK: [12] Code Region - G13
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3804
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: Block RThroughput: 15.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . .. ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] .D=======eER . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeER . . . . .. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . D===========eER . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeeeER. . . .. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . D=================eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D================eeeeeeeeER . .. ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=======================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D======================eeeeeeeeER. ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,9] . . D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.1 0.1 0.0 <total>
+
+# CHECK: [13] Code Region - G14
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER. ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,9] . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [14] Code Region - G15
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3704
+# CHECK-NEXT: Total uOps: 1900
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.51
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 6.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,5] . D===================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . D=========================eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=========================eeeeeER. ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 26.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.4 0.1 0.0 <total>
+
+# CHECK: [15] Code Region - G16
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3404
+# CHECK-NEXT: Total uOps: 1800
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.53
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . . . . . . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,3] .D=========eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeeeeER . . . . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,5] . D===============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==============eeeeeeeeER . . . ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,7] . D=====================eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . .D====================eeeeeeeeER. ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: [0,9] . . D===========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: 5. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: 9. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.3 0.1 0.0 <total>
+
+# CHECK: [16] Code Region - G17
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3704
+# CHECK-NEXT: Total uOps: 1900
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.51
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 6.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,3] . D=============eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,5] . D===================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,7] . . D=========================eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=========================eeeeeER. ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 26.0 0.0 0.0 ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.4 0.1 0.0 <total>
+
+# CHECK: [17] Code Region - G18
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER. ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [18] Code Region - G19
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3804
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.53
+# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . .. ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] .D=======eER . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . .. ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER. . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeER. . . .. ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . D=================eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D================eeeeeeeeER . .. ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . . D=======================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D======================eeeeeeeeER. ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . . D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.5 0.1 0.0 <total>
+
+# CHECK: [19] Code Region - G20
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4104
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.63
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 9.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] .D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeER. . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . D==============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=============eeeeeeeeeER . . . . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . D=====================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D====================eeeeeeeeeER . . ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . . D============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D===========================eeeeeeER. ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . D================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 28.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 33.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 18.0 0.1 0.0 <total>
+
+# CHECK: [20] Code Region - G21
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4204
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012345
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeeER . . . . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . D====================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D===================eeeeeeeeER . . ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . D==========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=========================eeeeeeeeeER. ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . D=================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 20.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 26.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 34.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.1 0.1 0.0 <total>
+
+# CHECK: [21] Code Region - G22
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4104
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.63
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 8.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] .D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeER. . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,3] . D==============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=============eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,5] . D====================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D===================eeeeeeeeER . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . D==========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=========================eeeeeeeeER. ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . D================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 5. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 20.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 26.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 33.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.4 0.1 0.0 <total>
+
+# CHECK: [22] Code Region - G23
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3804
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.63
+# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . .. ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] .D=======eER . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . .. ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,3] . D=============eER. . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . .. ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. .. ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . D=========================eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeER. ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.3 0.1 0.0 <total>
+
+# CHECK: [23] Code Region - G24
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] .D=====eER. . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeeeeeER . . . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,3] . D===========eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeeeER. . . . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=================eeeeeER. . . ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,7] . .D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D====================eeeeeeeeER. ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . . D===========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.2 0.1 0.0 <total>
+
+# CHECK: [24] Code Region - G25
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.55
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,3] . D=============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,5] . D===================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,7] . . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER. ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: [0,9] . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [25] Code Region - G26
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3704
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeER . . . . . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . D===========eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeeeER . . . . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . D=================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D================eeeeeeeeER . . ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D======================eeeeeeeeER. ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.1 0.1 0.0 <total>
+
+# CHECK: [26] Code Region - G27
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 8.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER. ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,9] . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [27] Code Region - G28
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4704
+# CHECK-NEXT: Total uOps: 3600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.77
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] .D========eER . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeeER . . . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,3] . D===============eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeeeER . . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,5] . .D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D=====================eeeeeeeeeER. . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,7] . . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D============================eeeeeeeeeeER. ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,9] . . .D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [28] Code Region - G29
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4604
+# CHECK-NEXT: Total uOps: 3600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.78
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeER . . . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] . D========eER . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeER . . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . D==============eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=============eeeeeeeeeER. . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . .D=====================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D====================eeeeeeeeeER . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . D============================eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D===========================eeeeeeeeeeER. ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . .D===================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 28.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 36.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 18.3 0.1 0.0 <total>
+
+# CHECK: [29] Code Region - G30
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4704
+# CHECK-NEXT: Total uOps: 3600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.77
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .D========eER . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeeeER . . . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . D===============eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D=====================eeeeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,7] . . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D============================eeeeeeeeeER. ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,9] . . .D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [30] Code Region - G31
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4504
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] .D========eER . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . D===============eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeeER. . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D=====================eeeeeeeeeER . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,7] . . D=============================eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D============================eeeeeeeeeER. ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [31] Code Region - G32
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3704
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.65
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] .D========eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeER. . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,3] . D==============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=============eeeeeeeeER . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D====================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D===================eeeeeeER . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,7] . . D========================eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=======================eeeeeeER. ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . D============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 20.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.6 0.1 0.0 <total>
+
+# CHECK: [32] Code Region - G33
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2700
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 9.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeER . . . . . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] .D=====eER. . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeeeeeeER . . . . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,3] . D===========eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeeeER . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,5] . .D=================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D================eeeeeeeeER . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,7] . . D=======================eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D======================eeeeeeeeeER. ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: [0,9] . . D==============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.8 0.1 0.0 <total>
+
+# CHECK: [33] Code Region - G34
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4104
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 9.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeeER. . . . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,3] . D==============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=============eeeeeeeeeER . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,5] . D=====================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D====================eeeeeeER. . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeeeeER. ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: 5. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.1 0.1 0.0 <total>
+
+# CHECK: [34] Code Region - G35
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4204
+# CHECK-NEXT: Total uOps: 2700
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.64
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 9.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012345
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeeER . . . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . D====================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D===================eeeeeeeeER . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . D==========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=========================eeeeeeeeeER. ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . D=================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 20.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 26.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 34.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.1 0.1 0.0 <total>
+
+# CHECK: [35] Code Region - G36
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4604
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.74
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 13.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] .D========eER . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeER. . . . . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] . D==============eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=============eeeeeeeeeER . . . . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,5] . D=====================eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D====================eeeeeeeeeER . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,7] . . D============================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D===========================eeeeeeeeeeeER. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,9] . . .D===================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 5. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 7. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 28.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 9. 1 36.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 18.3 0.1 0.0 <total>
+
+# CHECK: [36] Code Region - G37
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4804
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.79
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 16.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . . .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] .D========eER . . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeeeeER . . . . . .. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] . D===============eER . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . .D==============eeeeeeeeeeeER . . . .. ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,5] . . D======================eER. . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D=====================eeeeeeeeER . .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . .D============================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . D===========================eeeeeeeeeER. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . D===================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 28.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 36.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 18.7 0.1 0.0 <total>
+
+# CHECK: [37] Code Region - G38
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 5104
+# CHECK-NEXT: Total uOps: 4200
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.82
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 16.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] .D========eER . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeeeeER . . . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . D===============eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . .D==============eeeeeeeeeER . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . . D======================eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D=====================eeeeeeeeeeeER . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . .D=============================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . D============================eeeeeeeeeeeER. ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [38] Code Region - G39
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4504
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.78
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 11.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] . D=======eER . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,3] . D=============eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . .D============eeeeeeeeeER. . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . D===================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D==================eeeeeeeeeER . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,7] . . .D=========================eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . D========================eeeeeeeeeER. ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [39] Code Region - G40
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4304
+# CHECK-NEXT: Total uOps: 3100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.72
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 10.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . .. ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] . D=======eER . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeeER . . . . .. ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . .D============eeeeeeeeeER. . . .. ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,5] . . D===================eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D==================eeeeeeeeER . .. ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . D=========================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . .D========================eeeeeeeeER. ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [40] Code Region - G41
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4104
+# CHECK-NEXT: Total uOps: 3100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.76
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 10.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] . D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeeER . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . .D============eeeeeeER . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,5] . . D=================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D================eeeeeeeeeER . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,7] . . D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . .D======================eeeeeeeeER. ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: [0,9] . . . D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.5 0.1 0.0 <total>
+
+# CHECK: [41] Code Region - G42
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4304
+# CHECK-NEXT: Total uOps: 3100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.72
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 10.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . .. ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeeER. . . . . .. ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,3] . D=============eER . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . .. ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,5] . .D===================eER . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D==================eeeeeeeeeER . .. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,7] . . D=========================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeeeeER. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: [0,9] . . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [42] Code Region - G43
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2900
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.72
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 9.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeER . . . . . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] .D=====eER. . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeeeeeeER . . . . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . D===========eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeeeER . . . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . .D=================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D================eeeeeeeeER . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=======================eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D======================eeeeeeeeeER. ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . .D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.7 0.1 0.0 <total>
+
+# CHECK: [43] Code Region - G44
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3604
+# CHECK-NEXT: Total uOps: 2700
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.28
+# CHECK-NEXT: Block RThroughput: 9.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeeER. . . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeeER . . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . D===================eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D==================eeeeeER . . ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,7] . . D======================eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=====================eeeeeER. ldp d1, d2, [x27], #496
+# CHECK-NEXT: [0,9] . . .D=========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ldp s1, s2, [x27], #248
+# CHECK-NEXT: 7. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 22.0 0.0 0.0 ldp d1, d2, [x27], #496
+# CHECK-NEXT: 9. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.3 0.1 0.0 <total>
+
+# CHECK: [44] Code Region - G45
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2306
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.95
+# CHECK-NEXT: IPC: 0.43
+# CHECK-NEXT: Block RThroughput: 7.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeeER . . . . . ldp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] .D=====eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeeER . . . . ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] . D========eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=======eeeeeER . . . ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] . D===========eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==========eeeeeeER. . ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] . . D===============eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D==============eeeeER ldp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . . D==============eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 11.0 0.0 0.0 ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 ldp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 15.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 9.8 0.1 0.2 <total>
+
+# CHECK: [45] Code Region - G46
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1307
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.84
+# CHECK-NEXT: IPC: 0.77
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . . ldp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] .DeE--R . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeER . . . ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] . DeE--R . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeeeER . . ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] . DeE--R . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DeeeeeER . . ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: [0,7] . . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D===eeeeeER. ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: [0,9] . . D=======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 1.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.4 0.4 0.6 <total>
+
+# CHECK: [46] Code Region - G47
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ldr b1, [x27], #254
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . . . . ldr h1, [x27], #254
+# CHECK-NEXT: [0,3] .D=========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeER . . . ldr s1, [x27], #254
+# CHECK-NEXT: [0,5] . D=============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D============eeeeeER . . ldr d1, [x27], #254
+# CHECK-NEXT: [0,7] . D=================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D================eeeeeER. ldr q1, [x27], #254
+# CHECK-NEXT: [0,9] . D=====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ldr h1, [x27], #254
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ldr s1, [x27], #254
+# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ldr d1, [x27], #254
+# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ldr q1, [x27], #254
+# CHECK-NEXT: 9. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.5 0.1 0.0 <total>
+
+# CHECK: [47] Code Region - G48
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ldr b1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . . . . ldr h1, [x27, #254]!
+# CHECK-NEXT: [0,3] .D=========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeER . . . ldr s1, [x27, #254]!
+# CHECK-NEXT: [0,5] . D=============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D============eeeeeER . . ldr d1, [x27, #254]!
+# CHECK-NEXT: [0,7] . D=================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D================eeeeeER. ldr q1, [x27, #254]!
+# CHECK-NEXT: [0,9] . D=====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]!
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ldr h1, [x27, #254]!
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ldr s1, [x27, #254]!
+# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ldr d1, [x27, #254]!
+# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ldr q1, [x27, #254]!
+# CHECK-NEXT: 9. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.5 0.1 0.0 <total>
+
+# CHECK: [48] Code Region - G49
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr w1, [x27], #254
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DeeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeeeER . ldr w1, [x27, #254]!
+# CHECK-NEXT: [0,5] . D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DeeeeER. ldr x1, [x27, #254]!
+# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DeeeeER ldrb w1, [x27], #254
+# CHECK-NEXT: [0,9] . D=eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldr w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ldr x1, [x27, #254]!
+# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ldrb w1, [x27], #254
+# CHECK-NEXT: 9. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.5 0.1 1.0 <total>
+
+# CHECK: [49] Code Region - G50
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DeeeeER . ldrh w1, [x27], #254
+# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeeeER . ldrh w1, [x27, #254]!
+# CHECK-NEXT: [0,5] . D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DeeeeER. ldrsb w1, [x27], #254
+# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DeeeeER ldrsb x1, [x27], #254
+# CHECK-NEXT: [0,9] . D=eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldrh w1, [x27], #254
+# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldrh w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ldrsb w1, [x27], #254
+# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ldrsb x1, [x27], #254
+# CHECK-NEXT: 9. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.5 0.1 1.0 <total>
+
+# CHECK: [50] Code Region - G51
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DeeeeER . ldrsb x1, [x27, #254]!
+# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeeeER . ldrsh w1, [x27], #254
+# CHECK-NEXT: [0,5] . D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DeeeeER. ldrsh x1, [x27], #254
+# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DeeeeER ldrsh w1, [x27, #254]!
+# CHECK-NEXT: [0,9] . D=eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldrsb x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldrsh w1, [x27], #254
+# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ldrsh x1, [x27], #254
+# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ldrsh w1, [x27, #254]!
+# CHECK-NEXT: 9. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.5 0.1 1.0 <total>
+
+# CHECK: [51] Code Region - G52
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 604
+# CHECK-NEXT: Total uOps: 1600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.65
+# CHECK-NEXT: IPC: 1.66
+# CHECK-NEXT: Block RThroughput: 5.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DeeeeER . ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeeeER. ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,5] . D=eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,6] . DeE--R. st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,7] . D=eE-R. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DeeER. st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldrsw x1, [x27], #254
+# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldrsw x1, [x27, #254]!
+# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 0.0 2.0 st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 7. 1 2.0 0.0 1.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.5 0.1 0.9 <total>
+
+# CHECK: [52] Code Region - G53
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.41
+# CHECK-NEXT: IPC: 1.42
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DeER. . st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeER . st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=eER . st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,7] . D=eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . DeeER. st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,9] . .D=eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.6 0.1 0.0 <total>
+
+# CHECK: [53] Code Region - G54
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.41
+# CHECK-NEXT: IPC: 1.42
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] .D=eER . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eER . st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . D=eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeER . st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=eER . st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=eER. st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . .D=eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.8 0.1 0.0 <total>
+
+# CHECK: [54] Code Region - G55
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 904
+# CHECK-NEXT: Total uOps: 1900
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.10
+# CHECK-NEXT: IPC: 1.11
+# CHECK-NEXT: Block RThroughput: 9.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] .D=eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eER . . st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . D=eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . D=eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DeeER. . st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . .D=eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DeeER. st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,9] . . D=eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.6 0.1 0.0 <total>
+
+# CHECK: [55] Code Region - G56
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1404
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.71
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . . st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] .D===eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D==eeER . . . st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,3] . D===eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeER . . st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,5] . D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==eeeeER. . st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,7] . . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D====eeER. st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,9] . . D=====eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.9 0.1 0.0 <total>
+
+# CHECK: [56] Code Region - G57
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1604
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.62
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 16.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . . st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] .D===eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D==eeeeER . . st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,3] . D=====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D====eeER . . st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,5] . D=====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D====eeeeER . st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D======eeER. st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . D=======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.3 0.1 0.0 <total>
+
+# CHECK: [57] Code Region - G58
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1604
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.62
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 16.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . . st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] .D=eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeER . . . st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . D===eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeER . . st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==eeeeER. . st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D====eeeeER. st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . D=======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.7 0.1 0.0 <total>
+
+# CHECK: [58] Code Region - G59
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2104
+# CHECK-NEXT: Total uOps: 3100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.47
+# CHECK-NEXT: IPC: 0.48
+# CHECK-NEXT: Block RThroughput: 21.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeER . . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] .D==eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeeeeER . . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,3] . D=====eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D====eeeER. . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,5] . .D======eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D=====eeeER . . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,7] . . D=======eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D======eeeeeeER. st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,9] . . .D==========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 9. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.6 0.1 0.0 <total>
+
+# CHECK: [59] Code Region - G60
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2404
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.41
+# CHECK-NEXT: IPC: 0.42
+# CHECK-NEXT: Block RThroughput: 24.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeER . . . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] .D==eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeeeeER . . . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] . D=====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D====eeeeeeER . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,5] . . D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D=======eeeER . . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========eeeeeeER. st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . D============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 5. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 8.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 9.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 6.6 0.1 0.0 <total>
+
+# CHECK: [60] Code Region - G61
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2104
+# CHECK-NEXT: Total uOps: 3100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.47
+# CHECK-NEXT: IPC: 0.48
+# CHECK-NEXT: Block RThroughput: 21.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeER . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] .D==eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeER . . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . D===eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeeeeeER. . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . .D======eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D=====eeeER . . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=======eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D======eeeeeeER. st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . .D==========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [61] Code Region - G62
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2604
+# CHECK-NEXT: Total uOps: 3600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.38
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 26.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . D====eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D===eeeeER . . . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] . D======eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=====eeeeeeeeER . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,5] . . D==========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D=========eeeeER. . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,7] . . D============eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . .D===========eeeeER. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,9] . . . D==============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 3. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 6.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 5. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 10.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 7. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 12.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 9. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 8.4 0.1 0.0 <total>
+
+# CHECK: [62] Code Region - G63
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3204
+# CHECK-NEXT: Total uOps: 4200
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 32.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] . D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeER. . . . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] . D=======eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . .D======eeeeeeeeER . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,5] . . D===========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D==========eeeeeeeeER . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,7] . . . D===============eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . D==============eeeeER. st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . D=================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 3. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 7.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 11.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 9.9 0.1 0.0 <total>
+
+# CHECK: [63] Code Region - G64
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2804
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.36
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 28.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . .. st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . D=====eER . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeER. . . .. st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . D=======eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . .D======eeeeER . . .. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . D=========eER. . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D========eeeeeeeeER .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . .D=============eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . D============eeeeER. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . D===============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 7.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 9.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 13.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 8.9 0.1 0.0 <total>
+
+# CHECK: [64] Code Region - G65
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1904
+# CHECK-NEXT: Total uOps: 2900
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.52
+# CHECK-NEXT: IPC: 0.53
+# CHECK-NEXT: Block RThroughput: 19.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . D=====eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeeeeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . D=========eER. . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . D=========eER. . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,5] . . D=========eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D=========eER . st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,7] . . D=========eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=========eER. st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . D=========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 5. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 10.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 7. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 10.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 8.2 0.1 0.0 <total>
+
+# CHECK: [65] Code Region - G66
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.98
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DeER. . st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeER . st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DeER . st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DeER. st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.5 0.1 0.0 <total>
+
+# CHECK: [66] Code Region - G67
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1204
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.66
+# CHECK-NEXT: IPC: 0.83
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] D=eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DeER. . . st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D=eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeeER . . st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,5] . D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=eeeER . st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . D===eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . .D==eeeeER. st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,9] . . D=====eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.5 0.1 0.0 <total>
+
+# CHECK: [67] Code Region - G68
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1704
+# CHECK-NEXT: Total uOps: 3100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.82
+# CHECK-NEXT: IPC: 0.59
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeER . . . st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] .D==eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeER . . . st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] . D===eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeeeER . . st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,5] . .D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D===eeeER . st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,7] . . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D====eeeeER. st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,9] . . .D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [68] Code Region - G69
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1804
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.77
+# CHECK-NEXT: IPC: 0.55
+# CHECK-NEXT: Block RThroughput: 16.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeeER . . .. st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] . D==eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeeER . .. st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . D====eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D===eeeER . .. st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . .D=====eER. .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D====eeeER .. st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . D======eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=====eeeeER. st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . .D=======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 6.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.7 0.1 0.0 <total>
+
+# CHECK: [69] Code Region - G70
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1704
+# CHECK-NEXT: Total uOps: 2900
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.70
+# CHECK-NEXT: IPC: 0.59
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeER . . . st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .D==eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeeER. . . st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeeeER . . st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D===eeeER . st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D====eeeER. st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,9] . . .D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [70] Code Region - G71
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.33
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] .D==eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeER . . . st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . D===eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeeER . . st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,5] . D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D===eeeER. . st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,7] . . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D====eeeER. st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [71] Code Region - G72
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1304
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.53
+# CHECK-NEXT: IPC: 0.77
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . .. st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] .D==eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeER . .. st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,3] . D===eER. .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeeER .. st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D====eER .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D===eeER .. st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . D====eER.. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D===eeER. st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . D====eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.6 0.1 0.0 <total>
+
+# CHECK: [72] Code Region - G73
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 1204
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.83
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,1] . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D===eeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,3] . D====eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] . .D===eeeER. st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,5] . . D====eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.2 0.0 <total>
+
+# CHECK: [73] Code Region - G74
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2704
+# CHECK-NEXT: Total uOps: 5100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.89
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 27.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] . D===eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D==eeeER . . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] . .D===eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . D==eeeeeeER . . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,5] . . D=====eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . .D====eeeeeeER . . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,7] . . . D=======eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . D======eeeeeeER. st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . D==========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [74] Code Region - G75
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2104
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.14
+# CHECK-NEXT: IPC: 0.48
+# CHECK-NEXT: Block RThroughput: 21.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeER . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . D=eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeER . . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . D=eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . .DeeeeeeER. . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D==eeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . D==eeeeeeER. st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . .D=====eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
+
+# CHECK: [75] Code Region - G76
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1804
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.55
+# CHECK-NEXT: IPC: 0.55
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeeeeER . . .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . D===eER. . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D==eeeER . .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,3] . D====eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . .D===eeeER. .. st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,5] . . D=====eER .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D====eeeER .. st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . D======eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=====eeeER. st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . .D=======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 6.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.9 0.1 0.0 <total>
+
+# CHECK: [76] Code Region - G77
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1504
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.40
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] .D==eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeER . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,3] . D===eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeeER . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D===eeeER. . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D====eeeER. st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,9] . . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [77] Code Region - G78
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2104
+# CHECK-NEXT: Total uOps: 3300
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.57
+# CHECK-NEXT: IPC: 0.48
+# CHECK-NEXT: Block RThroughput: 19.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeER . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] .D==eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeER . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,3] . D===eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeeER . . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D====eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D===eeeeeeeeER. . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,7] . . D========eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D=======eeeeER. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,9] . . . D=========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 7. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.9 0.1 0.0 <total>
+
+# CHECK: [78] Code Region - G79
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3204
+# CHECK-NEXT: Total uOps: 5800
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.81
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 32.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeER . . . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] . D==eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeeeeeeER. . . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] . . D=====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . D====eeeeER . . . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,5] . . D======eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . .D=====eeeeeeeeER . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,7] . . . D=========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .D========eeeeeeeeER. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . D============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 7. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 9.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 9. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 6.2 0.1 0.0 <total>
+
+# CHECK: [79] Code Region - G80
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2804
+# CHECK-NEXT: Total uOps: 4800
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.71
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 28.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . D=====eER . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeER. . . .. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . .D======eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . D=====eeeeER . . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . D=======eER. . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D======eeeeeeeeER .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . D==========eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . D=========eeeeER. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . D===========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 6.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 10.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.3 0.1 0.0 <total>
+
+# CHECK: [80] Code Region - G81
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 19.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeeeeER . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D===eeeeeeeeER . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . D=======eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . D======eeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . .D========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . D=======eeeER. . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,7] . . . D=========eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . D========eeeER. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . D==========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 7.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 5. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 8.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 7. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 9.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.2 0.1 0.0 <total>
+
+# CHECK: [81] Code Region - G82
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.33
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] .D==eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eeeER . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,3] . D===eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,5] . D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D===eeeER. . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D====eeeER. st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [82] Code Region - G83
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 1404
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.57
+# CHECK-NEXT: IPC: 0.57
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] .D===eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D==eeeeER . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D=====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D====eeeER . . st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,5] . D======eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D=====eeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . D=======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.0 0.1 0.0 <total>
+
+# CHECK: [83] Code Region - G84
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 900
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.23
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. stp s1, s2, [x27], #248
+# CHECK-NEXT: [0,1] .DeER.. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeER stp d1, d2, [x27], #496
+# CHECK-NEXT: [0,3] . DeER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp s1, s2, [x27], #248
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 1.0 0.0 stp d1, d2, [x27], #496
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.0 0.5 0.0 <total>
+
+# CHECK: [84] Code Region - G85
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1207
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.32
+# CHECK-NEXT: IPC: 0.83
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . .. stp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] . DeE-R . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=eER . .. stp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] . D=eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=eeER .. stp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] . .D=eER .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . D=eeeeER.. stp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] . . DeE--R.. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D==eER. stp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . . .D==eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 1.0 1.0 1.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 stp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 1.0 0.0 stp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 2.0 1.0 0.0 stp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 3.0 0.0 stp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.9 0.9 0.3 <total>
+
+# CHECK: [85] Code Region - G86
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1003
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.19
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . stp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeER . . stp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeER . . stp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DeER. . str b1, [x27], #254
+# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DeER. str h1, [x27], #254
+# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 1.0 0.0 stp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 1.0 0.0 stp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 1.0 0.0 str b1, [x27], #254
+# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 1.0 0.0 str h1, [x27], #254
+# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.0 0.5 0.0 <total>
+
+# CHECK: [86] Code Region - G87
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1003
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.09
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . str s1, [x27], #254
+# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeER . . str d1, [x27], #254
+# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeER . . str q1, [x27], #254
+# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DeER. . str b1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DeER. str h1, [x27, #254]!
+# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 1.0 0.0 str d1, [x27], #254
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 1.0 0.0 str q1, [x27], #254
+# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 1.0 0.0 str b1, [x27, #254]!
+# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 1.0 0.0 str h1, [x27, #254]!
+# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.0 0.5 0.0 <total>
+
+# CHECK: [87] Code Region - G88
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1003
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.09
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . str s1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeER . . str d1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeER . . str q1, [x27, #254]!
+# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DeER. . str w1, [x27], #254
+# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DeER. str x1, [x27], #254
+# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]!
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 1.0 0.0 str d1, [x27, #254]!
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 1.0 0.0 str q1, [x27, #254]!
+# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 1.0 0.0 str w1, [x27], #254
+# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 1.0 0.0 str x1, [x27], #254
+# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.0 0.5 0.0 <total>
+
+# CHECK: [88] Code Region - G89
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1003
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . str w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeER . . str x1, [x27, #254]!
+# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeER . . strb w1, [x27], #254
+# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .DeER. . strb w1, [x27, #254]!
+# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . DeER. strh w1, [x27], #254
+# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 1.0 0.0 str x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 1.0 0.0 strb w1, [x27], #254
+# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 1.0 0.0 strb w1, [x27, #254]!
+# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 1.0 0.0 strh w1, [x27], #254
+# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.0 0.5 0.0 <total>
+
+# CHECK: [89] Code Region - G90
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 203
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.97
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234
+
+# CHECK: [0,0] DeER. strh w1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.0 0.5 0.0 <total>
+
+# CHECK: [90] Code Region - G91
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 210
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 2.86
+# CHECK-NEXT: IPC: 1.90
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D===eeeeER ldr x2, [x1], #254
+# CHECK-NEXT: [0,3] .DeE------R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 4.0 0.0 0.0 ldr x2, [x1], #254
+# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.0 0.3 2.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N1-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N1-writeback.s
new file mode 100644
index 000000000000000..b4e7a4e40ba7721
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N1-writeback.s
@@ -0,0 +1,5290 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-n1 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1 { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1 { v1.2d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2s }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4h }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1 { v1.8b }, [x27], #8
+add x0, x27, 1
+ld1 { v1.8h }, [x27], #16
+add x0, x27, 1
+ld1 { v1.16b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1 { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1 { v1.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+ld1r { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.4h }, [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r { v1.4s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.8b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.8h }, [x27], #2
+add x0, x27, 1
+ld1r { v1.16b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r { v1.2d }, [x27], x28
+add x0, x27, 1
+ld1r { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r { v1.8h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+ld2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r { v1.2s, v2.2s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], #4
+add x0, x27, 1
+ld2r { v1.4s, v2.4s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r { v1.16b, v2.16b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld2r { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G34
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G35
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G36
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G37
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G38
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G39
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G40
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G41
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G42
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+add x0, x27, 1
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G43
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G44
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ldp s1, s2, [x27], #248
+add x0, x27, 1
+ldp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G45
+ldp q1, q2, [x27], #992
+add x0, x27, 1
+ldp s1, s2, [x27, #248]!
+add x0, x27, 1
+ldp d1, d2, [x27, #496]!
+add x0, x27, 1
+ldp q1, q2, [x27, #992]!
+add x0, x27, 1
+ldp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G46
+ldp x1, x2, [x27], #496
+add x0, x27, 1
+ldp w1, w2, [x27, #248]!
+add x0, x27, 1
+ldp x1, x2, [x27, #496]!
+add x0, x27, 1
+ldpsw x1, x2, [x27], #248
+add x0, x27, 1
+ldpsw x1, x2, [x27, #248]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G47
+ldr b1, [x27], #254
+add x0, x27, 1
+ldr h1, [x27], #254
+add x0, x27, 1
+ldr s1, [x27], #254
+add x0, x27, 1
+ldr d1, [x27], #254
+add x0, x27, 1
+ldr q1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G48
+ldr b1, [x27, #254]!
+add x0, x27, 1
+ldr h1, [x27, #254]!
+add x0, x27, 1
+ldr s1, [x27, #254]!
+add x0, x27, 1
+ldr d1, [x27, #254]!
+add x0, x27, 1
+ldr q1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G49
+ldr w1, [x27], #254
+add x0, x27, 1
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr w1, [x27, #254]!
+add x0, x27, 1
+ldr x1, [x27, #254]!
+add x0, x27, 1
+ldrb w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G50
+ldrb w1, [x27, #254]!
+add x0, x27, 1
+ldrh w1, [x27], #254
+add x0, x27, 1
+ldrh w1, [x27, #254]!
+add x0, x27, 1
+ldrsb w1, [x27], #254
+add x0, x27, 1
+ldrsb x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G51
+ldrsb w1, [x27, #254]!
+add x0, x27, 1
+ldrsb x1, [x27, #254]!
+add x0, x27, 1
+ldrsh w1, [x27], #254
+add x0, x27, 1
+ldrsh x1, [x27], #254
+add x0, x27, 1
+ldrsh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G52
+ldrsh x1, [x27, #254]!
+add x0, x27, 1
+ldrsw x1, [x27], #254
+add x0, x27, 1
+ldrsw x1, [x27, #254]!
+add x0, x27, 1
+st1 { v1.1d }, [x27], #8
+add x0, x27, 1
+st1 { v1.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G53
+st1 { v1.2s }, [x27], #8
+add x0, x27, 1
+st1 { v1.4h }, [x27], #8
+add x0, x27, 1
+st1 { v1.4s }, [x27], #16
+add x0, x27, 1
+st1 { v1.8b }, [x27], #8
+add x0, x27, 1
+st1 { v1.8h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G54
+st1 { v1.16b }, [x27], #16
+add x0, x27, 1
+st1 { v1.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G55
+st1 { v1.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G56
+st1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G57
+st1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G58
+st1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G59
+st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G60
+st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G61
+st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G62
+st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G63
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G64
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G65
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G66
+st1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G67
+st1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+st1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+st1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G68
+st2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G69
+st2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G70
+st2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G71
+st2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G72
+st2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G73
+st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G74
+st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G75
+st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G76
+st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G77
+st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G78
+st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G79
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G80
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G81
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G82
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G83
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G84
+stp s1, s2, [x27], #248
+add x0, x27, 1
+stp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G85
+stp q1, q2, [x27], #992
+add x0, x27, 1
+stp s1, s2, [x27, #248]!
+add x0, x27, 1
+stp d1, d2, [x27, #496]!
+add x0, x27, 1
+stp q1, q2, [x27, #992]!
+add x0, x27, 1
+stp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G86
+stp x1, x2, [x27], #496
+add x0, x27, 1
+stp w1, w2, [x27, #248]!
+add x0, x27, 1
+stp x1, x2, [x27, #496]!
+add x0, x27, 1
+str b1, [x27], #254
+add x0, x27, 1
+str h1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G87
+str s1, [x27], #254
+add x0, x27, 1
+str d1, [x27], #254
+add x0, x27, 1
+str q1, [x27], #254
+add x0, x27, 1
+str b1, [x27, #254]!
+add x0, x27, 1
+str h1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G88
+str s1, [x27, #254]!
+add x0, x27, 1
+str d1, [x27, #254]!
+add x0, x27, 1
+str q1, [x27, #254]!
+add x0, x27, 1
+str w1, [x27], #254
+add x0, x27, 1
+str x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G89
+str w1, [x27, #254]!
+add x0, x27, 1
+str x1, [x27, #254]!
+add x0, x27, 1
+strb w1, [x27], #254
+add x0, x27, 1
+strb w1, [x27, #254]!
+add x0, x27, 1
+strh w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G90
+strh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G91
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr x2, [x1], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - G01
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,3] D==========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==========eeeeeER . . . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,5] .D==============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==============eeeeeER . . ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,7] .D===================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===================eeeeeER. ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,9] .D========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 20.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 9. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.0 0.1 0.0 <total>
+
+# CHECK: [1] Code Region - G02
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,3] D==========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==========eeeeeER . . . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,5] .D==============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==============eeeeeER . . ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,7] .D===================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===================eeeeeER. ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,9] .D========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 20.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.0 0.1 0.0 <total>
+
+# CHECK: [2] Code Region - G03
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] D==========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==========eeeeeER . . . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,5] .D==============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==============eeeeeER . . ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,7] .D===================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===================eeeeeER. ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,9] .D========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 20.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.0 0.1 0.0 <total>
+
+# CHECK: [3] Code Region - G04
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 1900
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.76
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 4.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,3] D==========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=========eeeeeER . . . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] .D==============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==============eeeeeER . . ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] .D===================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==================eeeeeER. ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . D=======================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 19.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 12.7 0.1 0.0 <total>
+
+# CHECK: [4] Code Region - G05
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] D==========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=========eeeeeER . . . ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] .D==============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==============eeeeeER . . ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] .D===================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==================eeeeeER. ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . D=======================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 19.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 12.7 0.1 0.0 <total>
+
+# CHECK: [5] Code Region - G06
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] D==========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=========eeeeeER . . . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,5] .D==============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==============eeeeeER . . ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,7] .D===================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==================eeeeeER. ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . D=======================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 19.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 12.7 0.1 0.0 <total>
+
+# CHECK: [6] Code Region - G07
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2804
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.82
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . .. ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . .. ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] D==========eER . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=========eeeeeeER . . .. ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,5] .D===============eER. . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==============eeeeeeER. .. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,7] . D====================eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===================eeeeeeER. ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,9] . D=========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 5. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 7. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 20.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 9. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.2 0.1 0.0 <total>
+
+# CHECK: [7] Code Region - G08
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.83
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,5] . D================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,7] . D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====================eeeeeeER. ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,9] . D==========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.0 0.1 0.0 <total>
+
+# CHECK: [8] Code Region - G09
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.83
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . D================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====================eeeeeeER. ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . D==========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.0 0.1 0.0 <total>
+
+# CHECK: [9] Code Region - G10
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2700
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.90
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 8.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . D================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,7] . D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====================eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,9] . D==========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.0 0.1 0.0 <total>
+
+# CHECK: [10] Code Region - G11
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,5] . D================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,7] . D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====================eeeeeeER. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,9] . D==========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.0 0.1 0.0 <total>
+
+# CHECK: [11] Code Region - G12
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . D================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeeER . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====================eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . D==========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.0 0.1 0.0 <total>
+
+# CHECK: [12] Code Region - G13
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3104
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.90
+# CHECK-NEXT: IPC: 0.32
+# CHECK-NEXT: Block RThroughput: 8.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . D================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====================eeeeeeeER. ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,9] . D===========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 9. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.1 0.1 0.0 <total>
+
+# CHECK: [13] Code Region - G14
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=======eeeeeeeER . . . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] D==============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=============eeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,5] .D====================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D====================eeeeeeeER . . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,7] .D===========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==========================eeeeeeeER. ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,9] . D=================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 5. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 7. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 9. 1 34.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.7 0.1 0.0 <total>
+
+# CHECK: [14] Code Region - G15
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=======eeeeeeeER . . . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] D==============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=============eeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,5] .D====================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D====================eeeeeeeER . . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==========================eeeeeeeER. ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,9] . D=================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 5. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 7. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 9. 1 34.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.7 0.1 0.0 <total>
+
+# CHECK: [15] Code Region - G16
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=======eeeeeeeER . . . . . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,3] D==============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=============eeeeeeeER . . . . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,5] .D====================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D====================eeeeeeeER . . ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,7] .D===========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==========================eeeeeeeER. ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: [0,9] . D=================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: 5. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: 7. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: 9. 1 34.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.7 0.1 0.0 <total>
+
+# CHECK: [16] Code Region - G17
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=======eeeeeeeER . . . . . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,3] D==============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=============eeeeeeeER . . . . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,5] .D====================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D====================eeeeeeeER . . ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,7] .D===========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==========================eeeeeeeER. ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . D=================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: 5. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: 7. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 34.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.7 0.1 0.0 <total>
+
+# CHECK: [17] Code Region - G18
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=======eeeeeeeER . . . . . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,3] D==============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=============eeeeeeeER . . . . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,5] .D====================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D====================eeeeeeeER . . ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,7] .D===========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==========================eeeeeeeER. ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . D=================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 34.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.7 0.1 0.0 <total>
+
+# CHECK: [18] Code Region - G19
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.74
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=======eeeeeeeER . . . . . ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,3] D==============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=============eeeeeeeER . . . . ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] .D====================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===================eeeeeeeER . . ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . D==========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=========================eeeeeeeER. ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . D================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 20.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 26.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 33.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.3 0.1 0.0 <total>
+
+# CHECK: [19] Code Region - G20
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.86
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [20] Code Region - G21
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.86
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [21] Code Region - G22
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.86
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [22] Code Region - G23
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.86
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [23] Code Region - G24
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.86
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [24] Code Region - G25
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.86
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [25] Code Region - G26
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.86
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [26] Code Region - G27
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3604
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.89
+# CHECK-NEXT: IPC: 0.28
+# CHECK-NEXT: Block RThroughput: 5.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeeER. ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,9] . D================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 9. 1 33.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.6 0.1 0.0 <total>
+
+# CHECK: [27] Code Region - G28
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [28] Code Region - G29
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [29] Code Region - G30
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3804
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . .. ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . .. ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . .. ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeER . .. ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,7] . D============================eER. .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===========================eeeeeeeER. ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,9] . D==================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 7. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 28.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 9. 1 35.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 18.6 0.1 0.0 <total>
+
+# CHECK: [30] Code Region - G31
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [31] Code Region - G32
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [32] Code Region - G33
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [33] Code Region - G34
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [34] Code Region - G35
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [35] Code Region - G36
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4304
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.05
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 9.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeeeeER. . . . . .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] . D===============eER . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,5] . D=====================eER . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D====================eeeeeeeeER . .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,7] . .D===========================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D==========================eeeeeeeeeeER. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,9] . . D===================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 5. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 7. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 9. 1 36.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 18.1 0.1 0.0 <total>
+
+# CHECK: [36] Code Region - G37
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4604
+# CHECK-NEXT: Total uOps: 4800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.04
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeeeER . . . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] . D===============eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeeeER . . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,5] . D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D======================eeeeeeeeeeER . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D==============================eeeeeeeeER. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . D=====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 23.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 38.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.5 0.1 0.0 <total>
+
+# CHECK: [37] Code Region - G38
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4604
+# CHECK-NEXT: Total uOps: 4800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.04
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeeeER . . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . D===============eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . D=====================eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D====================eeeeeeeeeeER . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=============================eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D============================eeeeeeeeeeER. ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . D=====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 38.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 18.7 0.1 0.0 <total>
+
+# CHECK: [38] Code Region - G39
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 5000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.25
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,3] . D=============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,7] . . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER. ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [39] Code Region - G40
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 5000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.25
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER. ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [40] Code Region - G41
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 5000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.25
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,5] . D===================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,7] . . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER. ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: [0,9] . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [41] Code Region - G42
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 5000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.25
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,3] . D=============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,5] . D===================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,7] . . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: [0,9] . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [42] Code Region - G43
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 5000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.25
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER. ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [43] Code Region - G44
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3404
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . D=============eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===================eeeeeER . . ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,7] . D========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . .D=======================eeeeeER. ldp d1, d2, [x27], #496
+# CHECK-NEXT: [0,9] . .D============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 20.0 0.0 0.0 ldp s1, s2, [x27], #248
+# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ldp d1, d2, [x27], #496
+# CHECK-NEXT: 9. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.1 0.1 0.0 <total>
+
+# CHECK: [44] Code Region - G45
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2506
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . ldp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] D=======eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=======eeeeeER. . . . ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] D============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeeER. . . ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] .D================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D================eeeeeeeER . ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] .D=======================eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======================eeeeER ldp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . D=======================eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ldp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 24.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.7 0.1 0.2 <total>
+
+# CHECK: [45] Code Region - G46
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1304
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.53
+# CHECK-NEXT: IPC: 0.77
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . .. ldp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eE--R . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . .. ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] D==eE--R . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eeeeER . .. ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] .D==eE--R . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeeeeER .. ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: [0,7] .D=======eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeeeeER. ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: [0,9] . D===========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: 9. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.3 0.1 0.6 <total>
+
+# CHECK: [46] Code Region - G47
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ldr b1, [x27], #254
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . ldr h1, [x27], #254
+# CHECK-NEXT: [0,3] D==========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==========eeeeeER . . . ldr s1, [x27], #254
+# CHECK-NEXT: [0,5] .D==============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==============eeeeeER . . ldr d1, [x27], #254
+# CHECK-NEXT: [0,7] .D===================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===================eeeeeER. ldr q1, [x27], #254
+# CHECK-NEXT: [0,9] .D========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ldr h1, [x27], #254
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ldr s1, [x27], #254
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ldr d1, [x27], #254
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 20.0 0.0 0.0 ldr q1, [x27], #254
+# CHECK-NEXT: 9. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.0 0.1 0.0 <total>
+
+# CHECK: [47] Code Region - G48
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . ldr b1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . ldr h1, [x27, #254]!
+# CHECK-NEXT: [0,3] D==========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==========eeeeeER . . . ldr s1, [x27, #254]!
+# CHECK-NEXT: [0,5] .D==============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==============eeeeeER . . ldr d1, [x27, #254]!
+# CHECK-NEXT: [0,7] .D===================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===================eeeeeER. ldr q1, [x27, #254]!
+# CHECK-NEXT: [0,9] .D========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]!
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ldr h1, [x27, #254]!
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ldr s1, [x27, #254]!
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ldr d1, [x27, #254]!
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 20.0 0.0 0.0 ldr q1, [x27, #254]!
+# CHECK-NEXT: 9. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.0 0.1 0.0 <total>
+
+# CHECK: [48] Code Region - G49
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr w1, [x27], #254
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldr w1, [x27, #254]!
+# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeeeER. ldr x1, [x27, #254]!
+# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeER ldrb w1, [x27], #254
+# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr x1, [x27, #254]!
+# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrb w1, [x27], #254
+# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.0 0.1 1.0 <total>
+
+# CHECK: [49] Code Region - G50
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldrh w1, [x27], #254
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldrh w1, [x27, #254]!
+# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeeeER. ldrsb w1, [x27], #254
+# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeER ldrsb x1, [x27], #254
+# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrh w1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrh w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsb w1, [x27], #254
+# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsb x1, [x27], #254
+# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.0 0.1 1.0 <total>
+
+# CHECK: [50] Code Region - G51
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldrsb x1, [x27, #254]!
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldrsh w1, [x27], #254
+# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeeeER. ldrsh x1, [x27], #254
+# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeER ldrsh w1, [x27, #254]!
+# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsb x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsh w1, [x27], #254
+# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsh x1, [x27], #254
+# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsh w1, [x27, #254]!
+# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.0 0.1 1.0 <total>
+
+# CHECK: [51] Code Region - G52
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.41
+# CHECK-NEXT: IPC: 1.42
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeE-R . st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,7] .D====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeER. st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . D=====eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]!
+# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 1.0 st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.3 0.1 0.7 <total>
+
+# CHECK: [52] Code Region - G53
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [53] Code Region - G54
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [54] Code Region - G55
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.09
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [55] Code Region - G56
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2700
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.69
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeER . . st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,5] . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===eeER . st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,7] . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====eeER. st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,9] . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [56] Code Region - G57
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.79
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeER . . st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,5] . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===eeER . st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====eeER. st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [57] Code Region - G58
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.79
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeER . . st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===eeER . st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====eeER. st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [58] Code Region - G59
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1704
+# CHECK-NEXT: Total uOps: 3700
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.17
+# CHECK-NEXT: IPC: 0.59
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeER . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] D===eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D==eeeeER. . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,3] .D======eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=====eeeER . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,5] . D========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=======eeeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,7] . D==========eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=========eeeeER. st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,9] . D=============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 3. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 6.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 5. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 8.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 7. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 10.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 9. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.3 0.1 0.0 <total>
+
+# CHECK: [59] Code Region - G60
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1804
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.11
+# CHECK-NEXT: IPC: 0.55
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeER . . .. st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] D===eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D==eeeeER. . .. st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] .D======eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=====eeeeER . .. st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,5] . D=========eER. .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D========eeeER .. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . D===========eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==========eeeeER. st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D==============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 3. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 6.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 5. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 9.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 11.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.8 0.1 0.0 <total>
+
+# CHECK: [60] Code Region - G61
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1704
+# CHECK-NEXT: Total uOps: 3700
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.17
+# CHECK-NEXT: IPC: 0.59
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeER . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D===eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D==eeeER . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D=====eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D====eeeeER . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . D========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=======eeeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . D==========eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=========eeeeER. st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D=============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 8.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 10.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.1 0.1 0.0 <total>
+
+# CHECK: [61] Code Region - G62
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1504
+# CHECK-NEXT: Total uOps: 3600
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.39
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D===eeER . . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] .D=====eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D====eeeeeER . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,5] . D========eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D========eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,7] . D==========eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=========eeER. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,9] . D===========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 5. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 9.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 7. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 10.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 9. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.2 0.1 0.0 <total>
+
+# CHECK: [62] Code Region - G63
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1904
+# CHECK-NEXT: Total uOps: 4200
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.21
+# CHECK-NEXT: IPC: 0.53
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeeER . . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] .D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeER. . . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] .D======eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=====eeeeeER. . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,5] . D=========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D========eeeeeER. . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,7] . D============eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============eeER. st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . D==============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 3. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 6.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 5. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 9.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 7. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 13.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 8.4 0.1 0.0 <total>
+
+# CHECK: [63] Code Region - G64
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1604
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.37
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] .D====eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeER. . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] .D======eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=====eeER . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . D=======eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D======eeeeeER . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . D==========eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==========eeER. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . D============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 6.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 11.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.4 0.1 0.0 <total>
+
+# CHECK: [64] Code Region - G65
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2204
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.45
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 5.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeeER . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] .D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D===eeeeeER . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . D=======eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=======eeeeER . . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,5] . D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==========eeeeER . st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,7] . D==============eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 11.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 7. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 9.8 0.1 0.0 <total>
+
+# CHECK: [65] Code Region - G66
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.2 0.1 0.0 <total>
+
+# CHECK: [66] Code Region - G67
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2104
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.05
+# CHECK-NEXT: IPC: 0.48
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeeER . . . . st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeeER. st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,9] . D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.3 0.1 0.0 <total>
+
+# CHECK: [67] Code Region - G68
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2204
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.09
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeeER . . st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,5] .D============eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===========eeeeER. . st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,7] . D===============eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeeER. st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,9] . D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.4 0.1 0.0 <total>
+
+# CHECK: [68] Code Region - G69
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2304
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.13
+# CHECK-NEXT: IPC: 0.43
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] D=====eER . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . . .. st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D=========eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeER . .. st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . D============eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D============eeeeER .. st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . D================eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===============eeeeeER. st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . D====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.1 0.1 0.0 <total>
+
+# CHECK: [69] Code Region - G70
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2204
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.09
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D===eeeeeER . . . st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=======eeeeeER . . st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . D============eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===========eeeeER . st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,7] . D===============eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===============eeeeER. st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,9] . D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.4 0.1 0.0 <total>
+
+# CHECK: [70] Code Region - G71
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.2 0.1 0.0 <total>
+
+# CHECK: [71] Code Region - G72
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.2 0.1 0.0 <total>
+
+# CHECK: [72] Code Region - G73
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 1604
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.25
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,1] D======eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeER . . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,3] .D==========eER. . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=========eeeeeER. st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,5] . D==============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 8.3 0.2 0.0 <total>
+
+# CHECK: [73] Code Region - G74
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2904
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.31
+# CHECK-NEXT: IPC: 0.34
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeER . . . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] .D==========eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=========eeeeeeER. . . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,5] . D===============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==============eeeeeeER . . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,7] . D====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===================eeeeeeER. st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D=========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 5. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 7. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 20.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.3 0.1 0.0 <total>
+
+# CHECK: [74] Code Region - G75
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2704
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.26
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D=========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeeER . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . D==============eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=============eeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . D==================eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=================eeeeeeER. st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D=======================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 14.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 18.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 12.1 0.1 0.0 <total>
+
+# CHECK: [75] Code Region - G76
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2204
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.81
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeeeER . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeER . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,3] .D=========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeER . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,5] . D============eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===========eeeeER . st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . D===============eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.8 0.1 0.0 <total>
+
+# CHECK: [76] Code Region - G77
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D===eeeeER . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,3] .D=======eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D======eeeeER. . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D==========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=========eeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . D=============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============eeeeER. st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,9] . D================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 3. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 7.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 10.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 13.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 9. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 9.0 0.1 0.0 <total>
+
+# CHECK: [77] Code Region - G78
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2704
+# CHECK-NEXT: Total uOps: 4200
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.55
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeER . . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D===eeeeeER . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,3] .D========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=======eeeeeER . . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===========eeeeeeER . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,7] . D================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===============eeeeeeeER. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,9] . D======================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 7. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 9. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.8 0.1 0.0 <total>
+
+# CHECK: [78] Code Region - G79
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4104
+# CHECK-NEXT: Total uOps: 5800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.41
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] D=======eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeeeER . . . . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] . D==============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=============eeeeeeeER . . . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,5] . D====================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===================eeeeeeeeeER. . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,7] . D===========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . .D==========================eeeeeeeeeER. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,9] . . D==================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 5. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 20.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 7. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 27.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 9. 1 35.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.6 0.1 0.0 <total>
+
+# CHECK: [79] Code Region - G80
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3604
+# CHECK-NEXT: Total uOps: 4800
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.33
+# CHECK-NEXT: IPC: 0.28
+# CHECK-NEXT: Block RThroughput: 9.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] .D=====eER. . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D====eeeeeeeER . . . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . D===========eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeeER . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . D=================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D================eeeeeeeeeER . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . D========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . .D=======================eeeeeeeER. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . .D==============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.0 0.1 0.0 <total>
+
+# CHECK: [80] Code Region - G81
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3304
+# CHECK-NEXT: Total uOps: 5200
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.57
+# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: Block RThroughput: 10.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeER . . . . .. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] .D========eER . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeeER . . .. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . D===============eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeER . .. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,5] . D===================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeER .. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,7] . D=======================eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . .D======================eeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . .D===========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.3 0.1 0.0 <total>
+
+# CHECK: [81] Code Region - G82
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2504
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.60
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeER . . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,3] .D=========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeER . . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,5] . D=============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D============eeeeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . D=================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D================eeeeeER. st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . D=====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 17.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.5 0.1 0.0 <total>
+
+# CHECK: [82] Code Region - G83
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 1804
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.77
+# CHECK-NEXT: IPC: 0.44
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeeeER . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] D=====eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D=========eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeER .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,5] . D============eER .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===========eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . D===============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 9.0 0.1 0.0 <total>
+
+# CHECK: [83] Code Region - G84
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 404
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.48
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeER. . stp s1, s2, [x27], #248
+# CHECK-NEXT: [0,1] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER. stp d1, d2, [x27], #496
+# CHECK-NEXT: [0,3] .D===eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp s1, s2, [x27], #248
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp d1, d2, [x27], #496
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.5 0.3 0.0 <total>
+
+# CHECK: [84] Code Region - G85
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1104
+# CHECK-NEXT: Total uOps: 3100
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.81
+# CHECK-NEXT: IPC: 0.91
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . stp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D==eeER . . stp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] .D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D===eeER. . stp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D====eeeER . stp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] . D=======eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eER. stp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . D=======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 stp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 stp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 stp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.1 0.1 0.0 <total>
+
+# CHECK: [85] Code Region - G86
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 3.27
+# CHECK-NEXT: IPC: 1.42
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] .DeER. . stp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeER . stp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=eeER . str b1, [x27], #254
+# CHECK-NEXT: [0,7] . D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eeER. str h1, [x27], #254
+# CHECK-NEXT: [0,9] . D====eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 stp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 stp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 2.0 0.0 0.0 str b1, [x27], #254
+# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str h1, [x27], #254
+# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.2 0.1 0.0 <total>
+
+# CHECK: [86] Code Region - G87
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.19
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . str s1, [x27], #254
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . str d1, [x27], #254
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . str q1, [x27], #254
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D====eeER . str b1, [x27, #254]!
+# CHECK-NEXT: [0,7] . D======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. str h1, [x27, #254]!
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 str d1, [x27], #254
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 str q1, [x27], #254
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 str b1, [x27, #254]!
+# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 str h1, [x27, #254]!
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.0 0.1 0.0 <total>
+
+# CHECK: [87] Code Region - G88
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 804
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 2.74
+# CHECK-NEXT: IPC: 1.24
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. .. str s1, [x27, #254]!
+# CHECK-NEXT: [0,1] D==eER .. add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER .. str d1, [x27, #254]!
+# CHECK-NEXT: [0,3] D====eER .. add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER .. str q1, [x27, #254]!
+# CHECK-NEXT: [0,5] .D=====eER.. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D====eER.. str w1, [x27], #254
+# CHECK-NEXT: [0,7] . D=====eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=====eER. str x1, [x27], #254
+# CHECK-NEXT: [0,9] . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]!
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 str d1, [x27, #254]!
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 str q1, [x27, #254]!
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 str w1, [x27], #254
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 6.0 0.0 0.0 str x1, [x27], #254
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.6 0.1 0.0 <total>
+
+# CHECK: [88] Code Region - G89
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 3.97
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . str w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eER. . str x1, [x27, #254]!
+# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eER . strb w1, [x27], #254
+# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eER . strb w1, [x27, #254]!
+# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eER. strh w1, [x27], #254
+# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 strb w1, [x27], #254
+# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 strb w1, [x27, #254]!
+# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 strh w1, [x27], #254
+# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
+
+# CHECK: [89] Code Region - G90
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 104
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 3.85
+# CHECK-NEXT: IPC: 1.92
+# CHECK-NEXT: Block RThroughput: 0.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234
+
+# CHECK: [0,0] DeER. strh w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.5 0.5 0.0 <total>
+
+# CHECK: [90] Code Region - G91
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 142
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 4.23
+# CHECK-NEXT: IPC: 2.82
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER ldr x2, [x1], #254
+# CHECK-NEXT: [0,3] D=eE------R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ldr x2, [x1], #254
+# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.5 0.3 2.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
new file mode 100644
index 000000000000000..7ee97a088ecba18
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
@@ -0,0 +1,5347 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-n2 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1 { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1 { v1.2d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2s }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4h }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1 { v1.8b }, [x27], #8
+add x0, x27, 1
+ld1 { v1.8h }, [x27], #16
+add x0, x27, 1
+ld1 { v1.16b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1 { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1 { v1.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+ld1r { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.4h }, [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r { v1.4s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.8b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.8h }, [x27], #2
+add x0, x27, 1
+ld1r { v1.16b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r { v1.2d }, [x27], x28
+add x0, x27, 1
+ld1r { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r { v1.8h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+ld2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r { v1.2s, v2.2s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], #4
+add x0, x27, 1
+ld2r { v1.4s, v2.4s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r { v1.16b, v2.16b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld2r { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G34
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G35
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G36
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G37
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G38
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G39
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G40
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G41
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G42
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+add x0, x27, 1
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G43
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G44
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ldp s1, s2, [x27], #248
+add x0, x27, 1
+ldp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G45
+ldp q1, q2, [x27], #992
+add x0, x27, 1
+ldp s1, s2, [x27, #248]!
+add x0, x27, 1
+ldp d1, d2, [x27, #496]!
+add x0, x27, 1
+ldp q1, q2, [x27, #992]!
+add x0, x27, 1
+ldp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G46
+ldp x1, x2, [x27], #496
+add x0, x27, 1
+ldp w1, w2, [x27, #248]!
+add x0, x27, 1
+ldp x1, x2, [x27, #496]!
+add x0, x27, 1
+ldpsw x1, x2, [x27], #248
+add x0, x27, 1
+ldpsw x1, x2, [x27, #248]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G47
+ldr b1, [x27], #254
+add x0, x27, 1
+ldr h1, [x27], #254
+add x0, x27, 1
+ldr s1, [x27], #254
+add x0, x27, 1
+ldr d1, [x27], #254
+add x0, x27, 1
+ldr q1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G48
+ldr b1, [x27, #254]!
+add x0, x27, 1
+ldr h1, [x27, #254]!
+add x0, x27, 1
+ldr s1, [x27, #254]!
+add x0, x27, 1
+ldr d1, [x27, #254]!
+add x0, x27, 1
+ldr q1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G49
+ldr w1, [x27], #254
+add x0, x27, 1
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr w1, [x27, #254]!
+add x0, x27, 1
+ldr x1, [x27, #254]!
+add x0, x27, 1
+ldrb w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G50
+ldrb w1, [x27, #254]!
+add x0, x27, 1
+ldrh w1, [x27], #254
+add x0, x27, 1
+ldrh w1, [x27, #254]!
+add x0, x27, 1
+ldrsb w1, [x27], #254
+add x0, x27, 1
+ldrsb x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G51
+ldrsb w1, [x27, #254]!
+add x0, x27, 1
+ldrsb x1, [x27, #254]!
+add x0, x27, 1
+ldrsh w1, [x27], #254
+add x0, x27, 1
+ldrsh x1, [x27], #254
+add x0, x27, 1
+ldrsh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G52
+ldrsh x1, [x27, #254]!
+add x0, x27, 1
+ldrsw x1, [x27], #254
+add x0, x27, 1
+ldrsw x1, [x27, #254]!
+add x0, x27, 1
+st1 { v1.1d }, [x27], #8
+add x0, x27, 1
+st1 { v1.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G53
+st1 { v1.2s }, [x27], #8
+add x0, x27, 1
+st1 { v1.4h }, [x27], #8
+add x0, x27, 1
+st1 { v1.4s }, [x27], #16
+add x0, x27, 1
+st1 { v1.8b }, [x27], #8
+add x0, x27, 1
+st1 { v1.8h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G54
+st1 { v1.16b }, [x27], #16
+add x0, x27, 1
+st1 { v1.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G55
+st1 { v1.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G56
+st1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G57
+st1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G58
+st1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G59
+st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G60
+st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G61
+st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G62
+st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G63
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G64
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G65
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G66
+st1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G67
+st1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+st1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+st1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G68
+st2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G69
+st2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G70
+st2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G71
+st2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G72
+st2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G73
+st2g x26, [x27], #4064
+add x0, x27, 1
+st2g x26, [x27, #4064]!
+add x0, x27, 1
+st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G74
+st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G75
+st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G76
+st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G77
+st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G78
+st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G79
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G80
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G81
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G82
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G83
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+stg x26, [x27], #4064
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G84
+stg x26, [x27, #4064]!
+add x0, x27, 1
+stgp x1, x2, [x27], #992
+add x0, x27, 1
+stgp x1, x2, [x27, #992]!
+add x0, x27, 1
+stp s1, s2, [x27], #248
+add x0, x27, 1
+stp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G85
+stp q1, q2, [x27], #992
+add x0, x27, 1
+stp s1, s2, [x27, #248]!
+add x0, x27, 1
+stp d1, d2, [x27, #496]!
+add x0, x27, 1
+stp q1, q2, [x27, #992]!
+add x0, x27, 1
+stp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G86
+stp x1, x2, [x27], #496
+add x0, x27, 1
+stp w1, w2, [x27, #248]!
+add x0, x27, 1
+stp x1, x2, [x27, #496]!
+add x0, x27, 1
+str b1, [x27], #254
+add x0, x27, 1
+str h1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G87
+str s1, [x27], #254
+add x0, x27, 1
+str d1, [x27], #254
+add x0, x27, 1
+str q1, [x27], #254
+add x0, x27, 1
+str b1, [x27, #254]!
+add x0, x27, 1
+str h1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G88
+str s1, [x27, #254]!
+add x0, x27, 1
+str d1, [x27, #254]!
+add x0, x27, 1
+str q1, [x27, #254]!
+add x0, x27, 1
+str w1, [x27], #254
+add x0, x27, 1
+str x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G89
+str w1, [x27, #254]!
+add x0, x27, 1
+str x1, [x27, #254]!
+add x0, x27, 1
+strb w1, [x27], #254
+add x0, x27, 1
+strb w1, [x27, #254]!
+add x0, x27, 1
+strh w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G90
+strh w1, [x27, #254]!
+add x0, x27, 1
+stz2g x26, [x27], #4064
+add x0, x27, 1
+stz2g x26, [x27, #4064]!
+add x0, x27, 1
+stzg x26, [x27], #4064
+add x0, x27, 1
+stzg x26, [x27, #4064]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G91
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr x2, [x1], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - G01
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.6 0.1 0.0 <total>
+
+# CHECK: [1] Code Region - G02
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.6 0.1 0.0 <total>
+
+# CHECK: [2] Code Region - G03
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.6 0.1 0.0 <total>
+
+# CHECK: [3] Code Region - G04
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.63
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.5 0.1 0.0 <total>
+
+# CHECK: [4] Code Region - G05
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======================eeeeeeER. ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . D============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.2 0.1 0.0 <total>
+
+# CHECK: [5] Code Region - G06
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======================eeeeeeER. ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . D============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.2 0.1 0.0 <total>
+
+# CHECK: [6] Code Region - G07
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.77
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 4.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======================eeeeeeER. ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,9] . D============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 9. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.2 0.1 0.0 <total>
+
+# CHECK: [7] Code Region - G08
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.83
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======================eeeeeeER. ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,9] . D============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 9. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.2 0.1 0.0 <total>
+
+# CHECK: [8] Code Region - G09
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.83
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======================eeeeeeER. ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . D============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.2 0.1 0.0 <total>
+
+# CHECK: [9] Code Region - G10
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3204
+# CHECK-NEXT: Total uOps: 2700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.84
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 5.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,7] . D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=======================eeeeeeeER. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,9] . D==============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.5 0.1 0.0 <total>
+
+# CHECK: [10] Code Region - G11
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.86
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [11] Code Region - G12
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.86
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D========================eeeeeeeER. ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.5 0.1 0.0 <total>
+
+# CHECK: [12] Code Region - G13
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3604
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.78
+# CHECK-NEXT: IPC: 0.28
+# CHECK-NEXT: Block RThroughput: 5.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] .D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . D===================eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . D=========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=========================eeeeeeeeER. ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,9] . D=================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 26.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 9. 1 34.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.8 0.1 0.0 <total>
+
+# CHECK: [13] Code Region - G14
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [14] Code Region - G15
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [15] Code Region - G16
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [16] Code Region - G17
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [17] Code Region - G18
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [18] Code Region - G19
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . D==============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.1 0.1 0.0 <total>
+
+# CHECK: [19] Code Region - G20
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.72
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D======================eeeeeeeeER. . . ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . D==============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=============================eeeeeeeeER. ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D=====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 23.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 30.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 38.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.7 0.1 0.0 <total>
+
+# CHECK: [20] Code Region - G21
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . D==============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.1 0.1 0.0 <total>
+
+# CHECK: [21] Code Region - G22
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.65
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . D==============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.0 0.1 0.0 <total>
+
+# CHECK: [22] Code Region - G23
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [23] Code Region - G24
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [24] Code Region - G25
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [25] Code Region - G26
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [26] Code Region - G27
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.70
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [27] Code Region - G28
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.92
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [28] Code Region - G29
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.95
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [29] Code Region - G30
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.92
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [30] Code Region - G31
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.87
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [31] Code Region - G32
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.87
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [32] Code Region - G33
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.92
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [33] Code Region - G34
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.95
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [34] Code Region - G35
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.92
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [35] Code Region - G36
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4204
+# CHECK-NEXT: Total uOps: 4600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.09
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 9.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012345
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeeER. . . . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] .D================eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D===============eeeeeeeeER . . . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,5] . D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D======================eeeeeeeeER . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,7] . D==============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=============================eeeeeeeeeER. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 23.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 7. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 30.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.8 0.1 0.0 <total>
+
+# CHECK: [36] Code Region - G37
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4304
+# CHECK-NEXT: Total uOps: 4800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] D========eER . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeeER. . . . . .. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] .D================eER . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D===============eeeeeeeeeER . . . .. ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,5] . D========================eER. . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=======================eeeeeeeeeER . .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . D================================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===============================eeeeeeeeER. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 33.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.5 0.1 0.0 <total>
+
+# CHECK: [37] Code Region - G38
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4304
+# CHECK-NEXT: Total uOps: 4800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeeER. . . . . .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,3] .D================eER . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D===============eeeeeeeeER . . . .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . D=======================eER . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D======================eeeeeeeeeER . .. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . D===============================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeeER. ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 23.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.1 0.1 0.0 <total>
+
+# CHECK: [38] Code Region - G39
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [39] Code Region - G40
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [40] Code Region - G41
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.15
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [41] Code Region - G42
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.20
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [42] Code Region - G43
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.17
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [43] Code Region - G44
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3604
+# CHECK-NEXT: Total uOps: 3900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.08
+# CHECK-NEXT: IPC: 0.28
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeER . . ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,7] . D===========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===========================eeeeeeER. ldp d1, d2, [x27], #496
+# CHECK-NEXT: [0,9] . D=================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ldp s1, s2, [x27], #248
+# CHECK-NEXT: 7. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 28.0 0.0 0.0 ldp d1, d2, [x27], #496
+# CHECK-NEXT: 9. 1 34.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 18.4 0.1 0.0 <total>
+
+# CHECK: [44] Code Region - G45
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2506
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . ldp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] D======eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . . ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] .D===========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeeeER . . ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] .D=================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D================eeeeeeER . ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] . D======================eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======================eeeeER ldp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . D======================eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ldp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 23.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.2 0.1 0.2 <total>
+
+# CHECK: [45] Code Region - G46
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1304
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.53
+# CHECK-NEXT: IPC: 0.77
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . .. ldp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eE--R . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . .. ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] D==eE--R . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eeeeER . .. ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] .D==eE--R . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeeeeER .. ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: [0,7] .D=======eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeeeeER. ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: [0,9] . D===========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: 9. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.3 0.1 0.6 <total>
+
+# CHECK: [46] Code Region - G47
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ldr b1, [x27], #254
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ldr h1, [x27], #254
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . ldr s1, [x27], #254
+# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ldr d1, [x27], #254
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======================eeeeeeER. ldr q1, [x27], #254
+# CHECK-NEXT: [0,9] . D============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ldr h1, [x27], #254
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ldr s1, [x27], #254
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ldr d1, [x27], #254
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ldr q1, [x27], #254
+# CHECK-NEXT: 9. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.2 0.1 0.0 <total>
+
+# CHECK: [47] Code Region - G48
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ldr b1, [x27, #254]!
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ldr h1, [x27, #254]!
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . ldr s1, [x27, #254]!
+# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ldr d1, [x27, #254]!
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======================eeeeeeER. ldr q1, [x27, #254]!
+# CHECK-NEXT: [0,9] . D============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]!
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ldr h1, [x27, #254]!
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ldr s1, [x27, #254]!
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ldr d1, [x27, #254]!
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ldr q1, [x27, #254]!
+# CHECK-NEXT: 9. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.2 0.1 0.0 <total>
+
+# CHECK: [48] Code Region - G49
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr w1, [x27], #254
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldr w1, [x27, #254]!
+# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeeeER. ldr x1, [x27, #254]!
+# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeER ldrb w1, [x27], #254
+# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr x1, [x27, #254]!
+# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrb w1, [x27], #254
+# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.1 0.1 1.0 <total>
+
+# CHECK: [49] Code Region - G50
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldrh w1, [x27], #254
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldrh w1, [x27, #254]!
+# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeeeER. ldrsb w1, [x27], #254
+# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeER ldrsb x1, [x27], #254
+# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrh w1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrh w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsb w1, [x27], #254
+# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsb x1, [x27], #254
+# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.1 0.1 1.0 <total>
+
+# CHECK: [50] Code Region - G51
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldrsb x1, [x27, #254]!
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldrsh w1, [x27], #254
+# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeeeER. ldrsh x1, [x27], #254
+# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeER ldrsh w1, [x27, #254]!
+# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsb x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsh w1, [x27], #254
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsh x1, [x27], #254
+# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsh w1, [x27, #254]!
+# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.1 0.1 1.0 <total>
+
+# CHECK: [51] Code Region - G52
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.41
+# CHECK-NEXT: IPC: 1.42
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeE-R . st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,7] .D====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeER. st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,9] .D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]!
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 1.0 st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 0.7 <total>
+
+# CHECK: [52] Code Region - G53
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [53] Code Region - G54
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [54] Code Region - G55
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [55] Code Region - G56
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.39
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [56] Code Region - G57
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.59
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D====eeER . st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . D======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.8 0.1 0.0 <total>
+
+# CHECK: [57] Code Region - G58
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.59
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [58] Code Region - G59
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.39
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeER . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,5] . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,7] . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====eeER. st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,9] . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [59] Code Region - G60
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.59
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeER . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,5] . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===eeER . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====eeER. st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [60] Code Region - G61
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.39
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeER . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====eeER. st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [61] Code Region - G62
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.59
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeER . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,5] . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,7] . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====eeER. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,9] . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [62] Code Region - G63
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 4200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.18
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeER . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,5] . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===eeER . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,7] . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====eeER. st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [63] Code Region - G64
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.78
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeER . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . D====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===eeER . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====eeER. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.1 0.0 <total>
+
+# CHECK: [64] Code Region - G65
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1604
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.00
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 5.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] .D===eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==eeeeER . . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,5] . D======eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D======eeeeER. . st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,7] . D==========eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=========eeeeER. st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D=============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 7. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 10.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 6.2 0.1 0.0 <total>
+
+# CHECK: [65] Code Region - G66
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.2 0.1 0.0 <total>
+
+# CHECK: [66] Code Region - G67
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.10
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.2 0.1 0.0 <total>
+
+# CHECK: [67] Code Region - G68
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.20
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.2 0.1 0.0 <total>
+
+# CHECK: [68] Code Region - G69
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.30
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D===eeeeER . . . st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D=======eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==========eeeeER . . st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . D==============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 11.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 9.8 0.1 0.0 <total>
+
+# CHECK: [69] Code Region - G70
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.20
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.2 0.1 0.0 <total>
+
+# CHECK: [70] Code Region - G71
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.2 0.1 0.0 <total>
+
+# CHECK: [71] Code Region - G72
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.2 0.1 0.0 <total>
+
+# CHECK: [72] Code Region - G73
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1804
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.55
+# CHECK-NEXT: IPC: 0.55
+# CHECK-NEXT: Block RThroughput: 4.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeER . . . .. st2g x26, [x27], #4064
+# CHECK-NEXT: [0,1] D=eER. . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eER. . . .. st2g x26, [x27, #4064]!
+# CHECK-NEXT: [0,3] D==eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eeeeeeER . .. st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,5] .D=======eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D======eeeeeER .. st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,7] . D===========eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==========eeeeeER. st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,9] . D===============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2g x26, [x27], #4064
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2g x26, [x27, #4064]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 5. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 7. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 11.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 9. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 6.4 0.1 0.0 <total>
+
+# CHECK: [73] Code Region - G74
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2904
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.31
+# CHECK-NEXT: IPC: 0.34
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeER . . . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] .D==========eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=========eeeeeeER. . . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,5] . D===============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==============eeeeeeER . . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,7] . D====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===================eeeeeeER. st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D=========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 5. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 7. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 20.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.3 0.1 0.0 <total>
+
+# CHECK: [74] Code Region - G75
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2704
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.26
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeER . . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D=========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D========eeeeeeER . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . D==============eER. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=============eeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . D==================eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=================eeeeeeER. st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D=======================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 14.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 18.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 12.1 0.1 0.0 <total>
+
+# CHECK: [75] Code Region - G76
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.33
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,5] . D================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeeER . . st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====================eeeeeeER. st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . D==========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.0 0.1 0.0 <total>
+
+# CHECK: [76] Code Region - G77
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.33
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====================eeeeeeER. st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,9] . D==========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.0 0.1 0.0 <total>
+
+# CHECK: [77] Code Region - G78
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2904
+# CHECK-NEXT: Total uOps: 4200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.45
+# CHECK-NEXT: IPC: 0.34
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeER . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,7] . D====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===================eeeeeeER. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,9] . D=========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 7. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 20.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 9. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.7 0.1 0.0 <total>
+
+# CHECK: [78] Code Region - G79
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3304
+# CHECK-NEXT: Total uOps: 5800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.76
+# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] D======eER. . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeeER . . . .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] . D===========eER . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,5] . D================eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeeeER . .. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,7] . D=====================eER. .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . .D====================eeeeeeeER. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,9] . . D==========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.0 0.1 0.0 <total>
+
+# CHECK: [79] Code Region - G80
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 4800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.60
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 9.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D====eeeeeeER . . . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] .D==========eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D=========eeeeeeER. . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . D===============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==============eeeeeeeER . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . D====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===================eeeeeeER. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . D=========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 20.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.1 0.1 0.0 <total>
+
+# CHECK: [80] Code Region - G81
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3204
+# CHECK-NEXT: Total uOps: 5200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.62
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 10.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] .D======eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . D=====eeeeeeeER . . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,5] . D================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,7] . D=====================eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . .D====================eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . .D==========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.0 0.1 0.0 <total>
+
+# CHECK: [81] Code Region - G82
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.33
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeeeER . . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,5] . D================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D===============eeeeeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====================eeeeeeER. st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . D==========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 5. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.0 0.1 0.0 <total>
+
+# CHECK: [82] Code Region - G83
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2104
+# CHECK-NEXT: Total uOps: 3600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.71
+# CHECK-NEXT: IPC: 0.48
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeeeeER . . . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] D======eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeER. . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==========eeeeER . . st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,5] . D==============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=============eeeeER . st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . D=================eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D================eER. stg x26, [x27], #4064
+# CHECK-NEXT: [0,9] . D=================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 14.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 17.0 0.0 0.0 stg x26, [x27], #4064
+# CHECK-NEXT: 9. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.9 0.1 0.0 <total>
+
+# CHECK: [83] Code Region - G84
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.37
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . stg x26, [x27, #4064]!
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eER. . stgp x1, x2, [x27], #992
+# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eER . stgp x1, x2, [x27, #992]!
+# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeER. stp s1, s2, [x27], #248
+# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eeER stp d1, d2, [x27], #496
+# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stg x26, [x27, #4064]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stgp x1, x2, [x27], #992
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 stgp x1, x2, [x27, #992]!
+# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 stp s1, s2, [x27], #248
+# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 stp d1, d2, [x27], #496
+# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
+
+# CHECK: [84] Code Region - G85
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.55
+# CHECK-NEXT: IPC: 1.42
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . stp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . stp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] D===eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D==eeER . stp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] .D===eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===eeER . stp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] . D====eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====eER. stp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] . D=====eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 stp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 stp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 stp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.8 0.1 0.0 <total>
+
+# CHECK: [85] Code Region - G86
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.37
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eER. . stp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eER . stp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeER. str b1, [x27], #254
+# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eeER str h1, [x27], #254
+# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 stp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 str b1, [x27], #254
+# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str h1, [x27], #254
+# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
+
+# CHECK: [86] Code Region - G87
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . str s1, [x27], #254
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeER . str d1, [x27], #254
+# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eeER . str q1, [x27], #254
+# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeER. str b1, [x27, #254]!
+# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eeER str h1, [x27, #254]!
+# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 str q1, [x27], #254
+# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 str b1, [x27, #254]!
+# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str h1, [x27, #254]!
+# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
+
+# CHECK: [87] Code Region - G88
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.56
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . str s1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeER . str d1, [x27, #254]!
+# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eeER . str q1, [x27, #254]!
+# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eER . str w1, [x27], #254
+# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eER. str x1, [x27], #254
+# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 str q1, [x27, #254]!
+# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 str w1, [x27], #254
+# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str x1, [x27], #254
+# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
+
+# CHECK: [88] Code Region - G89
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.97
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . str w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eER. . str x1, [x27, #254]!
+# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eER . strb w1, [x27], #254
+# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eER . strb w1, [x27, #254]!
+# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eER. strh w1, [x27], #254
+# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 strb w1, [x27], #254
+# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 strb w1, [x27, #254]!
+# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 strh w1, [x27], #254
+# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
+
+# CHECK: [89] Code Region - G90
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.97
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . strh w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eER. . stz2g x26, [x27], #4064
+# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eER . stz2g x26, [x27, #4064]!
+# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eER . stzg x26, [x27], #4064
+# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eER. stzg x26, [x27, #4064]!
+# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stz2g x26, [x27], #4064
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 stz2g x26, [x27, #4064]!
+# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 stzg x26, [x27], #4064
+# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 stzg x26, [x27, #4064]!
+# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.7 0.1 0.0 <total>
+
+# CHECK: [90] Code Region - G91
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 110
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 5.45
+# CHECK-NEXT: IPC: 3.64
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER ldr x2, [x1], #254
+# CHECK-NEXT: [0,3] D=eE------R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ldr x2, [x1], #254
+# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.5 0.3 2.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
new file mode 100644
index 000000000000000..91adecac8d67e21
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
@@ -0,0 +1,5290 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v1 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1 { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1 { v1.2d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2s }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4h }, [x27], #8
+add x0, x27, 1
+ld1 { v1.4s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1 { v1.8b }, [x27], #8
+add x0, x27, 1
+ld1 { v1.8h }, [x27], #16
+add x0, x27, 1
+ld1 { v1.16b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1 { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1 { v1.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+ld1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+ld1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+ld1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+ld1r { v1.1d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2d }, [x27], #8
+add x0, x27, 1
+ld1r { v1.2s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.4h }, [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r { v1.4s }, [x27], #4
+add x0, x27, 1
+ld1r { v1.8b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.8h }, [x27], #2
+add x0, x27, 1
+ld1r { v1.16b }, [x27], #1
+add x0, x27, 1
+ld1r { v1.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r { v1.2d }, [x27], x28
+add x0, x27, 1
+ld1r { v1.2s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.4s }, [x27], x28
+add x0, x27, 1
+ld1r { v1.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r { v1.8h }, [x27], x28
+add x0, x27, 1
+ld1r { v1.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+ld2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+ld2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+ld2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+ld2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+ld2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+ld2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+ld2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r { v1.2s, v2.2s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], #4
+add x0, x27, 1
+ld2r { v1.4s, v2.4s }, [x27], #8
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r { v1.16b, v2.16b }, [x27], #2
+add x0, x27, 1
+ld2r { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+ld2r { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+ld2r { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+ld2r { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+add x0, x27, 1
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G34
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+add x0, x27, 1
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+add x0, x27, 1
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G35
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G36
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G37
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G38
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G39
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G40
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G41
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G42
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+add x0, x27, 1
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G43
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G44
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+ldp s1, s2, [x27], #248
+add x0, x27, 1
+ldp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G45
+ldp q1, q2, [x27], #992
+add x0, x27, 1
+ldp s1, s2, [x27, #248]!
+add x0, x27, 1
+ldp d1, d2, [x27, #496]!
+add x0, x27, 1
+ldp q1, q2, [x27, #992]!
+add x0, x27, 1
+ldp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G46
+ldp x1, x2, [x27], #496
+add x0, x27, 1
+ldp w1, w2, [x27, #248]!
+add x0, x27, 1
+ldp x1, x2, [x27, #496]!
+add x0, x27, 1
+ldpsw x1, x2, [x27], #248
+add x0, x27, 1
+ldpsw x1, x2, [x27, #248]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G47
+ldr b1, [x27], #254
+add x0, x27, 1
+ldr h1, [x27], #254
+add x0, x27, 1
+ldr s1, [x27], #254
+add x0, x27, 1
+ldr d1, [x27], #254
+add x0, x27, 1
+ldr q1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G48
+ldr b1, [x27, #254]!
+add x0, x27, 1
+ldr h1, [x27, #254]!
+add x0, x27, 1
+ldr s1, [x27, #254]!
+add x0, x27, 1
+ldr d1, [x27, #254]!
+add x0, x27, 1
+ldr q1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G49
+ldr w1, [x27], #254
+add x0, x27, 1
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr w1, [x27, #254]!
+add x0, x27, 1
+ldr x1, [x27, #254]!
+add x0, x27, 1
+ldrb w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G50
+ldrb w1, [x27, #254]!
+add x0, x27, 1
+ldrh w1, [x27], #254
+add x0, x27, 1
+ldrh w1, [x27, #254]!
+add x0, x27, 1
+ldrsb w1, [x27], #254
+add x0, x27, 1
+ldrsb x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G51
+ldrsb w1, [x27, #254]!
+add x0, x27, 1
+ldrsb x1, [x27, #254]!
+add x0, x27, 1
+ldrsh w1, [x27], #254
+add x0, x27, 1
+ldrsh x1, [x27], #254
+add x0, x27, 1
+ldrsh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G52
+ldrsh x1, [x27, #254]!
+add x0, x27, 1
+ldrsw x1, [x27], #254
+add x0, x27, 1
+ldrsw x1, [x27, #254]!
+add x0, x27, 1
+st1 { v1.1d }, [x27], #8
+add x0, x27, 1
+st1 { v1.2d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G53
+st1 { v1.2s }, [x27], #8
+add x0, x27, 1
+st1 { v1.4h }, [x27], #8
+add x0, x27, 1
+st1 { v1.4s }, [x27], #16
+add x0, x27, 1
+st1 { v1.8b }, [x27], #8
+add x0, x27, 1
+st1 { v1.8h }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G54
+st1 { v1.16b }, [x27], #16
+add x0, x27, 1
+st1 { v1.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G55
+st1 { v1.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G56
+st1 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st1 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G57
+st1 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st1 { v1.1d, v2.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G58
+st1 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G59
+st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G60
+st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G61
+st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G62
+st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+add x0, x27, 1
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G63
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G64
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G65
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st1 { v1.b }[0], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[8], [x27], #1
+add x0, x27, 1
+st1 { v1.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G66
+st1 { v1.b }[8], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[0], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[4], [x27], #2
+add x0, x27, 1
+st1 { v1.h }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G67
+st1 { v1.s }[0], [x27], #4
+add x0, x27, 1
+st1 { v1.s }[0], [x27], x28
+add x0, x27, 1
+st1 { v1.d }[0], [x27], #8
+add x0, x27, 1
+st1 { v1.d }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G68
+st2 { v1.2s, v2.2s }, [x27], #16
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], #16
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], #32
+add x0, x27, 1
+st2 { v1.8b, v2.8b }, [x27], #16
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G69
+st2 { v1.16b, v2.16b }, [x27], #32
+add x0, x27, 1
+st2 { v1.2d, v2.2d }, [x27], x28
+add x0, x27, 1
+st2 { v1.2s, v2.2s }, [x27], x28
+add x0, x27, 1
+st2 { v1.4h, v2.4h }, [x27], x28
+add x0, x27, 1
+st2 { v1.4s, v2.4s }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G70
+st2 { v1.8b, v2.8b }, [x27], x28
+add x0, x27, 1
+st2 { v1.8h, v2.8h }, [x27], x28
+add x0, x27, 1
+st2 { v1.16b, v2.16b }, [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[0], [x27], #2
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], #2
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G71
+st2 { v1.b, v2.b }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.b, v2.b }[8], [x27], x28
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[4], [x27], #4
+add x0, x27, 1
+st2 { v1.h, v2.h }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G72
+st2 { v1.h, v2.h }[4], [x27], x28
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], #8
+add x0, x27, 1
+st2 { v1.s, v2.s }[0], [x27], x28
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], #16
+add x0, x27, 1
+st2 { v1.d, v2.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G73
+st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+add x0, x27, 1
+st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G74
+st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+add x0, x27, 1
+st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+add x0, x27, 1
+st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G75
+st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+add x0, x27, 1
+st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+add x0, x27, 1
+st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+add x0, x27, 1
+st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+add x0, x27, 1
+st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G76
+st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G77
+st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+add x0, x27, 1
+st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G78
+st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+add x0, x27, 1
+st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G79
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+add x0, x27, 1
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G80
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+add x0, x27, 1
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+add x0, x27, 1
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+add x0, x27, 1
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+add x0, x27, 1
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G81
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+add x0, x27, 1
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+add x0, x27, 1
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G82
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G83
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+add x0, x27, 1
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+add x0, x27, 1
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G84
+stp s1, s2, [x27], #248
+add x0, x27, 1
+stp d1, d2, [x27], #496
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G85
+stp q1, q2, [x27], #992
+add x0, x27, 1
+stp s1, s2, [x27, #248]!
+add x0, x27, 1
+stp d1, d2, [x27, #496]!
+add x0, x27, 1
+stp q1, q2, [x27, #992]!
+add x0, x27, 1
+stp w1, w2, [x27], #248
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G86
+stp x1, x2, [x27], #496
+add x0, x27, 1
+stp w1, w2, [x27, #248]!
+add x0, x27, 1
+stp x1, x2, [x27, #496]!
+add x0, x27, 1
+str b1, [x27], #254
+add x0, x27, 1
+str h1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G87
+str s1, [x27], #254
+add x0, x27, 1
+str d1, [x27], #254
+add x0, x27, 1
+str q1, [x27], #254
+add x0, x27, 1
+str b1, [x27, #254]!
+add x0, x27, 1
+str h1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G88
+str s1, [x27, #254]!
+add x0, x27, 1
+str d1, [x27, #254]!
+add x0, x27, 1
+str q1, [x27, #254]!
+add x0, x27, 1
+str w1, [x27], #254
+add x0, x27, 1
+str x1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G89
+str w1, [x27, #254]!
+add x0, x27, 1
+str x1, [x27, #254]!
+add x0, x27, 1
+strb w1, [x27], #254
+add x0, x27, 1
+strb w1, [x27, #254]!
+add x0, x27, 1
+strh w1, [x27], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G90
+strh w1, [x27, #254]!
+add x0, x27, 1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G91
+ldr x1, [x27], #254
+add x0, x27, 1
+ldr x2, [x1], #254
+add x0, x27, 1
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - G01
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] D========================eeeeeeER. ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,9] D==============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.0 0.1 0.0 <total>
+
+# CHECK: [1] Code Region - G02
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] D========================eeeeeeER. ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,9] D==============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.0 0.1 0.0 <total>
+
+# CHECK: [2] Code Region - G03
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] D========================eeeeeeER. ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,9] D==============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.0 0.1 0.0 <total>
+
+# CHECK: [3] Code Region - G04
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1900
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.63
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.8 0.1 0.0 <total>
+
+# CHECK: [4] Code Region - G05
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.7 0.1 0.0 <total>
+
+# CHECK: [5] Code Region - G06
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.7 0.1 0.0 <total>
+
+# CHECK: [6] Code Region - G07
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.77
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 4.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.6 0.1 0.0 <total>
+
+# CHECK: [7] Code Region - G08
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.83
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.6 0.1 0.0 <total>
+
+# CHECK: [8] Code Region - G09
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.83
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.6 0.1 0.0 <total>
+
+# CHECK: [9] Code Region - G10
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3104
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.81
+# CHECK-NEXT: IPC: 0.32
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeeeeER. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,9] .D==============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.7 0.1 0.0 <total>
+
+# CHECK: [10] Code Region - G11
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3204
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 4.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeeER . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,5] D===================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==================eeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,7] .D========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D========================eeeeeeeER. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,9] .D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.2 0.1 0.0 <total>
+
+# CHECK: [11] Code Region - G12
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3204
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 4.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] D=======eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=======eeeeeeER . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,3] D=============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D=============eeeeeeeER . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,5] .D===================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===================eeeeeeER . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,7] .D=========================eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=========================eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,9] .D===============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 20.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 26.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.9 0.1 0.0 <total>
+
+# CHECK: [12] Code Region - G13
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3504
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.74
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=======eeeeeeER . . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] D=============eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D=============eeeeeeeER . . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,5] .D===================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===================eeeeeeeER. . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,7] .D==========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D==========================eeeeeeeeER. ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,9] .D==================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 20.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 9. 1 35.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.4 0.1 0.0 <total>
+
+# CHECK: [13] Code Region - G14
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.7 0.1 0.0 <total>
+
+# CHECK: [14] Code Region - G15
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.7 0.1 0.0 <total>
+
+# CHECK: [15] Code Region - G16
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.7 0.1 0.0 <total>
+
+# CHECK: [16] Code Region - G17
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.7 0.1 0.0 <total>
+
+# CHECK: [17] Code Region - G18
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.7 0.1 0.0 <total>
+
+# CHECK: [18] Code Region - G19
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.6 0.1 0.0 <total>
+
+# CHECK: [19] Code Region - G20
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2900
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.72
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [20] Code Region - G21
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2700
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.5 0.1 0.0 <total>
+
+# CHECK: [21] Code Region - G22
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.65
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.5 0.1 0.0 <total>
+
+# CHECK: [22] Code Region - G23
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.6 0.1 0.0 <total>
+
+# CHECK: [23] Code Region - G24
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.6 0.1 0.0 <total>
+
+# CHECK: [24] Code Region - G25
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.6 0.1 0.0 <total>
+
+# CHECK: [25] Code Region - G26
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.6 0.1 0.0 <total>
+
+# CHECK: [26] Code Region - G27
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.70
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.6 0.1 0.0 <total>
+
+# CHECK: [27] Code Region - G28
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3700
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.92
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [28] Code Region - G29
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.95
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 4.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.1 0.1 0.0 <total>
+
+# CHECK: [29] Code Region - G30
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3700
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.92
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [30] Code Region - G31
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.87
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [31] Code Region - G32
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.87
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [32] Code Region - G33
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.87
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [33] Code Region - G34
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.87
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [34] Code Region - G35
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.87
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.2 0.1 0.0 <total>
+
+# CHECK: [35] Code Region - G36
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4204
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.07
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 5.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012345
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeeER. . . . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] .D================eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D===============eeeeeeeeER . . . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,5] . D=======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D======================eeeeeeeeER . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,7] . D==============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=============================eeeeeeeeeER. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 23.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 7. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 30.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.8 0.1 0.0 <total>
+
+# CHECK: [36] Code Region - G37
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4304
+# CHECK-NEXT: Total uOps: 4800
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] D========eER . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeeER. . . . . .. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] .D================eER . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D===============eeeeeeeeeER . . . .. ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,5] . D========================eER. . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=======================eeeeeeeeeER . .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . D================================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===============================eeeeeeeeER. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 33.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.5 0.1 0.0 <total>
+
+# CHECK: [37] Code Region - G38
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4304
+# CHECK-NEXT: Total uOps: 4800
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeeER. . . . . .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,3] .D================eER . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D===============eeeeeeeeER . . . .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . D=======================eER . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D======================eeeeeeeeeER . .. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . D===============================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============================eeeeeeeeeER. ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . D=======================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 23.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 20.1 0.1 0.0 <total>
+
+# CHECK: [38] Code Region - G39
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [39] Code Region - G40
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [40] Code Region - G41
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [41] Code Region - G42
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [42] Code Region - G43
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 4004
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 19.0 0.1 0.0 <total>
+
+# CHECK: [43] Code Region - G44
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3604
+# CHECK-NEXT: Total uOps: 3300
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.92
+# CHECK-NEXT: IPC: 0.28
+# CHECK-NEXT: Block RThroughput: 3.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D========eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D===============eER. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . D======================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D======================eeeeeeER . . ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,7] . D============================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D============================eeeeeeER. ldp d1, d2, [x27], #496
+# CHECK-NEXT: [0,9] . D==================================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 23.0 0.0 0.0 ldp s1, s2, [x27], #248
+# CHECK-NEXT: 7. 1 29.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ldp d1, d2, [x27], #496
+# CHECK-NEXT: 9. 1 35.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 18.8 0.1 0.0 <total>
+
+# CHECK: [44] Code Region - G45
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2804
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.61
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . .. ldp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] D======eER. . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . .. ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] D============eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . .. ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] D==================eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] D==================eeeeeeER .. ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] D========================eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======================eeeeER. ldp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] .D===========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ldp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.6 0.1 0.0 <total>
+
+# CHECK: [45] Code Region - G46
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1604
+# CHECK-NEXT: Total uOps: 1900
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.18
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . . ldp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eE--R . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . . . ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] D=====eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D=====eeeeER . . ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] D======eE--R . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D======eeeeeER . . ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: [0,7] D===========eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D==========eeeeeER. ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: [0,9] .D===============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 6.0 0.0 0.0 ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 7.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: 7. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 11.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: 9. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.0 0.1 0.4 <total>
+
+# CHECK: [46] Code Region - G47
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ldr b1, [x27], #254
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ldr h1, [x27], #254
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ldr s1, [x27], #254
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D==================eeeeeeER . . ldr d1, [x27], #254
+# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] D========================eeeeeeER. ldr q1, [x27], #254
+# CHECK-NEXT: [0,9] D==============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ldr h1, [x27], #254
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ldr s1, [x27], #254
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ldr d1, [x27], #254
+# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ldr q1, [x27], #254
+# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.0 0.1 0.0 <total>
+
+# CHECK: [47] Code Region - G48
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ldr b1, [x27, #254]!
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ldr h1, [x27, #254]!
+# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . . . ldr s1, [x27, #254]!
+# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D==================eeeeeeER . . ldr d1, [x27, #254]!
+# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] D========================eeeeeeER. ldr q1, [x27, #254]!
+# CHECK-NEXT: [0,9] D==============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]!
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ldr h1, [x27, #254]!
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ldr s1, [x27, #254]!
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ldr d1, [x27, #254]!
+# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ldr q1, [x27, #254]!
+# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 16.0 0.1 0.0 <total>
+
+# CHECK: [48] Code Region - G49
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr w1, [x27], #254
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldr w1, [x27, #254]!
+# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeER. ldr x1, [x27, #254]!
+# CHECK-NEXT: [0,7] D====eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeER ldrb w1, [x27], #254
+# CHECK-NEXT: [0,9] D=====eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldr x1, [x27, #254]!
+# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldrb w1, [x27], #254
+# CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 1.0 <total>
+
+# CHECK: [49] Code Region - G50
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldrh w1, [x27], #254
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldrh w1, [x27, #254]!
+# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeER. ldrsb w1, [x27], #254
+# CHECK-NEXT: [0,7] D====eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeER ldrsb x1, [x27], #254
+# CHECK-NEXT: [0,9] D=====eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrh w1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrh w1, [x27, #254]!
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldrsb w1, [x27], #254
+# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldrsb x1, [x27], #254
+# CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 1.0 <total>
+
+# CHECK: [50] Code Region - G51
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldrsb x1, [x27, #254]!
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldrsh w1, [x27], #254
+# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeER. ldrsh x1, [x27], #254
+# CHECK-NEXT: [0,7] D====eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeER ldrsh w1, [x27, #254]!
+# CHECK-NEXT: [0,9] D=====eE--R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsb x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsh w1, [x27], #254
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldrsh x1, [x27], #254
+# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldrsh w1, [x27, #254]!
+# CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 1.0 <total>
+
+# CHECK: [51] Code Region - G52
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.41
+# CHECK-NEXT: IPC: 1.42
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeER . ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeE-R . st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,7] D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeER. st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,9] .D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]!
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 1.0 st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.7 0.1 0.7 <total>
+
+# CHECK: [52] Code Region - G53
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] D======eeER . st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.7 0.1 0.0 <total>
+
+# CHECK: [53] Code Region - G54
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] D======eeER . st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.7 0.1 0.0 <total>
+
+# CHECK: [54] Code Region - G55
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] D======eeER . st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.7 0.1 0.0 <total>
+
+# CHECK: [55] Code Region - G56
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.39
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.6 0.1 0.0 <total>
+
+# CHECK: [56] Code Region - G57
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.59
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.5 0.1 0.0 <total>
+
+# CHECK: [57] Code Region - G58
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.59
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.6 0.1 0.0 <total>
+
+# CHECK: [58] Code Region - G59
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 3.39
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [59] Code Region - G60
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3600
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 3.59
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [60] Code Region - G61
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 3.39
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.2 0.1 0.0 <total>
+
+# CHECK: [61] Code Region - G62
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3600
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 3.59
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,7] . D======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.1 0.1 0.0 <total>
+
+# CHECK: [62] Code Region - G63
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 4200
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 4.18
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D====eeER . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,7] . D======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . D=======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.8 0.1 0.0 <total>
+
+# CHECK: [63] Code Region - G64
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 3.78
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D====eeER . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . D======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . D=======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.8 0.1 0.0 <total>
+
+# CHECK: [64] Code Region - G65
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1604
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.00
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 5.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D==eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=eeER . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] .D===eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===eeeeER . . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,5] .D=======eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D======eeeeER. . st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,7] . D==========eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==========eeeeER. st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D==============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 5. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 7. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 11.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 6.6 0.1 0.0 <total>
+
+# CHECK: [65] Code Region - G66
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D========eeeeER. . . st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,5] D============eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D============eeeeER . . st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============eeeeER. st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.7 0.1 0.0 <total>
+
+# CHECK: [66] Code Region - G67
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.10
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D========eeeeER. . . st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,5] D============eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D============eeeeER . . st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.7 0.1 0.0 <total>
+
+# CHECK: [67] Code Region - G68
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.20
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D========eeeeER. . . st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,5] D============eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.6 0.1 0.0 <total>
+
+# CHECK: [68] Code Region - G69
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.30
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D========eeeeER. . . st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.5 0.1 0.0 <total>
+
+# CHECK: [69] Code Region - G70
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.20
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D========eeeeER. . . st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.5 0.1 0.0 <total>
+
+# CHECK: [70] Code Region - G71
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D========eeeeER. . . st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,5] D============eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D============eeeeER . . st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.7 0.1 0.0 <total>
+
+# CHECK: [71] Code Region - G72
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D========eeeeER. . . st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] D============eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D============eeeeER . . st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.7 0.1 0.0 <total>
+
+# CHECK: [72] Code Region - G73
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 1304
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.53
+# CHECK-NEXT: IPC: 0.46
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeER . .. st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,1] D=====eER . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeER .. st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,3] D=========eER .. add x0, x27, #1
+# CHECK-NEXT: [0,4] .D========eeeeER. st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,5] .D============eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.5 0.2 0.0 <total>
+
+# CHECK: [73] Code Region - G74
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2404
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.58
+# CHECK-NEXT: IPC: 0.42
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] D=========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D========eeeeeER . . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,5] .D=============eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=============eeeeeER . . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,7] . D=================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=================eeeeeER. st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D======================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 14.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 18.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.9 0.1 0.0 <total>
+
+# CHECK: [74] Code Region - G75
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2204
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.54
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeER . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] .D============eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D============eeeeER. . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] .D================eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===============eeeeeER. st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.8 0.1 0.0 <total>
+
+# CHECK: [75] Code Region - G76
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2104
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.52
+# CHECK-NEXT: IPC: 0.48
+# CHECK-NEXT: Block RThroughput: 5.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeeeER . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=====eeeeER . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,3] D=========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D========eeeeER . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,5] .D============eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D============eeeeER. . st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D================eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D===============eeeeER. st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 7. 1 17.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.1 0.1 0.0 <total>
+
+# CHECK: [76] Code Region - G77
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeER . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeER. st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.2 0.1 0.0 <total>
+
+# CHECK: [77] Code Region - G78
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2204
+# CHECK-NEXT: Total uOps: 3600
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.63
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeER . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=======eeeeER. . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===========eeeeER . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,7] . D==============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==============eeeeeeER. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,9] . D====================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 7. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 9. 1 21.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 10.3 0.1 0.0 <total>
+
+# CHECK: [78] Code Region - G79
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3304
+# CHECK-NEXT: Total uOps: 5800
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.76
+# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] D======eER. . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] .D=====eeeeeeeER . . . .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] .D============eER . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . D===========eeeeeeER . . .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,5] . D=================eER . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . D================eeeeeeeER . .. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,7] . D=======================eER. .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======================eeeeeeeER. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,9] . D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.1 0.1 0.0 <total>
+
+# CHECK: [79] Code Region - G80
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 2904
+# CHECK-NEXT: Total uOps: 4800
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.65
+# CHECK-NEXT: IPC: 0.34
+# CHECK-NEXT: Block RThroughput: 9.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeER . . . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D====eER . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D===eeeeeeER . . . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] .D=========eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=========eeeeeeER . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . D==============eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==============eeeeeeeER . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . D=====================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D====================eeeeeeER. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . D==========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 15.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 21.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 13.0 0.1 0.0 <total>
+
+# CHECK: [80] Code Region - G81
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3204
+# CHECK-NEXT: Total uOps: 5200
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.62
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D=======eER . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] .D=============eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . D============eeeeeeER . . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,5] . D==================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D==================eeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,7] . D=======================eER. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D=======================eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . D=============================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 19.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 24.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 15.9 0.1 0.0 <total>
+
+# CHECK: [81] Code Region - G82
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.33
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D================eeeeeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . D======================eER . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . D======================eeeeeeER. st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . D===========================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 17.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 23.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 23.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 28.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 14.8 0.1 0.0 <total>
+
+# CHECK: [82] Code Region - G83
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total uOps: 2800
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.40
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeER . . . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] D======eER. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D======eeeeeeER. . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D===========eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D===========eeeeER . . st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,5] .D===============eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D===============eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===================eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 12.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 5. 1 16.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 16.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 11.4 0.1 0.0 <total>
+
+# CHECK: [83] Code Region - G84
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 404
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeER. . stp s1, s2, [x27], #248
+# CHECK-NEXT: [0,1] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER. stp d1, d2, [x27], #496
+# CHECK-NEXT: [0,3] D====eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp s1, s2, [x27], #248
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp d1, d2, [x27], #496
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.0 0.3 0.0 <total>
+
+# CHECK: [84] Code Region - G85
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 904
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.43
+# CHECK-NEXT: IPC: 1.11
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . stp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . stp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeER . . stp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D=====eeER . stp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,7] .D=======eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======eER. stp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] .D========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp s1, s2, [x27, #248]!
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 stp d1, d2, [x27, #496]!
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 stp q1, q2, [x27, #992]!
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 8.0 0.0 0.0 stp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.5 0.1 0.0 <total>
+
+# CHECK: [85] Code Region - G86
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.84
+# CHECK-NEXT: IPC: 1.42
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eER. . stp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eER . stp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeER . str b1, [x27], #254
+# CHECK-NEXT: [0,7] .D====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeER. str h1, [x27], #254
+# CHECK-NEXT: [0,9] .D======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp w1, w2, [x27, #248]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 stp x1, x2, [x27, #496]!
+# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 str b1, [x27], #254
+# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 str h1, [x27], #254
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.6 0.1 0.0 <total>
+
+# CHECK: [86] Code Region - G87
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . str s1, [x27], #254
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . str d1, [x27], #254
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeER . . str q1, [x27], #254
+# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] D======eeER . str b1, [x27, #254]!
+# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=======eeER. str h1, [x27, #254]!
+# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 str d1, [x27], #254
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 str q1, [x27], #254
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 str b1, [x27, #254]!
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 8.0 0.0 0.0 str h1, [x27, #254]!
+# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.7 0.1 0.0 <total>
+
+# CHECK: [87] Code Region - G88
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 804
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 2.49
+# CHECK-NEXT: IPC: 1.24
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. .. str s1, [x27, #254]!
+# CHECK-NEXT: [0,1] D==eER .. add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER .. str d1, [x27, #254]!
+# CHECK-NEXT: [0,3] D====eER .. add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeER .. str q1, [x27, #254]!
+# CHECK-NEXT: [0,5] D======eER.. add x0, x27, #1
+# CHECK-NEXT: [0,6] D======eER.. str w1, [x27], #254
+# CHECK-NEXT: [0,7] .D======eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D======eER. str x1, [x27], #254
+# CHECK-NEXT: [0,9] .D=======eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]!
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 str d1, [x27, #254]!
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 str q1, [x27, #254]!
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 str w1, [x27], #254
+# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 str x1, [x27], #254
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.3 0.1 0.0 <total>
+
+# CHECK: [88] Code Region - G89
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1000
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 3.97
+# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . str w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eER. . str x1, [x27, #254]!
+# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eER . strb w1, [x27], #254
+# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eER . strb w1, [x27, #254]!
+# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eER. strh w1, [x27], #254
+# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 strb w1, [x27], #254
+# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 strb w1, [x27, #254]!
+# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 strh w1, [x27], #254
+# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 0.0 <total>
+
+# CHECK: [89] Code Region - G90
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 104
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 3.85
+# CHECK-NEXT: IPC: 1.92
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234
+
+# CHECK: [0,0] DeER. strh w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.5 0.5 0.0 <total>
+
+# CHECK: [90] Code Region - G91
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 110
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 5.45
+# CHECK-NEXT: IPC: 3.64
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,2] D====eeeeER ldr x2, [x1], #254
+# CHECK-NEXT: [0,3] D=eE------R add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ldr x2, [x1], #254
+# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.5 0.3 2.0 <total>
More information about the llvm-commits
mailing list