[llvm] 475d687 - [AArch64] Fix postinc operands for Cortex-A55 scheduling
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 10 04:56:40 PDT 2023
Author: David Green
Date: 2023-10-10T12:56:33+01:00
New Revision: 475d687ac1123f7ea01bc90789d93b5938930ca1
URL: https://github.com/llvm/llvm-project/commit/475d687ac1123f7ea01bc90789d93b5938930ca1
DIFF: https://github.com/llvm/llvm-project/commit/475d687ac1123f7ea01bc90789d93b5938930ca1.diff
LOG: [AArch64] Fix postinc operands for Cortex-A55 scheduling
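Similar to D159254, this fixes the order of the WriteAdr operands on post/pre-inc
loads/stores in the Cortex-A55 scheduling model: WriteAdr is now listed first in
each InstRW, ahead of the CortexA55WriteVLD*/CortexA55WriteVST* write. As the
updated llvm-mca test shows, an instruction that uses the written-back base
register no longer waits for the full load latency.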
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SchedA55.td
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-writeback.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
index 533fb9330260606..cb77be350d12444 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -285,16 +285,16 @@ def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
// 2-element structures
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
@@ -302,10 +302,10 @@ def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$"
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
// 3-element structures
def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
@@ -313,10 +313,10 @@ def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$"
def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
// 4-element structures
def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs.
@@ -324,10 +324,10 @@ def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$"
def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs.
def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
//---
// Vector Stores
@@ -337,28 +337,28 @@ def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)
def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
//---
// Floating Point Conversions, MAC, DIV, SQRT
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-writeback.s
index 76f46ccf0c5cb4a..0fded5f8f08f49f 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-writeback.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-writeback.s
@@ -1162,28 +1162,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3701
+# CHECK-NEXT: Total Cycles: 2201
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.41
-# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 01234567
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
-# CHECK: [0,0] DeeeE. . . . . . . . ld1 { v1.1d }, [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.2d }, [x27], #16
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE. . . . . ld1 { v1.2s }, [x27], #8
-# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeE . . . ld1 { v1.4h }, [x27], #8
-# CHECK-NEXT: [0,7] . . . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeeE. . ld1 { v1.4s }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE. . . . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,3] . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,5] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . . ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1208,28 +1208,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3801
+# CHECK-NEXT: Total Cycles: 2301
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.39
-# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: uOps Per Cycle: 0.65
+# CHECK-NEXT: IPC: 0.43
# CHECK-NEXT: Block RThroughput: 8.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345678
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeE. . . . . . . . ld1 { v1.8b }, [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.8h }, [x27], #16
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . ld1 { v1.16b }, [x27], #16
-# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeE . . . ld1 { v1.1d }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeeE . ld1 { v1.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE. . . . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,3] . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE. . . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,5] . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . . ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1254,28 +1254,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3701
+# CHECK-NEXT: Total Cycles: 2201
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.41
-# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 01234567
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
-# CHECK: [0,0] DeeeE. . . . . . . . ld1 { v1.2s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . . ld1 { v1.4h }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE. . . . . ld1 { v1.4s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeE . . . ld1 { v1.8b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeeE. . ld1 { v1.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . . ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1300,28 +1300,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4201
+# CHECK-NEXT: Total Cycles: 2701
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.36
-# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: uOps Per Cycle: 0.56
+# CHECK-NEXT: IPC: 0.37
# CHECK-NEXT: Block RThroughput: 12.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 012
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
-# CHECK: [0,0] DeeeeE . . . . . . . . ld1 { v1.16b }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld1 { v1.1d, v2.1d }, [x27], #16
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeeeE . . . . . ld1 { v1.2d, v2.2d }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . .DeeeeE . . . ld1 { v1.2s, v2.2s }, [x27], #16
-# CHECK-NEXT: [0,7] . . . . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE. . ld1 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeeeE . . . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1346,28 +1346,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4601
+# CHECK-NEXT: Total Cycles: 3101
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.33
-# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: uOps Per Cycle: 0.48
+# CHECK-NEXT: IPC: 0.32
# CHECK-NEXT: Block RThroughput: 16.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0123456
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . .. ld1 { v1.4s, v2.4s }, [x27], #32
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . .. ld1 { v1.8b, v2.8b }, [x27], #16
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . .. ld1 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . .. ld1 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . . DeeE . .. add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeE .. ld1 { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . .. ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . .. ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeE. . .. ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . . . .DeeE. . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeeE .. ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . DeeE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . .DeeeeE ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1392,28 +1392,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4401
+# CHECK-NEXT: Total Cycles: 2901
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.34
-# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: uOps Per Cycle: 0.52
+# CHECK-NEXT: IPC: 0.34
# CHECK-NEXT: Block RThroughput: 14.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 01234
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
-# CHECK: [0,0] DeeeeeeE . . . . . . . . ld1 { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeE . . . . . ld1 { v1.4h, v2.4h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . .DeeeeeeE . . . ld1 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . .DeeeeE . ld1 { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . .DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeeeE . . . . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeeE. . ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1438,28 +1438,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5001
+# CHECK-NEXT: Total Cycles: 3501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.30
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 20.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 0
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld1 { v1.8h, v2.8h }, [x27], x28
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . ld1 { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeE . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
-# CHECK-NEXT: [0,5] . . . . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeeeE . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . .DeeeeeE . ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE. . . . . ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeeeeE. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeE ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1484,28 +1484,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5401
+# CHECK-NEXT: Total Cycles: 3901
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.28
-# CHECK-NEXT: IPC: 0.19
+# CHECK-NEXT: uOps Per Cycle: 0.38
+# CHECK-NEXT: IPC: 0.26
# CHECK-NEXT: Block RThroughput: 24.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 01234
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeE . . . . . . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeeeE . . . . . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . .DeeeeeE . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeeeE. . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeeE . ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: [0,9] . . . . . . . . . . .DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeeeeE . . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeeeeE . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeeeeE ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1530,28 +1530,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5101
+# CHECK-NEXT: Total Cycles: 3601
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.29
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.28
# CHECK-NEXT: Block RThroughput: 21.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 01
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeE . . . . . . . . .. ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeeeE . . . . . . .. ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . .DeeeeeE . . . . .. ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeE . . .. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . .DeeE. . .. add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeeeE .. ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . .. ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeeeeE . . . .. ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . .. ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeE . .. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeeeeE ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1576,28 +1576,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5701
+# CHECK-NEXT: Total Cycles: 4201
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.26
-# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.24
# CHECK-NEXT: Block RThroughput: 27.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 01234567
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeE . . . . . . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeeeE . . . . . . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . .DeeeeeeeeE . . . . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeE . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeeeeE. . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,9] . . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeE . . . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeeeeE . . . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeeeE. . . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeeeE . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . .DeeeeeeeeeeE ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1622,28 +1622,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5801
+# CHECK-NEXT: Total Cycles: 4301
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.26
-# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: uOps Per Cycle: 0.35
+# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: Block RThroughput: 28.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 012345678
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeeeeeE . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: [0,5] . . . . . . .DeeE. . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeE . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeeeeE . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,9] . . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE. . . . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeeeeeE . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeeeE . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeeeeeeeE ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1668,28 +1668,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5801
+# CHECK-NEXT: Total Cycles: 4301
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.26
-# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: uOps Per Cycle: 0.35
+# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: Block RThroughput: 28.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 012345678
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeeeeeE . . . . . . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: [0,1] . . .DeeE. . . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . . DeeeeeeE . . . . . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . .DeeE. . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . . DeeeeeeeeeeE . . . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . . . DeeeeeeE . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . . . DeeeeeeE . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeeeE . . . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . .DeeeeeeE . . . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeeeeeE. . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . .DeeeeeeE ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1714,28 +1714,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5901
+# CHECK-NEXT: Total Cycles: 4401
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.25
-# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: uOps Per Cycle: 0.34
+# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: Block RThroughput: 29.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeeeeeE . . . . . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,1] . . .DeeE. . . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . . DeeeeeeE . . . . . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . .DeeE. . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . . DeeeeeeeeeeE . . . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . . . DeeeeeeeeeeE. . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . . . . DeeeE . ld1 { v1.b }[0], [x27], #1
-# CHECK-NEXT: [0,9] . . . . . . . . . . . .DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234
+
+# CHECK: [0,0] DeeeeeeeeeeE . . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . .DeeeeeeE . . . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeeeeeE. . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeeeeeE . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeE ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,9] . . . . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1760,28 +1760,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.b }[8], [x27], #1
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.b }[0], [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.b }[8], [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.h }[0], [x27], #2
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.h }[4], [x27], #2
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1806,28 +1806,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.h }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.h }[4], [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.s }[0], [x27], #4
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.s }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.d }[0], [x27], #8
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1852,28 +1852,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.d }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.1d }, [x27], #8
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.2d }, [x27], #8
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.2s }, [x27], #4
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.4h }, [x27], #2
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1898,28 +1898,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1r { v1.4s }, [x27], #4
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.8b }, [x27], #1
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.8h }, [x27], #2
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.16b }, [x27], #1
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.1d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1944,28 +1944,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1r { v1.2d }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.2s }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.4h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.4s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.8b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1990,28 +1990,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeE. . . . . . . . ld1r { v1.8h }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . . ld1r { v1.16b }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeeeE . . . . ld2 { v1.2d, v2.2d }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2 { v1.2s, v2.2s }, [x27], #16
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . . ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeeeE . . ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2036,28 +2036,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4801
+# CHECK-NEXT: Total Cycles: 3301
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.31
-# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: uOps Per Cycle: 0.45
+# CHECK-NEXT: IPC: 0.30
# CHECK-NEXT: Block RThroughput: 18.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 012345678
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld2 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld2 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld2 { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeE. . . . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeeE . . ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . .DeeeeeeE ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2082,28 +2082,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4401
+# CHECK-NEXT: Total Cycles: 2901
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.34
-# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: uOps Per Cycle: 0.52
+# CHECK-NEXT: IPC: 0.34
# CHECK-NEXT: Block RThroughput: 14.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 01234
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
-# CHECK: [0,0] DeeeeE . . . . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld2 { v1.4h, v2.4h }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeeeE . . . . . ld2 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . .DeeeeE . . . ld2 { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeeeE . ld2 { v1.8h, v2.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . .DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeeeE . . . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeeeE ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2128,28 +2128,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4201
+# CHECK-NEXT: Total Cycles: 2701
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.36
-# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: uOps Per Cycle: 0.56
+# CHECK-NEXT: IPC: 0.37
# CHECK-NEXT: Block RThroughput: 12.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 012
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
-# CHECK: [0,0] DeeeeeeE . . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeE . . . . . ld2 { v1.b, v2.b }[8], [x27], #2
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . .DeeeeE . . . ld2 { v1.b, v2.b }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE. . ld2 { v1.b, v2.b }[8], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeeeE . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2174,28 +2174,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld2 { v1.h, v2.h }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2 { v1.h, v2.h }[4], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2 { v1.s, v2.s }[0], [x27], #8
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2220,28 +2220,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld2 { v1.s, v2.s }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld2 { v1.d, v2.d }[0], [x27], #16
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld2 { v1.d, v2.d }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2r { v1.1d, v2.1d }, [x27], #16
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2r { v1.2d, v2.2d }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2266,28 +2266,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld2r { v1.2s, v2.2s }, [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld2r { v1.4h, v2.4h }, [x27], #4
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld2r { v1.4s, v2.4s }, [x27], #8
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2r { v1.8b, v2.8b }, [x27], #2
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2r { v1.8h, v2.8h }, [x27], #4
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2312,28 +2312,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld2r { v1.16b, v2.16b }, [x27], #2
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld2r { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld2r { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2r { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2r { v1.4h, v2.4h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2358,28 +2358,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4401
+# CHECK-NEXT: Total Cycles: 2901
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.34
-# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: uOps Per Cycle: 0.52
+# CHECK-NEXT: IPC: 0.34
# CHECK-NEXT: Block RThroughput: 14.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 01234
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
-# CHECK: [0,0] DeeeeE . . . . . . . . ld2r { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld2r { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . . ld2r { v1.8h, v2.8h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . . ld2r { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeeeeeE . ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
-# CHECK-NEXT: [0,9] . . . . . . . . .DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . . ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . . ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . . ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . . ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeeeeeE ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2404,28 +2404,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5101
+# CHECK-NEXT: Total Cycles: 3601
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.29
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.28
# CHECK-NEXT: Block RThroughput: 21.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 01
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeE . . . . . . . . .. ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . .. ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeeeE . . . . .. ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeE . . .. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
-# CHECK-NEXT: [0,7] . . . . . . . .DeeE. . .. add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeeeE .. ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . .. ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeE . . . . .. ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,3] . . DeeE . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeeeE . . .. ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,5] . . . . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeE . .. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,7] . . . . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeeeeE ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2450,28 +2450,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5401
+# CHECK-NEXT: Total Cycles: 3901
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.28
-# CHECK-NEXT: IPC: 0.19
+# CHECK-NEXT: uOps Per Cycle: 0.38
+# CHECK-NEXT: IPC: 0.26
# CHECK-NEXT: Block RThroughput: 24.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 01234
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeeeE. . . . . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . . DeeeeeeeeE . . . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . .DeeE. . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . . DeeeeeE . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . . DeeeeeE. . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeeE . ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . . .DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeE. . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] . .DeeE. . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeeE . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeE. . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeeE . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeeeeE ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2496,28 +2496,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4901
+# CHECK-NEXT: Total Cycles: 3401
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.31
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.44
+# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 19.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeE . . . . . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeeeE . . . . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . .DeeeeeeeeE . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . . DeeeeE . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . .DeeeeE . ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
-# CHECK-NEXT: [0,9] . . . . . . . . . .DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeeeeE . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeeeE. . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeE. . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,7] . . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeE ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,9] . . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2542,28 +2542,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2588,28 +2588,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2634,28 +2634,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2680,28 +2680,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2726,28 +2726,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2772,28 +2772,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5601
+# CHECK-NEXT: Total Cycles: 4101
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.27
-# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.24
# CHECK-NEXT: Block RThroughput: 26.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 0123456
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . . . . .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeeeeeE. . . . . . . .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . . DeeeeeeE. . . . . .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeE. . . .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . .. add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeeeeE .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: [0,9] . . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01
+
+# CHECK: [0,0] DeeeeE . . . . . . .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeeeeeeeE . . . . .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] . . . DeeE . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeeeE . . . .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeeeE . .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeeeeeeE ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2818,28 +2818,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 6201
+# CHECK-NEXT: Total Cycles: 4701
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.24
-# CHECK-NEXT: IPC: 0.16
+# CHECK-NEXT: uOps Per Cycle: 0.32
+# CHECK-NEXT: IPC: 0.21
# CHECK-NEXT: Block RThroughput: 32.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789 012
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeeeeeE . . . . . . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,3] . . . . .DeeE. . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . . DeeeeeeeeeeE . . . . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: [0,5] . . . . . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . . . DeeeeeeeeeeE. . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . . . . DeeeeeeE. . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeeeeE . . . . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeeeeeE. . . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . . DeeeeeeeeeeE . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2864,28 +2864,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 6201
+# CHECK-NEXT: Total Cycles: 4701
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.24
-# CHECK-NEXT: IPC: 0.16
+# CHECK-NEXT: uOps Per Cycle: 0.32
+# CHECK-NEXT: IPC: 0.21
# CHECK-NEXT: Block RThroughput: 32.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789 012
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeeeeeE . . . . . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . .DeeE. . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . . DeeeeeeE . . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . . .DeeE. . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeeeeeE . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . . . DeeeeeeeeeeE. . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeeeeE . . . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeeeeeeeE . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . .DeeeeeeeeeeE ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2910,28 +2910,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2956,28 +2956,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3002,28 +3002,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3048,28 +3048,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3094,28 +3094,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3140,28 +3140,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3301
+# CHECK-NEXT: Total Cycles: 2401
# CHECK-NEXT: Total uOps: 1700
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.51
-# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: uOps Per Cycle: 0.71
+# CHECK-NEXT: IPC: 0.42
# CHECK-NEXT: Block RThroughput: 9.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeE . . ldp s1, s2, [x27], #248
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeeE ldp d1, d2, [x27], #496
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeeE . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,7] . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ldp d1, d2, [x27], #496
+# CHECK-NEXT: [0,9] . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3508,28 +3508,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2201
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.68
-# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 012
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeE . . . . . ldrsh x1, [x27, #254]!
-# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeE . . . . ldrsw x1, [x27], #254
-# CHECK-NEXT: [0,3] . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeE . . . ldrsw x1, [x27, #254]!
-# CHECK-NEXT: [0,5] . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . DeeeE. . . st1 { v1.1d }, [x27], #8
-# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . DeeeE. . st1 { v1.2d }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeE . . . . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeE . . . ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeE . . ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3554,28 +3554,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.2s }, [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.4h }, [x27], #8
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.4s }, [x27], #16
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.8b }, [x27], #8
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.8h }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3600,28 +3600,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.16b }, [x27], #16
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.1d }, [x27], x28
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.2d }, [x27], x28
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.2s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.4h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3646,28 +3646,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.4s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.8b }, [x27], x28
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.8h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.16b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.1d, v2.1d }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3692,30 +3692,30 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
-
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.2d, v2.2d }, [x27], #32
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.2s, v2.2s }, [x27], #16
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.4s, v2.4s }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.8b, v2.8b }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: Average Wait times (based on the timeline view):
+# CHECK: [0,0] DeeeE. . . . st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+
+# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
@@ -3738,28 +3738,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3784,28 +3784,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.4h, v2.4h }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.8h, v2.8h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3830,28 +3830,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3876,28 +3876,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3922,28 +3922,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3968,28 +3968,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 18.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4014,28 +4014,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 20.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4060,28 +4060,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 20.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4106,28 +4106,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2703
+# CHECK-NEXT: Total Cycles: 2201
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.55
-# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
# CHECK-NEXT: Block RThroughput: 11.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 012
-# CHECK: [0,0] DeeeeE . . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . st1 { v1.b }[0], [x27], #1
-# CHECK-NEXT: [0,5] . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeE . . st1 { v1.b }[8], [x27], #1
-# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeE . st1 { v1.b }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,5] . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . . st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4152,28 +4152,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.b }[8], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.h }[0], [x27], #2
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.h }[4], [x27], #2
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.h }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.h }[4], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4198,28 +4198,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2603
+# CHECK-NEXT: Total Cycles: 2101
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.58
-# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: uOps Per Cycle: 0.71
+# CHECK-NEXT: IPC: 0.48
# CHECK-NEXT: Block RThroughput: 8.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 012345678
-
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.s }[0], [x27], #4
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.s }[0], [x27], x28
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.d }[0], [x27], #8
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.d }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeeE . st2 { v1.2d, v2.2d }, [x27], #32
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeE. . . .. st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] .DeeE. . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,5] . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeeE st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4244,28 +4244,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 14.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st2 { v1.2s, v2.2s }, [x27], #16
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st2 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st2 { v1.4s, v2.4s }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st2 { v1.8b, v2.8b }, [x27], #16
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st2 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4290,28 +4290,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 16.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st2 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st2 { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st2 { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st2 { v1.4h, v2.4h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st2 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4336,28 +4336,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 14.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st2 { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st2 { v1.8h, v2.8h }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st2 { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st2 { v1.b, v2.b }[0], [x27], #2
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st2 { v1.b, v2.b }[8], [x27], #2
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4382,28 +4382,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st2 { v1.b, v2.b }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st2 { v1.b, v2.b }[8], [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st2 { v1.h, v2.h }[0], [x27], #4
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st2 { v1.h, v2.h }[4], [x27], #4
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st2 { v1.h, v2.h }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4428,28 +4428,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st2 { v1.h, v2.h }[4], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st2 { v1.s, v2.s }[0], [x27], #8
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st2 { v1.s, v2.s }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st2 { v1.d, v2.d }[0], [x27], #16
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st2 { v1.d, v2.d }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4474,24 +4474,24 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
-# CHECK-NEXT: Total Cycles: 1803
+# CHECK-NEXT: Total Cycles: 1501
# CHECK-NEXT: Total uOps: 900
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 12.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 0
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeE . . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
-# CHECK-NEXT: [0,1] . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: [0,3] . . .DeeE. . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
-# CHECK-NEXT: [0,5] . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,1] . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,3] . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,5] . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4512,28 +4512,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 20.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4558,28 +4558,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 20.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4604,28 +4604,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 12.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st3 { v1.b, v2.b, v3.b }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st3 { v1.b, v2.b, v3.b }[8], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4650,28 +4650,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st3 { v1.s, v2.s, v3.s }[0], [x27], #12
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4696,28 +4696,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 14.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4742,28 +4742,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 20.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4788,28 +4788,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 20.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4834,28 +4834,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 14.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4880,28 +4880,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeeeeE . . . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4926,26 +4926,26 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
-# CHECK-NEXT: Total Cycles: 2403
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1200
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 8.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeeE . . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
-# CHECK-NEXT: [0,1] . DeeE . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
-# CHECK-NEXT: [0,5] . . . . DeeE .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
More information about the llvm-commits
mailing list