[llvm] 6c4b3dc - [AArch64] Fix postinc operands for Cortex-A53 scheduling
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 10 02:14:51 PDT 2023
Author: David Green
Date: 2023-10-10T10:14:44+01:00
New Revision: 6c4b3dc340d1afda02e68ce42ebafa77fa076797
URL: https://github.com/llvm/llvm-project/commit/6c4b3dc340d1afda02e68ce42ebafa77fa076797
DIFF: https://github.com/llvm/llvm-project/commit/6c4b3dc340d1afda02e68ce42ebafa77fa076797.diff
LOG: [AArch64] Fix postinc operands for Cortex-A53 scheduling
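Similar to D159254, this fixes the order of WriteAdr operands on post/pre-inc
loads/stores in the Cortex-A53 scheduling model. The writeback address
register is the first def operand of these instructions, so WriteAdr must be
listed first in the InstRW write list; with the old order the address update
was modelled with the vector load/store latency, which delayed instructions
that depend only on the updated base register (see the llvm-mca test changes).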
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SchedA53.td
llvm/test/tools/llvm-mca/AArch64/Cortex/A53-writeback.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA53.td b/llvm/lib/Target/AArch64/AArch64SchedA53.td
index 67e2b07692509ac..3e4168f5f445f58 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA53.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA53.td
@@ -215,39 +215,39 @@ def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD1], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD1], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[A53WriteVLD3], (instregex "LD3Threev2d$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev2d_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD3], (instregex "LD3Threev2d_POST$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVLD4], (instregex "LD4Fourv(2d)_POST$")>;
//---
// Vector Stores
@@ -257,32 +257,32 @@ def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST1], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST1], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST3Threev(2d)_POST$")>;
def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
-def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[WriteAdr, A53WriteVST2], (instregex "ST4Fourv(2d)_POST$")>;
//---
// Floating Point MAC, DIV, SQRT
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A53-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A53-writeback.s
index c5ca6f9f1764aa0..ed5b0869e53ded9 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A53-writeback.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A53-writeback.s
@@ -1162,28 +1162,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.1d }, [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.2d }, [x27], #16
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.2s }, [x27], #8
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.4h }, [x27], #8
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.4s }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1208,28 +1208,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.8b }, [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.8h }, [x27], #16
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.16b }, [x27], #16
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.1d }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1254,28 +1254,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.2s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.4h }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.4s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.8b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1300,28 +1300,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3901
+# CHECK-NEXT: Total Cycles: 2401
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.38
-# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.42
# CHECK-NEXT: Block RThroughput: 9.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeE. . . . . . . . ld1 { v1.16b }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.1d, v2.1d }, [x27], #16
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . ld1 { v1.2d, v2.2d }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE . . . ld1 { v1.2s, v2.2s }, [x27], #16
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . .DeeeeE . ld1 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . . . .DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeE. . . . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE. . . . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,3] . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE. . . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE. . ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1346,28 +1346,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld1 { v1.4s, v2.4s }, [x27], #32
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.8b, v2.8b }, [x27], #16
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld1 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld1 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld1 { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1392,28 +1392,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld1 { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld1 { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld1 { v1.4h, v2.4h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld1 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld1 { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1438,28 +1438,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4301
+# CHECK-NEXT: Total Cycles: 2801
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.35
-# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: uOps Per Cycle: 0.54
+# CHECK-NEXT: IPC: 0.36
# CHECK-NEXT: Block RThroughput: 13.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0123
-
-# CHECK: [0,0] DeeeeE . . . . . . . . ld1 { v1.8h, v2.8h }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld1 { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeeE . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeeE . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
-# CHECK-NEXT: [0,7] . . . . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeeE . ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeE . . . . . ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . . ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeeE . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . .DeeeeeE . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeeE ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1484,28 +1484,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4501
+# CHECK-NEXT: Total Cycles: 3001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.33
-# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 15.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 012345
-
-# CHECK: [0,0] DeeeeeE . . . . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeE. . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeE . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: [0,7] . . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . .DeeeeeE . ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeE . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeE. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeeE ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1530,28 +1530,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4501
+# CHECK-NEXT: Total Cycles: 3001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.33
-# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 15.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 012345
-
-# CHECK: [0,0] DeeeeeE . . . . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeE. . . . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeE . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . .DeeeeeE . ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeE . . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeE. . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeeE ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1576,28 +1576,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4701
+# CHECK-NEXT: Total Cycles: 3201
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.32
-# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: uOps Per Cycle: 0.47
+# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 17.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 01234567
-
-# CHECK: [0,0] DeeeeeE . . . . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeE. . . . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE. . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE. . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeE . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeE . . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeeE . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeeeE ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1622,28 +1622,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5001
+# CHECK-NEXT: Total Cycles: 3501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.30
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 20.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 0
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE. . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeE . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeeE . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeeE ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1668,28 +1668,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5001
+# CHECK-NEXT: Total Cycles: 3501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.30
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 20.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 0
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE. . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeE . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeeE . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeeE ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1714,28 +1714,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4701
+# CHECK-NEXT: Total Cycles: 3201
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.32
-# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: uOps Per Cycle: 0.47
+# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 17.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 01234567
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeE. . ld1 { v1.b }[0], [x27], #1
-# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE. . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeE . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeeE . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeE ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1760,28 +1760,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.b }[8], [x27], #1
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.b }[0], [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.b }[8], [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.h }[0], [x27], #2
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.h }[4], [x27], #2
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1806,28 +1806,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.h }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1 { v1.h }[4], [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1 { v1.s }[0], [x27], #4
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1 { v1.s }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1 { v1.d }[0], [x27], #8
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1852,28 +1852,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1 { v1.d }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.1d }, [x27], #8
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.2d }, [x27], #8
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.2s }, [x27], #4
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.4h }, [x27], #2
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1898,28 +1898,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1r { v1.4s }, [x27], #4
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.8b }, [x27], #1
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.8h }, [x27], #2
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.16b }, [x27], #1
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.1d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1944,28 +1944,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld1r { v1.2d }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld1r { v1.2s }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld1r { v1.4h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld1r { v1.4s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld1r { v1.8b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1990,28 +1990,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeE. . . . . . . . ld1r { v1.8h }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . . ld1r { v1.16b }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeeeE . . . . ld2 { v1.2d, v2.2d }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld2 { v1.2s, v2.2s }, [x27], #16
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld2 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeE. . . . . ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . . ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeeeE . . ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2036,28 +2036,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4801
+# CHECK-NEXT: Total Cycles: 3301
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.31
-# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: uOps Per Cycle: 0.45
+# CHECK-NEXT: IPC: 0.30
# CHECK-NEXT: Block RThroughput: 18.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 012345678
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld2 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld2 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld2 { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeE . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeE. . . . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeeE . . ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . .DeeeeeeE ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2082,28 +2082,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4401
+# CHECK-NEXT: Total Cycles: 2901
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.34
-# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: uOps Per Cycle: 0.52
+# CHECK-NEXT: IPC: 0.34
# CHECK-NEXT: Block RThroughput: 14.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 01234
-
-# CHECK: [0,0] DeeeeE . . . . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . . ld2 { v1.4h, v2.4h }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeeeE . . . . . ld2 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . .DeeeeE . . . ld2 { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeeeE . ld2 { v1.8h, v2.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . .DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeeeE . . . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeeeE ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2128,28 +2128,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3801
+# CHECK-NEXT: Total Cycles: 2301
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.39
-# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: uOps Per Cycle: 0.65
+# CHECK-NEXT: IPC: 0.43
# CHECK-NEXT: Block RThroughput: 8.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345678
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE. . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeE . . . . ld2 { v1.b, v2.b }[8], [x27], #2
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeE . . . ld2 { v1.b, v2.b }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . .DeeeE . ld2 { v1.b, v2.b }[8], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeE . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeE . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . .DeeeE . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2174,28 +2174,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld2 { v1.h, v2.h }[4], [x27], #4
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld2 { v1.h, v2.h }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld2 { v1.h, v2.h }[4], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld2 { v1.s, v2.s }[0], [x27], #8
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2220,28 +2220,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld2 { v1.s, v2.s }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld2 { v1.d, v2.d }[0], [x27], #16
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld2 { v1.d, v2.d }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld2r { v1.1d, v2.1d }, [x27], #16
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld2r { v1.2d, v2.2d }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2266,28 +2266,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld2r { v1.2s, v2.2s }, [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld2r { v1.4h, v2.4h }, [x27], #4
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld2r { v1.4s, v2.4s }, [x27], #8
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld2r { v1.8b, v2.8b }, [x27], #2
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld2r { v1.8h, v2.8h }, [x27], #4
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2312,28 +2312,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3501
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . . ld2r { v1.16b, v2.16b }, [x27], #2
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . ld2r { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . ld2r { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . ld2r { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE . ld2r { v1.4h, v2.4h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2358,28 +2358,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3701
+# CHECK-NEXT: Total Cycles: 2201
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.41
-# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 01234567
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
-# CHECK: [0,0] DeeeE. . . . . . . . ld2r { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeE . . . . . . ld2r { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . . . ld2r { v1.8h, v2.8h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeE . . . ld2r { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeeeE. . ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . . ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . . ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . . ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . . ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeeeE ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2404,28 +2404,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5001
+# CHECK-NEXT: Total Cycles: 3501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.30
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 20.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 0
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 012345
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE. . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeE . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeeeE . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeeE ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2450,28 +2450,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4901
+# CHECK-NEXT: Total Cycles: 3401
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.31
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.44
+# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 19.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,3] . . . .DeeE. . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeE . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeE . ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . .DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeE . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeeE . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeeE ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2496,28 +2496,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4601
+# CHECK-NEXT: Total Cycles: 3101
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.33
-# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: uOps Per Cycle: 0.48
+# CHECK-NEXT: IPC: 0.32
# CHECK-NEXT: Block RThroughput: 16.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0123456
-
-# CHECK: [0,0] DeeeeeeE . . . . . . . .. ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . .. ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeE . . . .. ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeE . .. ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
-# CHECK-NEXT: [0,7] . . . . . . . DeeE . .. add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeE .. ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
-# CHECK-NEXT: [0,9] . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeE . . . . .. ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeE. . . .. ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . .DeeE. . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeE . .. ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . .DeeeeE .. ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,7] . . . . . DeeE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . .DeeeeE ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2542,28 +2542,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2588,28 +2588,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
-# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2634,28 +2634,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2680,28 +2680,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2726,28 +2726,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2772,28 +2772,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5101
+# CHECK-NEXT: Total Cycles: 3601
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.29
-# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.28
# CHECK-NEXT: Block RThroughput: 21.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 01
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . . . .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeeE . . . . . . .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . DeeeeeeeE . . . . .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . . .DeeE. . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . DeeeeeeeE . . .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . .. add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . DeeeeeeeE .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: [0,9] . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeE . . . . . .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeeeE . . . . .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] . . DeeE . . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeeeE . . .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,5] . . . . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeeeE . .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . DeeeeeeeE ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2818,28 +2818,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5401
+# CHECK-NEXT: Total Cycles: 3901
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.28
-# CHECK-NEXT: IPC: 0.19
+# CHECK-NEXT: uOps Per Cycle: 0.38
+# CHECK-NEXT: IPC: 0.26
# CHECK-NEXT: Block RThroughput: 24.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 01234
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeeE . . . . . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . .DeeeeeeeE. . . . . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . . DeeeeeeeE . . . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeE . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeE . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . . .DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeE . . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeE . . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeeeeE. . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeeeE . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . .DeeeeeeeE ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2864,28 +2864,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 5501
+# CHECK-NEXT: Total Cycles: 4001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.27
-# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 25.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 012345
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeeeeE . . . . . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,1] . . DeeE . . . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . .DeeeeeeeE. . . . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,3] . . . . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . . . DeeeeeeeE . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . . . DeeeeeeeE . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . . . DeeeeeeeE . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeeeeeE . . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . . DeeeeeeeE . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . .DeeeeeeeE. . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . . DeeeeeeeE . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . . . DeeeeeeeE ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2910,28 +2910,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2956,28 +2956,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3002,28 +3002,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3048,28 +3048,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3094,28 +3094,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 4001
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.37
-# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0
-
-# CHECK: [0,0] DeeeeE . . . . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeeE. . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . . DeeeeE . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3140,28 +3140,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3201
+# CHECK-NEXT: Total Cycles: 2301
# CHECK-NEXT: Total uOps: 1700
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.53
-# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: uOps Per Cycle: 0.74
+# CHECK-NEXT: IPC: 0.43
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . .DeeeeE . . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . . DeeeE . . ldp s1, s2, [x27], #248
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeE ldp d1, d2, [x27], #496
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeE . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE. . ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,7] . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE ldp d1, d2, [x27], #496
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3508,28 +3508,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2201
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.68
-# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 012
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . ldrsh x1, [x27, #254]!
-# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE . . . . ldrsw x1, [x27], #254
-# CHECK-NEXT: [0,3] . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE . . . ldrsw x1, [x27, #254]!
-# CHECK-NEXT: [0,5] . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . DeeeE. . . st1 { v1.1d }, [x27], #8
-# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . DeeeE. . st1 { v1.2d }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3554,28 +3554,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.2s }, [x27], #8
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.4h }, [x27], #8
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.4s }, [x27], #16
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.8b }, [x27], #8
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.8h }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3600,28 +3600,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.16b }, [x27], #16
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.1d }, [x27], x28
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.2d }, [x27], x28
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.2s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.4h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3646,28 +3646,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.4s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.8b }, [x27], x28
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.8h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.16b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.1d, v2.1d }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3692,28 +3692,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.2d, v2.2d }, [x27], #32
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.2s, v2.2s }, [x27], #16
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.4s, v2.4s }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.8b, v2.8b }, [x27], #16
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3738,28 +3738,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3784,28 +3784,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
-
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.4h, v2.4h }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.8h, v2.8h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeE. . . . st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3830,28 +3830,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3876,28 +3876,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3922,28 +3922,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3968,28 +3968,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4014,28 +4014,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4060,28 +4060,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4106,28 +4106,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2703
+# CHECK-NEXT: Total Cycles: 2201
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.55
-# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 012
-# CHECK: [0,0] DeeeeE . . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . st1 { v1.b }[0], [x27], #1
-# CHECK-NEXT: [0,5] . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeE . . st1 { v1.b }[8], [x27], #1
-# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeE . st1 { v1.b }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,5] . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . . st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4152,28 +4152,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.b }[8], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.h }[0], [x27], #2
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.h }[4], [x27], #2
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.h }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st1 { v1.h }[4], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4198,28 +4198,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2603
+# CHECK-NEXT: Total Cycles: 2101
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.58
-# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: uOps Per Cycle: 0.71
+# CHECK-NEXT: IPC: 0.48
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 012345678
-
-# CHECK: [0,0] DeeeE. . . . . . st1 { v1.s }[0], [x27], #4
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st1 { v1.s }[0], [x27], x28
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st1 { v1.d }[0], [x27], #8
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st1 { v1.d }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeeE . st2 { v1.2d, v2.2d }, [x27], #32
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeE. . . .. st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] .DeeE. . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . .. st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . .. st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,5] . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE .. st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeeE st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4244,28 +4244,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2703
+# CHECK-NEXT: Total Cycles: 2201
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.55
-# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 012
-# CHECK: [0,0] DeeeE. . . . . . st2 { v1.2s, v2.2s }, [x27], #16
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st2 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeeE . . . st2 { v1.4s, v2.4s }, [x27], #32
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . .DeeeE . . st2 { v1.8b, v2.8b }, [x27], #16
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . .DeeeeE . st2 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . . st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . . st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . . st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,5] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . . st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4290,28 +4290,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2803
+# CHECK-NEXT: Total Cycles: 2301
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.54
-# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: uOps Per Cycle: 0.65
+# CHECK-NEXT: IPC: 0.43
# CHECK-NEXT: Block RThroughput: 8.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . st2 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . st2 { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeE . . . st2 { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeE . . st2 { v1.4h, v2.4h }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE . st2 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeE . . . . st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE. . . st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,5] . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . . st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4336,28 +4336,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2703
+# CHECK-NEXT: Total Cycles: 2201
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.55
-# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: uOps Per Cycle: 0.68
+# CHECK-NEXT: IPC: 0.45
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 012
-# CHECK: [0,0] DeeeE. . . . . . st2 { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeeE . . . . st2 { v1.8h, v2.8h }, [x27], x28
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . .DeeeeE . . . st2 { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: [0,5] . . . .DeeE. . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeE . . st2 { v1.b, v2.b }[0], [x27], #2
-# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeE . st2 { v1.b, v2.b }[8], [x27], #2
-# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . . st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE. . . . st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE. . . st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,5] . . .DeeE. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . . st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,7] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeE st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4382,28 +4382,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st2 { v1.b, v2.b }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st2 { v1.b, v2.b }[8], [x27], x28
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st2 { v1.h, v2.h }[0], [x27], #4
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st2 { v1.h, v2.h }[4], [x27], #4
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st2 { v1.h, v2.h }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4428,28 +4428,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeE. . . . . . st2 { v1.h, v2.h }[4], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . DeeeE. . . . . st2 { v1.s, v2.s }[0], [x27], #8
-# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . DeeeE. . . . st2 { v1.s, v2.s }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . DeeeE. . . st2 { v1.d, v2.d }[0], [x27], #16
-# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . DeeeE. . st2 { v1.d, v2.d }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeE. . . . st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] .DeeE. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeE . . . st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,3] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeE . . st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeE . st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,7] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . .DeeeE st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4474,24 +4474,24 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
-# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total Cycles: 1701
# CHECK-NEXT: Total uOps: 900
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.45
-# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: uOps Per Cycle: 0.53
+# CHECK-NEXT: IPC: 0.35
# CHECK-NEXT: Block RThroughput: 8.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 012
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeE . . . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
-# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeeE . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: [0,3] . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeeE. . st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
-# CHECK-NEXT: [0,5] . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,1] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeeE . . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,3] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . .DeeeeeE st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,5] . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4512,28 +4512,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3403
+# CHECK-NEXT: Total Cycles: 2901
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.44
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.52
+# CHECK-NEXT: IPC: 0.34
# CHECK-NEXT: Block RThroughput: 14.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 0123456789
-# CHECK: [0,0] DeeeeeE . . . . . .. st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: [0,1] . .DeeE. . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeE . . . . .. st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
-# CHECK-NEXT: [0,3] . . . DeeE . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeeE . . .. st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: [0,5] . . . . DeeE . . .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeeeE . .. st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: [0,7] . . . . . . DeeE .. add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeeE .. st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeeE . . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeE . . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeE. . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4558,28 +4558,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total Cycles: 3001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 15.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
-# CHECK: [0,0] DeeeeeE . . . . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeeE . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeeeE . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeeeE. . st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeeE . . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeE . . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeE. . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeeE st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4604,28 +4604,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3103
+# CHECK-NEXT: Total Cycles: 2601
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.48
-# CHECK-NEXT: IPC: 0.32
+# CHECK-NEXT: uOps Per Cycle: 0.58
+# CHECK-NEXT: IPC: 0.38
# CHECK-NEXT: Block RThroughput: 11.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
-# CHECK: [0,0] DeeeeeE . . . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeE . . . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3
-# CHECK-NEXT: [0,3] . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE. . . st3 { v1.b, v2.b, v3.b }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE . st3 { v1.b, v2.b, v3.b }[8], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeeE . . . .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeE . . .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,3] . . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . .DeeeeE . .. st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,5] . . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . .DeeeeE .. st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . .DeeeeE st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4650,28 +4650,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st3 { v1.s, v2.s, v3.s }[0], [x27], #12
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4696,28 +4696,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3103
+# CHECK-NEXT: Total Cycles: 2601
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.48
-# CHECK-NEXT: IPC: 0.32
+# CHECK-NEXT: uOps Per Cycle: 0.58
+# CHECK-NEXT: IPC: 0.38
# CHECK-NEXT: Block RThroughput: 11.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
-# CHECK: [0,0] DeeeeE . . . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeeE . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . .. st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . .. st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,3] . . DeeE . . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . .. st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . .. add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . DeeE .. add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeeE st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4742,28 +4742,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total Cycles: 3001
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 15.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
-# CHECK: [0,0] DeeeeeE . . . . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: [0,3] . . . DeeE . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeeE . . . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,5] . . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . .DeeeeeE . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,7] . . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeeeE. . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeeE . . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeE . . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeeE . . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeE. . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,7] . . . . .DeeE. . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeeE st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4788,28 +4788,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3403
+# CHECK-NEXT: Total Cycles: 2901
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.44
-# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.52
+# CHECK-NEXT: IPC: 0.34
# CHECK-NEXT: Block RThroughput: 14.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 0123456789
-# CHECK: [0,0] DeeeeE . . . . . .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeeE . . . . .. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeeE. . . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,5] . . . . DeeE . . .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeeE . .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,7] . . . . . .DeeE. .. add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . . DeeeeeE .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeeE . . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . .DeeeeeE . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeeE . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeeE st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] . . . . . .DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4834,28 +4834,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3203
+# CHECK-NEXT: Total Cycles: 2701
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.47
-# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: uOps Per Cycle: 0.56
+# CHECK-NEXT: IPC: 0.37
# CHECK-NEXT: Block RThroughput: 12.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 01234
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
-# CHECK: [0,0] DeeeeeE . . . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,1] . .DeeE. . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . . DeeeeeE . . . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,3] . . . DeeE . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE. . . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . .DeeeeE . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . .DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeeE . . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . .DeeeeeE . . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . . DeeeeE . . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,5] . . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . . DeeeeE . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,7] . . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . . DeeeeE st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4880,28 +4880,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 2501
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 012
-# CHECK-NEXT: Index 0123456789 0123456789
-
-# CHECK: [0,0] DeeeeE . . . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
-# CHECK-NEXT: [0,1] . DeeE . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
-# CHECK-NEXT: [0,3] . . .DeeE. . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . . . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
-# CHECK-NEXT: [0,5] . . . . DeeE . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . . . . . DeeeeE. . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
-# CHECK-NEXT: [0,9] . . . . . . DeeE add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
+
+# CHECK: [0,0] DeeeeE . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] . DeeE . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,3] . . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,5] . . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,8] . . . . DeeeeE st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] . . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4926,26 +4926,26 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
-# CHECK-NEXT: Total Cycles: 2403
+# CHECK-NEXT: Total Cycles: 2001
# CHECK-NEXT: Total uOps: 1200
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.40
# CHECK-NEXT: Block RThroughput: 8.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0
-# CHECK: [0,0] DeeeeE . . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
-# CHECK-NEXT: [0,1] . DeeE . . . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] . .DeeeeE . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
-# CHECK-NEXT: [0,3] . . .DeeE. . .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . . . DeeeeE . .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
-# CHECK-NEXT: [0,5] . . . . DeeE .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . . . . DeeeeE .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
-# CHECK-NEXT: [0,7] . . . . . DeeE add x0, x27, #1
+# CHECK: [0,0] DeeeeE . . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] . DeeE . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] . DeeeeE . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . . DeeE . . add x0, x27, #1
+# CHECK-NEXT: [0,4] . . DeeeeE . st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,5] . . . DeeE . add x0, x27, #1
+# CHECK-NEXT: [0,6] . . . DeeeeE st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] . . . . DeeE add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
More information about the llvm-commits
mailing list