[llvm] f73334c - [AArch64] Set the latency of Cortex-A55 stores to 1
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 12 05:41:26 PDT 2021
Author: David Green
Date: 2021-07-12T13:39:35+01:00
New Revision: f73334c46d59ffdbf12353932d5985049335a6c7
URL: https://github.com/llvm/llvm-project/commit/f73334c46d59ffdbf12353932d5985049335a6c7
DIFF: https://github.com/llvm/llvm-project/commit/f73334c46d59ffdbf12353932d5985049335a6c7.diff
LOG: [AArch64] Set the latency of Cortex-A55 stores to 1
This sets the latency of stores to 1 in the Cortex-A55 scheduling model,
to better match the values given in the software optimization guide.
The latency of a store in normal llvm scheduling does not appear to have
a lot of uses. If the store has no outputs then the latency is somewhat
meaningless (and pre/post increment update operands use the WriteAdr
write for those operands instead). The one place it does alter things is
the latency between a store and the end of the scheduling region, which
can in turn have an effect on the critical path length. As a result, a
latency of 1 is more correct and offers ever-so-slightly better
scheduling of instructions near the end of the block.
They are marked as RetireOOO to keep llvm-mca from introducing
stalls where none would exist.
Differential Revision: https://reviews.llvm.org/D105541
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SchedA55.td
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-alias.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
index 0015c27228f68..dff70f9345754 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -109,9 +109,11 @@ def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
def : WriteRes<WriteAdr, []> { let Latency = 0; }
// Store
-def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 4; }
-def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 4; }
-def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 4; }
+let RetireOOO = 1 in {
+def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 1; }
+def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 1; }
+}
def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; }
// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
index f5ce26e43b6ac..760a1c9466e79 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
@@ -10,12 +10,12 @@ str w0, [x21, x18, lsl #2]
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 12
-# CHECK-NEXT: Total Cycles: 23
+# CHECK-NEXT: Total Cycles: 17
# CHECK-NEXT: Total uOps: 14
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.61
-# CHECK-NEXT: IPC: 0.52
+# CHECK-NEXT: uOps Per Cycle: 0.82
+# CHECK-NEXT: IPC: 0.71
# CHECK-NEXT: Block RThroughput: 3.5
# CHECK: Instruction Info:
@@ -32,27 +32,28 @@ str w0, [x21, x18, lsl #2]
# CHECK-NEXT: 1 4 1.00 madd w0, w5, w4, w0
# CHECK-NEXT: 1 3 0.50 add x3, x3, x13
# CHECK-NEXT: 1 3 0.50 subs x1, x1, #1
-# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
+# CHECK-NEXT: 1 1 1.00 * str w0, [x21, x18, lsl #2]
# CHECK: Dynamic Dispatch Stall Cycles:
-# CHECK-NEXT: RAT - Register unavailable: 8 (34.8%)
+# CHECK-NEXT: RAT - Register unavailable: 8 (47.1%)
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 13 (56.5%)
-# CHECK-NEXT: 1, 6 (26.1%)
-# CHECK-NEXT: 2, 4 (17.4%)
+# CHECK-NEXT: 0, 7 (41.2%)
+# CHECK-NEXT: 1, 6 (35.3%)
+# CHECK-NEXT: 2, 4 (23.5%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
-# CHECK-NEXT: 0, 13 (56.5%)
-# CHECK-NEXT: 1, 6 (26.1%)
-# CHECK-NEXT: 2, 4 (17.4%)
+# CHECK-NEXT: 0, 7 (41.2%)
+# CHECK-NEXT: 1, 6 (35.3%)
+# CHECK-NEXT: 2, 4 (23.5%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: No scheduler resources used.
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
index a8bddbbd4a3b6..86314c8f3f101 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
@@ -10,12 +10,12 @@ str w0, [x21, x18, lsl #2]
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 12
-# CHECK-NEXT: Total Cycles: 23
+# CHECK-NEXT: Total Cycles: 17
# CHECK-NEXT: Total uOps: 14
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.61
-# CHECK-NEXT: IPC: 0.52
+# CHECK-NEXT: uOps Per Cycle: 0.82
+# CHECK-NEXT: IPC: 0.71
# CHECK-NEXT: Block RThroughput: 3.5
# CHECK: Instruction Info:
@@ -32,27 +32,28 @@ str w0, [x21, x18, lsl #2]
# CHECK-NEXT: 1 4 1.00 madd w0, w5, w4, w0
# CHECK-NEXT: 1 3 0.50 add x3, x3, x13
# CHECK-NEXT: 1 3 0.50 subs x1, x1, #1
-# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
+# CHECK-NEXT: 1 1 1.00 * str w0, [x21, x18, lsl #2]
# CHECK: Dynamic Dispatch Stall Cycles:
-# CHECK-NEXT: RAT - Register unavailable: 8 (34.8%)
+# CHECK-NEXT: RAT - Register unavailable: 8 (47.1%)
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 13 (56.5%)
-# CHECK-NEXT: 1, 6 (26.1%)
-# CHECK-NEXT: 2, 4 (17.4%)
+# CHECK-NEXT: 0, 7 (41.2%)
+# CHECK-NEXT: 1, 6 (35.3%)
+# CHECK-NEXT: 2, 4 (23.5%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
-# CHECK-NEXT: 0, 13 (56.5%)
-# CHECK-NEXT: 1, 6 (26.1%)
-# CHECK-NEXT: 2, 4 (17.4%)
+# CHECK-NEXT: 0, 7 (41.2%)
+# CHECK-NEXT: 1, 6 (35.3%)
+# CHECK-NEXT: 2, 4 (23.5%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: No scheduler resources used.
@@ -89,21 +90,21 @@ str w0, [x21, x18, lsl #2]
# CHECK-NEXT: - - - - - - - - - - - 1.00 str w0, [x21, x18, lsl #2]
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 012
-
-# CHECK: [0,0] DeeE . . . . . ldr w4, [x2], #4
-# CHECK-NEXT: [0,1] .DeeE. . . . . ldr w5, [x3]
-# CHECK-NEXT: [0,2] . DeeeE . . . . madd w0, w5, w4, w0
-# CHECK-NEXT: [0,3] . DeeE . . . . add x3, x3, x13
-# CHECK-NEXT: [0,4] . DeeE . . . . subs x1, x1, #1
-# CHECK-NEXT: [0,5] . . DeeeE . . . str w0, [x21, x18, lsl #2]
-# CHECK-NEXT: [1,0] . . .DeeE. . . ldr w4, [x2], #4
-# CHECK-NEXT: [1,1] . . . DeeE . . ldr w5, [x3]
-# CHECK-NEXT: [1,2] . . . DeeeE. . madd w0, w5, w4, w0
-# CHECK-NEXT: [1,3] . . . .DeeE. . add x3, x3, x13
-# CHECK-NEXT: [1,4] . . . .DeeE. . subs x1, x1, #1
-# CHECK-NEXT: [1,5] . . . . DeeeE str w0, [x21, x18, lsl #2]
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeE . . .. ldr w4, [x2], #4
+# CHECK-NEXT: [0,1] .DeeE. . .. ldr w5, [x3]
+# CHECK-NEXT: [0,2] . DeeeE . .. madd w0, w5, w4, w0
+# CHECK-NEXT: [0,3] . DeeE . .. add x3, x3, x13
+# CHECK-NEXT: [0,4] . DeeE . .. subs x1, x1, #1
+# CHECK-NEXT: [0,5] . . DE . .. str w0, [x21, x18, lsl #2]
+# CHECK-NEXT: [1,0] . . DeeE .. ldr w4, [x2], #4
+# CHECK-NEXT: [1,1] . . DeeE .. ldr w5, [x3]
+# CHECK-NEXT: [1,2] . . . DeeeE madd w0, w5, w4, w0
+# CHECK-NEXT: [1,3] . . . DeeE add x3, x3, x13
+# CHECK-NEXT: [1,4] . . . DeeE subs x1, x1, #1
+# CHECK-NEXT: [1,5] . . . DE str w0, [x21, x18, lsl #2]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s
index d4a5c78108f64..fb74c088437cf 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s
@@ -2107,52 +2107,52 @@ drps
# CHECK-NEXT: 1 3 1.00 * ldr q0, #-1048576
# CHECK-NEXT: 1 3 1.00 U prfm pldl1strm, #0
# CHECK-NEXT: 1 3 1.00 U prfm #22, #0
-# CHECK-NEXT: 2 7 1.00 * * U stxrb w18, w8, [sp]
-# CHECK-NEXT: 2 7 1.00 * * U stxrh w24, w15, [x16]
-# CHECK-NEXT: 2 7 1.00 * * U stxr w5, w6, [x17]
-# CHECK-NEXT: 2 7 1.00 * * U stxr w1, x10, [x21]
+# CHECK-NEXT: 2 4 1.00 * * U stxrb w18, w8, [sp]
+# CHECK-NEXT: 2 4 1.00 * * U stxrh w24, w15, [x16]
+# CHECK-NEXT: 2 4 1.00 * * U stxr w5, w6, [x17]
+# CHECK-NEXT: 2 4 1.00 * * U stxr w1, x10, [x21]
# CHECK-NEXT: 1 3 1.00 * * U ldxrb w30, [x0]
# CHECK-NEXT: 1 3 1.00 * * U ldxrh w17, [x4]
# CHECK-NEXT: 1 3 1.00 * * U ldxr w22, [sp]
# CHECK-NEXT: 1 3 1.00 * * U ldxr x11, [x29]
# CHECK-NEXT: 1 3 1.00 * * U ldxr x11, [x29]
# CHECK-NEXT: 1 3 1.00 * * U ldxr x11, [x29]
-# CHECK-NEXT: 2 7 1.00 * * U stxp w12, w11, w10, [sp]
-# CHECK-NEXT: 2 7 1.00 * * U stxp wzr, x27, x9, [x12]
+# CHECK-NEXT: 2 4 1.00 * * U stxp w12, w11, w10, [sp]
+# CHECK-NEXT: 2 4 1.00 * * U stxp wzr, x27, x9, [x12]
# CHECK-NEXT: 2 5 2.00 * * U ldxp w0, wzr, [sp]
# CHECK-NEXT: 2 5 2.00 * * U ldxp x17, x0, [x18]
# CHECK-NEXT: 2 5 2.00 * * U ldxp x17, x0, [x18]
-# CHECK-NEXT: 2 7 1.00 * * U stlxrb w12, w22, [x0]
-# CHECK-NEXT: 2 7 1.00 * * U stlxrh w10, w1, [x1]
-# CHECK-NEXT: 2 7 1.00 * * U stlxr w9, w2, [x2]
-# CHECK-NEXT: 2 7 1.00 * * U stlxr w9, x3, [sp]
+# CHECK-NEXT: 2 4 1.00 * * U stlxrb w12, w22, [x0]
+# CHECK-NEXT: 2 4 1.00 * * U stlxrh w10, w1, [x1]
+# CHECK-NEXT: 2 4 1.00 * * U stlxr w9, w2, [x2]
+# CHECK-NEXT: 2 4 1.00 * * U stlxr w9, x3, [sp]
# CHECK-NEXT: 1 3 1.00 * * U ldaxrb w8, [x4]
# CHECK-NEXT: 1 3 1.00 * * U ldaxrh w7, [x5]
# CHECK-NEXT: 1 3 1.00 * * U ldaxr w6, [sp]
# CHECK-NEXT: 1 3 1.00 * * U ldaxr x5, [x6]
# CHECK-NEXT: 1 3 1.00 * * U ldaxr x5, [x6]
# CHECK-NEXT: 1 3 1.00 * * U ldaxr x5, [x6]
-# CHECK-NEXT: 2 7 1.00 * * U stlxp w4, w5, w6, [sp]
-# CHECK-NEXT: 2 7 1.00 * * U stlxp wzr, x6, x7, [x1]
+# CHECK-NEXT: 2 4 1.00 * * U stlxp w4, w5, w6, [sp]
+# CHECK-NEXT: 2 4 1.00 * * U stlxp wzr, x6, x7, [x1]
# CHECK-NEXT: 2 5 2.00 * * U ldaxp w5, w18, [sp]
# CHECK-NEXT: 2 5 2.00 * * U ldaxp x6, x19, [x22]
# CHECK-NEXT: 2 5 2.00 * * U ldaxp x6, x19, [x22]
-# CHECK-NEXT: 1 4 1.00 * U stlrb w24, [sp]
-# CHECK-NEXT: 1 4 1.00 * U stlrh w25, [x30]
-# CHECK-NEXT: 1 4 1.00 * U stlr w26, [x29]
-# CHECK-NEXT: 1 4 1.00 * U stlr x27, [x28]
-# CHECK-NEXT: 1 4 1.00 * U stlr x27, [x28]
-# CHECK-NEXT: 1 4 1.00 * U stlr x27, [x28]
+# CHECK-NEXT: 1 1 1.00 * U stlrb w24, [sp]
+# CHECK-NEXT: 1 1 1.00 * U stlrh w25, [x30]
+# CHECK-NEXT: 1 1 1.00 * U stlr w26, [x29]
+# CHECK-NEXT: 1 1 1.00 * U stlr x27, [x28]
+# CHECK-NEXT: 1 1 1.00 * U stlr x27, [x28]
+# CHECK-NEXT: 1 1 1.00 * U stlr x27, [x28]
# CHECK-NEXT: 1 3 1.00 * U ldarb w23, [sp]
# CHECK-NEXT: 1 3 1.00 * U ldarh w22, [x30]
# CHECK-NEXT: 1 3 1.00 * U ldar wzr, [x29]
# CHECK-NEXT: 1 3 1.00 * U ldar x21, [x28]
# CHECK-NEXT: 1 3 1.00 * U ldar x21, [x28]
# CHECK-NEXT: 1 3 1.00 * U ldar x21, [x28]
-# CHECK-NEXT: 1 4 1.00 * sturb w9, [sp]
-# CHECK-NEXT: 1 4 1.00 * sturh wzr, [x12, #255]
-# CHECK-NEXT: 1 4 1.00 * stur w16, [x0, #-256]
-# CHECK-NEXT: 1 4 1.00 * stur x28, [x14, #1]
+# CHECK-NEXT: 1 1 1.00 * sturb w9, [sp]
+# CHECK-NEXT: 1 1 1.00 * sturh wzr, [x12, #255]
+# CHECK-NEXT: 1 1 1.00 * stur w16, [x0, #-256]
+# CHECK-NEXT: 1 1 1.00 * stur x28, [x14, #1]
# CHECK-NEXT: 1 3 1.00 * ldurb w1, [x20, #255]
# CHECK-NEXT: 1 3 1.00 * ldurh w20, [x1, #255]
# CHECK-NEXT: 1 3 1.00 * ldur w12, [sp, #255]
@@ -2163,28 +2163,28 @@ drps
# CHECK-NEXT: 1 3 1.00 U prfum pldl2keep, [sp, #-256]
# CHECK-NEXT: 1 3 1.00 * ldursb w19, [x1, #-256]
# CHECK-NEXT: 1 3 1.00 * ldursh w15, [x21, #-256]
-# CHECK-NEXT: 1 4 1.00 * stur b0, [sp, #1]
-# CHECK-NEXT: 1 4 1.00 * stur h12, [x12, #-1]
-# CHECK-NEXT: 1 4 1.00 * stur s15, [x0, #255]
-# CHECK-NEXT: 1 4 1.00 * stur d31, [x5, #25]
-# CHECK-NEXT: 1 4 1.00 * stur q9, [x5]
+# CHECK-NEXT: 1 1 1.00 * stur b0, [sp, #1]
+# CHECK-NEXT: 1 1 1.00 * stur h12, [x12, #-1]
+# CHECK-NEXT: 1 1 1.00 * stur s15, [x0, #255]
+# CHECK-NEXT: 1 1 1.00 * stur d31, [x5, #25]
+# CHECK-NEXT: 1 1 1.00 * stur q9, [x5]
# CHECK-NEXT: 1 3 1.00 * ldur b3, [sp]
# CHECK-NEXT: 1 3 1.00 * ldur h5, [x4, #-256]
# CHECK-NEXT: 1 3 1.00 * ldur s7, [x12, #-1]
# CHECK-NEXT: 1 3 1.00 * ldur d11, [x19, #4]
# CHECK-NEXT: 1 3 1.00 * ldur q13, [x1, #2]
-# CHECK-NEXT: 2 4 1.00 * strb w9, [x2], #255
-# CHECK-NEXT: 2 4 1.00 * strb w10, [x3], #1
-# CHECK-NEXT: 2 4 1.00 * strb w10, [x3], #-256
-# CHECK-NEXT: 2 4 1.00 * strh w9, [x2], #255
-# CHECK-NEXT: 2 4 1.00 * strh w9, [x2], #1
-# CHECK-NEXT: 2 4 1.00 * strh w10, [x3], #-256
-# CHECK-NEXT: 2 4 1.00 * str w19, [sp], #255
-# CHECK-NEXT: 2 4 1.00 * str w20, [x30], #1
-# CHECK-NEXT: 2 4 1.00 * str w21, [x12], #-256
-# CHECK-NEXT: 2 4 1.00 * str xzr, [x9], #255
-# CHECK-NEXT: 2 4 1.00 * str x2, [x3], #1
-# CHECK-NEXT: 2 4 1.00 * str x19, [x12], #-256
+# CHECK-NEXT: 2 1 1.00 * strb w9, [x2], #255
+# CHECK-NEXT: 2 1 1.00 * strb w10, [x3], #1
+# CHECK-NEXT: 2 1 1.00 * strb w10, [x3], #-256
+# CHECK-NEXT: 2 1 1.00 * strh w9, [x2], #255
+# CHECK-NEXT: 2 1 1.00 * strh w9, [x2], #1
+# CHECK-NEXT: 2 1 1.00 * strh w10, [x3], #-256
+# CHECK-NEXT: 2 1 1.00 * str w19, [sp], #255
+# CHECK-NEXT: 2 1 1.00 * str w20, [x30], #1
+# CHECK-NEXT: 2 1 1.00 * str w21, [x12], #-256
+# CHECK-NEXT: 2 1 1.00 * str xzr, [x9], #255
+# CHECK-NEXT: 2 1 1.00 * str x2, [x3], #1
+# CHECK-NEXT: 2 1 1.00 * str x19, [x12], #-256
# CHECK-NEXT: 2 3 1.00 * ldrb w9, [x2], #255
# CHECK-NEXT: 2 3 1.00 * ldrb w10, [x3], #1
# CHECK-NEXT: 2 3 1.00 * ldrb w10, [x3], #-256
@@ -2212,18 +2212,18 @@ drps
# CHECK-NEXT: 2 3 1.00 * ldrsh wzr, [x9], #255
# CHECK-NEXT: 2 3 1.00 * ldrsh w2, [x3], #1
# CHECK-NEXT: 2 3 1.00 * ldrsh w19, [x12], #-256
-# CHECK-NEXT: 2 4 1.00 * str b0, [x0], #255
-# CHECK-NEXT: 2 4 1.00 * str b3, [x3], #1
-# CHECK-NEXT: 2 4 1.00 * str b5, [sp], #-256
-# CHECK-NEXT: 2 4 1.00 * str h10, [x10], #255
-# CHECK-NEXT: 2 4 1.00 * str h13, [x23], #1
-# CHECK-NEXT: 2 4 1.00 * str h15, [sp], #-256
-# CHECK-NEXT: 2 4 1.00 * str s20, [x20], #255
-# CHECK-NEXT: 2 4 1.00 * str s23, [x23], #1
-# CHECK-NEXT: 2 4 1.00 * str s25, [x0], #-256
-# CHECK-NEXT: 2 4 1.00 * str d20, [x20], #255
-# CHECK-NEXT: 2 4 1.00 * str d23, [x23], #1
-# CHECK-NEXT: 2 4 1.00 * str d25, [x0], #-256
+# CHECK-NEXT: 2 1 1.00 * str b0, [x0], #255
+# CHECK-NEXT: 2 1 1.00 * str b3, [x3], #1
+# CHECK-NEXT: 2 1 1.00 * str b5, [sp], #-256
+# CHECK-NEXT: 2 1 1.00 * str h10, [x10], #255
+# CHECK-NEXT: 2 1 1.00 * str h13, [x23], #1
+# CHECK-NEXT: 2 1 1.00 * str h15, [sp], #-256
+# CHECK-NEXT: 2 1 1.00 * str s20, [x20], #255
+# CHECK-NEXT: 2 1 1.00 * str s23, [x23], #1
+# CHECK-NEXT: 2 1 1.00 * str s25, [x0], #-256
+# CHECK-NEXT: 2 1 1.00 * str d20, [x20], #255
+# CHECK-NEXT: 2 1 1.00 * str d23, [x23], #1
+# CHECK-NEXT: 2 1 1.00 * str d25, [x0], #-256
# CHECK-NEXT: 2 3 1.00 * ldr b0, [x0], #255
# CHECK-NEXT: 2 3 1.00 * ldr b3, [x3], #1
# CHECK-NEXT: 2 3 1.00 * ldr b5, [sp], #-256
@@ -2239,22 +2239,22 @@ drps
# CHECK-NEXT: 2 3 1.00 * ldr q20, [x1], #255
# CHECK-NEXT: 2 3 1.00 * ldr q23, [x9], #1
# CHECK-NEXT: 2 3 1.00 * ldr q25, [x20], #-256
-# CHECK-NEXT: 2 4 1.00 * str q10, [x1], #255
-# CHECK-NEXT: 2 4 1.00 * str q22, [sp], #1
-# CHECK-NEXT: 2 4 1.00 * str q21, [x20], #-256
+# CHECK-NEXT: 2 1 1.00 * str q10, [x1], #255
+# CHECK-NEXT: 2 1 1.00 * str q22, [sp], #1
+# CHECK-NEXT: 2 1 1.00 * str q21, [x20], #-256
# CHECK-NEXT: 2 3 1.00 * ldr x3, [x4, #0]!
-# CHECK-NEXT: 2 4 1.00 * strb w9, [x2, #255]!
-# CHECK-NEXT: 2 4 1.00 * strb w10, [x3, #1]!
-# CHECK-NEXT: 2 4 1.00 * strb w10, [x3, #-256]!
-# CHECK-NEXT: 2 4 1.00 * strh w9, [x2, #255]!
-# CHECK-NEXT: 2 4 1.00 * strh w9, [x2, #1]!
-# CHECK-NEXT: 2 4 1.00 * strh w10, [x3, #-256]!
-# CHECK-NEXT: 2 4 1.00 * str w19, [sp, #255]!
-# CHECK-NEXT: 2 4 1.00 * str w20, [x30, #1]!
-# CHECK-NEXT: 2 4 1.00 * str w21, [x12, #-256]!
-# CHECK-NEXT: 2 4 1.00 * str xzr, [x9, #255]!
-# CHECK-NEXT: 2 4 1.00 * str x2, [x3, #1]!
-# CHECK-NEXT: 2 4 1.00 * str x19, [x12, #-256]!
+# CHECK-NEXT: 2 1 1.00 * strb w9, [x2, #255]!
+# CHECK-NEXT: 2 1 1.00 * strb w10, [x3, #1]!
+# CHECK-NEXT: 2 1 1.00 * strb w10, [x3, #-256]!
+# CHECK-NEXT: 2 1 1.00 * strh w9, [x2, #255]!
+# CHECK-NEXT: 2 1 1.00 * strh w9, [x2, #1]!
+# CHECK-NEXT: 2 1 1.00 * strh w10, [x3, #-256]!
+# CHECK-NEXT: 2 1 1.00 * str w19, [sp, #255]!
+# CHECK-NEXT: 2 1 1.00 * str w20, [x30, #1]!
+# CHECK-NEXT: 2 1 1.00 * str w21, [x12, #-256]!
+# CHECK-NEXT: 2 1 1.00 * str xzr, [x9, #255]!
+# CHECK-NEXT: 2 1 1.00 * str x2, [x3, #1]!
+# CHECK-NEXT: 2 1 1.00 * str x19, [x12, #-256]!
# CHECK-NEXT: 2 3 1.00 * ldrb w9, [x2, #255]!
# CHECK-NEXT: 2 3 1.00 * ldrb w10, [x3, #1]!
# CHECK-NEXT: 2 3 1.00 * ldrb w10, [x3, #-256]!
@@ -2282,18 +2282,18 @@ drps
# CHECK-NEXT: 2 3 1.00 * ldrsh wzr, [x9, #255]!
# CHECK-NEXT: 2 3 1.00 * ldrsh w2, [x3, #1]!
# CHECK-NEXT: 2 3 1.00 * ldrsh w19, [x12, #-256]!
-# CHECK-NEXT: 2 4 1.00 * str b0, [x0, #255]!
-# CHECK-NEXT: 2 4 1.00 * str b3, [x3, #1]!
-# CHECK-NEXT: 2 4 1.00 * str b5, [sp, #-256]!
-# CHECK-NEXT: 2 4 1.00 * str h10, [x10, #255]!
-# CHECK-NEXT: 2 4 1.00 * str h13, [x23, #1]!
-# CHECK-NEXT: 2 4 1.00 * str h15, [sp, #-256]!
-# CHECK-NEXT: 2 4 1.00 * str s20, [x20, #255]!
-# CHECK-NEXT: 2 4 1.00 * str s23, [x23, #1]!
-# CHECK-NEXT: 2 4 1.00 * str s25, [x0, #-256]!
-# CHECK-NEXT: 2 4 1.00 * str d20, [x20, #255]!
-# CHECK-NEXT: 2 4 1.00 * str d23, [x23, #1]!
-# CHECK-NEXT: 2 4 1.00 * str d25, [x0, #-256]!
+# CHECK-NEXT: 2 1 1.00 * str b0, [x0, #255]!
+# CHECK-NEXT: 2 1 1.00 * str b3, [x3, #1]!
+# CHECK-NEXT: 2 1 1.00 * str b5, [sp, #-256]!
+# CHECK-NEXT: 2 1 1.00 * str h10, [x10, #255]!
+# CHECK-NEXT: 2 1 1.00 * str h13, [x23, #1]!
+# CHECK-NEXT: 2 1 1.00 * str h15, [sp, #-256]!
+# CHECK-NEXT: 2 1 1.00 * str s20, [x20, #255]!
+# CHECK-NEXT: 2 1 1.00 * str s23, [x23, #1]!
+# CHECK-NEXT: 2 1 1.00 * str s25, [x0, #-256]!
+# CHECK-NEXT: 2 1 1.00 * str d20, [x20, #255]!
+# CHECK-NEXT: 2 1 1.00 * str d23, [x23, #1]!
+# CHECK-NEXT: 2 1 1.00 * str d25, [x0, #-256]!
# CHECK-NEXT: 2 3 1.00 * ldr b0, [x0, #255]!
# CHECK-NEXT: 2 3 1.00 * ldr b3, [x3, #1]!
# CHECK-NEXT: 2 3 1.00 * ldr b5, [sp, #-256]!
@@ -2309,13 +2309,13 @@ drps
# CHECK-NEXT: 2 3 1.00 * ldr q20, [x1, #255]!
# CHECK-NEXT: 2 3 1.00 * ldr q23, [x9, #1]!
# CHECK-NEXT: 2 3 1.00 * ldr q25, [x20, #-256]!
-# CHECK-NEXT: 2 4 1.00 * str q10, [x1, #255]!
-# CHECK-NEXT: 2 4 1.00 * str q22, [sp, #1]!
-# CHECK-NEXT: 2 4 1.00 * str q21, [x20, #-256]!
-# CHECK-NEXT: 1 4 1.00 * sttrb w9, [sp]
-# CHECK-NEXT: 1 4 1.00 * sttrh wzr, [x12, #255]
-# CHECK-NEXT: 1 4 1.00 * sttr w16, [x0, #-256]
-# CHECK-NEXT: 1 4 1.00 * sttr x28, [x14, #1]
+# CHECK-NEXT: 2 1 1.00 * str q10, [x1, #255]!
+# CHECK-NEXT: 2 1 1.00 * str q22, [sp, #1]!
+# CHECK-NEXT: 2 1 1.00 * str q21, [x20, #-256]!
+# CHECK-NEXT: 1 1 1.00 * sttrb w9, [sp]
+# CHECK-NEXT: 1 1 1.00 * sttrh wzr, [x12, #255]
+# CHECK-NEXT: 1 1 1.00 * sttr w16, [x0, #-256]
+# CHECK-NEXT: 1 1 1.00 * sttr x28, [x14, #1]
# CHECK-NEXT: 1 3 1.00 * ldtrb w1, [x20, #255]
# CHECK-NEXT: 1 3 1.00 * ldtrh w20, [x1, #255]
# CHECK-NEXT: 1 3 1.00 * ldtr w12, [sp, #255]
@@ -2342,21 +2342,21 @@ drps
# CHECK-NEXT: 1 3 1.00 * ldrb w12, [x2]
# CHECK-NEXT: 1 3 1.00 * ldrsb w27, [sp, #4095]
# CHECK-NEXT: 1 3 1.00 * ldrsb xzr, [x15]
-# CHECK-NEXT: 1 4 1.00 * str x30, [sp]
-# CHECK-NEXT: 1 4 1.00 * str w20, [x4, #16380]
-# CHECK-NEXT: 1 4 1.00 * strh w17, [sp, #8190]
-# CHECK-NEXT: 1 4 1.00 * strb w23, [x3, #4095]
-# CHECK-NEXT: 1 4 1.00 * strb wzr, [x2]
+# CHECK-NEXT: 1 1 1.00 * str x30, [sp]
+# CHECK-NEXT: 1 1 1.00 * str w20, [x4, #16380]
+# CHECK-NEXT: 1 1 1.00 * strh w17, [sp, #8190]
+# CHECK-NEXT: 1 1 1.00 * strb w23, [x3, #4095]
+# CHECK-NEXT: 1 1 1.00 * strb wzr, [x2]
# CHECK-NEXT: 1 3 1.00 * ldr b31, [sp, #4095]
# CHECK-NEXT: 1 3 1.00 * ldr h20, [x2, #8190]
# CHECK-NEXT: 1 3 1.00 * ldr s10, [x19, #16380]
# CHECK-NEXT: 1 3 1.00 * ldr d3, [x10, #32760]
-# CHECK-NEXT: 1 4 1.00 * str q12, [sp, #65520]
+# CHECK-NEXT: 1 1 1.00 * str q12, [sp, #65520]
# CHECK-NEXT: 1 4 1.00 * ldrb w3, [sp, x5]
# CHECK-NEXT: 1 4 1.00 * ldrb w9, [x27, x6]
# CHECK-NEXT: 1 4 1.00 * ldrsb w10, [x30, x7]
# CHECK-NEXT: 1 4 1.00 * ldrb w11, [x29, x3, sxtx]
-# CHECK-NEXT: 1 4 1.00 * strb w12, [x28, xzr, sxtx]
+# CHECK-NEXT: 1 1 1.00 * strb w12, [x28, xzr, sxtx]
# CHECK-NEXT: 1 4 1.00 * ldrb w14, [x26, w6, uxtw]
# CHECK-NEXT: 1 4 1.00 * ldrsb w15, [x25, w7, uxtw]
# CHECK-NEXT: 1 4 1.00 * ldrb w17, [x23, w9, sxtw]
@@ -2364,7 +2364,7 @@ drps
# CHECK-NEXT: 1 4 1.00 * ldrsh w3, [sp, x5]
# CHECK-NEXT: 1 4 1.00 * ldrsh w9, [x27, x6]
# CHECK-NEXT: 1 4 1.00 * ldrh w10, [x30, x7, lsl #1]
-# CHECK-NEXT: 1 4 1.00 * strh w11, [x29, x3, sxtx]
+# CHECK-NEXT: 1 1 1.00 * strh w11, [x29, x3, sxtx]
# CHECK-NEXT: 1 4 1.00 * ldrh w12, [x28, xzr, sxtx]
# CHECK-NEXT: 1 4 1.00 * ldrsh x13, [x27, x5, sxtx #1]
# CHECK-NEXT: 1 4 1.00 * ldrh w14, [x26, w6, uxtw]
@@ -2372,23 +2372,23 @@ drps
# CHECK-NEXT: 1 4 1.00 * ldrsh w16, [x24, w8, uxtw #1]
# CHECK-NEXT: 1 4 1.00 * ldrh w17, [x23, w9, sxtw]
# CHECK-NEXT: 1 4 1.00 * ldrh w18, [x22, w10, sxtw]
-# CHECK-NEXT: 1 4 1.00 * strh w19, [x21, wzr, sxtw #1]
+# CHECK-NEXT: 1 1 1.00 * strh w19, [x21, wzr, sxtw #1]
# CHECK-NEXT: 1 4 1.00 * ldr w3, [sp, x5]
# CHECK-NEXT: 1 4 1.00 * ldr s9, [x27, x6]
# CHECK-NEXT: 1 4 1.00 * ldr w10, [x30, x7, lsl #2]
# CHECK-NEXT: 1 4 1.00 * ldr w11, [x29, x3, sxtx]
-# CHECK-NEXT: 1 4 1.00 * str s12, [x28, xzr, sxtx]
-# CHECK-NEXT: 1 4 1.00 * str w13, [x27, x5, sxtx #2]
-# CHECK-NEXT: 1 4 1.00 * str w14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 1 1.00 * str s12, [x28, xzr, sxtx]
+# CHECK-NEXT: 1 1 1.00 * str w13, [x27, x5, sxtx #2]
+# CHECK-NEXT: 1 1 1.00 * str w14, [x26, w6, uxtw]
# CHECK-NEXT: 1 4 1.00 * ldr w15, [x25, w7, uxtw]
# CHECK-NEXT: 1 4 1.00 * ldr w16, [x24, w8, uxtw #2]
# CHECK-NEXT: 1 4 1.00 * ldrsw x17, [x23, w9, sxtw]
# CHECK-NEXT: 1 4 1.00 * ldr w18, [x22, w10, sxtw]
# CHECK-NEXT: 1 4 1.00 * ldrsw x19, [x21, wzr, sxtw #2]
# CHECK-NEXT: 1 4 1.00 * ldr x3, [sp, x5]
-# CHECK-NEXT: 1 4 1.00 * str x9, [x27, x6]
+# CHECK-NEXT: 1 1 1.00 * str x9, [x27, x6]
# CHECK-NEXT: 1 4 1.00 * ldr d10, [x30, x7, lsl #3]
-# CHECK-NEXT: 1 4 1.00 * str x11, [x29, x3, sxtx]
+# CHECK-NEXT: 1 1 1.00 * str x11, [x29, x3, sxtx]
# CHECK-NEXT: 1 4 1.00 * ldr x12, [x28, xzr, sxtx]
# CHECK-NEXT: 1 4 1.00 * ldr x13, [x27, x5, sxtx #3]
# CHECK-NEXT: 1 3 1.00 U prfm pldl1keep, [x26, w6, uxtw]
@@ -2396,21 +2396,21 @@ drps
# CHECK-NEXT: 1 4 1.00 * ldr x16, [x24, w8, uxtw #3]
# CHECK-NEXT: 1 4 1.00 * ldr x17, [x23, w9, sxtw]
# CHECK-NEXT: 1 4 1.00 * ldr x18, [x22, w10, sxtw]
-# CHECK-NEXT: 1 4 1.00 * str d19, [x21, wzr, sxtw #3]
+# CHECK-NEXT: 1 1 1.00 * str d19, [x21, wzr, sxtw #3]
# CHECK-NEXT: 1 4 1.00 * ldr q3, [sp, x5]
# CHECK-NEXT: 1 4 1.00 * ldr q9, [x27, x6]
# CHECK-NEXT: 1 4 1.00 * ldr q10, [x30, x7, lsl #4]
-# CHECK-NEXT: 1 4 1.00 * str q11, [x29, x3, sxtx]
-# CHECK-NEXT: 1 4 1.00 * str q12, [x28, xzr, sxtx]
-# CHECK-NEXT: 1 4 1.00 * str q13, [x27, x5, sxtx #4]
+# CHECK-NEXT: 1 1 1.00 * str q11, [x29, x3, sxtx]
+# CHECK-NEXT: 1 1 1.00 * str q12, [x28, xzr, sxtx]
+# CHECK-NEXT: 1 1 1.00 * str q13, [x27, x5, sxtx #4]
# CHECK-NEXT: 1 4 1.00 * ldr q14, [x26, w6, uxtw]
# CHECK-NEXT: 1 4 1.00 * ldr q15, [x25, w7, uxtw]
# CHECK-NEXT: 1 4 1.00 * ldr q16, [x24, w8, uxtw #4]
# CHECK-NEXT: 1 4 1.00 * ldr q17, [x23, w9, sxtw]
-# CHECK-NEXT: 1 4 1.00 * str q18, [x22, w10, sxtw]
+# CHECK-NEXT: 1 1 1.00 * str q18, [x22, w10, sxtw]
# CHECK-NEXT: 1 4 1.00 * ldr q19, [x21, wzr, sxtw #4]
# CHECK-NEXT: 2 5 3.00 * ldp w3, w5, [sp]
-# CHECK-NEXT: 1 4 1.00 * stp wzr, w9, [sp, #252]
+# CHECK-NEXT: 1 1 1.00 * stp wzr, w9, [sp, #252]
# CHECK-NEXT: 2 5 3.00 * ldp w2, wzr, [sp, #-256]
# CHECK-NEXT: 2 5 3.00 * ldp w9, w10, [sp, #4]
# CHECK-NEXT: 2 5 3.00 * ldpsw x9, x10, [sp, #4]
@@ -2420,16 +2420,16 @@ drps
# CHECK-NEXT: 2 5 3.00 * ldp x22, x23, [x3, #-512]
# CHECK-NEXT: 2 5 3.00 * ldp x24, x25, [x4, #8]
# CHECK-NEXT: 2 5 3.00 * ldp s29, s28, [sp, #252]
-# CHECK-NEXT: 1 4 1.00 * stp s27, s26, [sp, #-256]
+# CHECK-NEXT: 1 1 1.00 * stp s27, s26, [sp, #-256]
# CHECK-NEXT: 2 5 3.00 * ldp s1, s2, [x3, #44]
-# CHECK-NEXT: 1 4 1.00 * stp d3, d5, [x9, #504]
-# CHECK-NEXT: 1 4 1.00 * stp d7, d11, [x10, #-512]
+# CHECK-NEXT: 1 1 1.00 * stp d3, d5, [x9, #504]
+# CHECK-NEXT: 1 1 1.00 * stp d7, d11, [x10, #-512]
# CHECK-NEXT: 2 5 3.00 * ldp d2, d3, [x30, #-8]
-# CHECK-NEXT: 1 4 1.00 * stp q3, q5, [sp]
-# CHECK-NEXT: 1 4 1.00 * stp q17, q19, [sp, #1008]
+# CHECK-NEXT: 1 1 1.00 * stp q3, q5, [sp]
+# CHECK-NEXT: 1 1 1.00 * stp q17, q19, [sp, #1008]
# CHECK-NEXT: 2 5 3.00 * ldp q23, q29, [x1, #-1024]
# CHECK-NEXT: 2 5 3.00 * ldp w3, w5, [sp], #0
-# CHECK-NEXT: 2 4 1.00 * stp wzr, w9, [sp], #252
+# CHECK-NEXT: 2 1 1.00 * stp wzr, w9, [sp], #252
# CHECK-NEXT: 2 5 3.00 * ldp w2, wzr, [sp], #-256
# CHECK-NEXT: 2 5 3.00 * ldp w9, w10, [sp], #4
# CHECK-NEXT: 2 5 3.00 * ldpsw x9, x10, [sp], #4
@@ -2439,16 +2439,16 @@ drps
# CHECK-NEXT: 2 5 3.00 * ldp x22, x23, [x3], #-512
# CHECK-NEXT: 2 5 3.00 * ldp x24, x25, [x4], #8
# CHECK-NEXT: 2 5 3.00 * ldp s29, s28, [sp], #252
-# CHECK-NEXT: 2 4 1.00 * stp s27, s26, [sp], #-256
+# CHECK-NEXT: 2 1 1.00 * stp s27, s26, [sp], #-256
# CHECK-NEXT: 2 5 3.00 * ldp s1, s2, [x3], #44
-# CHECK-NEXT: 2 4 1.00 * stp d3, d5, [x9], #504
-# CHECK-NEXT: 2 4 1.00 * stp d7, d11, [x10], #-512
+# CHECK-NEXT: 2 1 1.00 * stp d3, d5, [x9], #504
+# CHECK-NEXT: 2 1 1.00 * stp d7, d11, [x10], #-512
# CHECK-NEXT: 2 5 3.00 * ldp d2, d3, [x30], #-8
-# CHECK-NEXT: 2 4 1.00 * stp q3, q5, [sp], #0
-# CHECK-NEXT: 2 4 1.00 * stp q17, q19, [sp], #1008
+# CHECK-NEXT: 2 1 1.00 * stp q3, q5, [sp], #0
+# CHECK-NEXT: 2 1 1.00 * stp q17, q19, [sp], #1008
# CHECK-NEXT: 2 5 3.00 * ldp q23, q29, [x1], #-1024
# CHECK-NEXT: 2 5 3.00 * ldp w3, w5, [sp, #0]!
-# CHECK-NEXT: 2 4 1.00 * stp wzr, w9, [sp, #252]!
+# CHECK-NEXT: 2 1 1.00 * stp wzr, w9, [sp, #252]!
# CHECK-NEXT: 2 5 3.00 * ldp w2, wzr, [sp, #-256]!
# CHECK-NEXT: 2 5 3.00 * ldp w9, w10, [sp, #4]!
# CHECK-NEXT: 2 5 3.00 * ldpsw x9, x10, [sp, #4]!
@@ -2458,29 +2458,29 @@ drps
# CHECK-NEXT: 2 5 3.00 * ldp x22, x23, [x3, #-512]!
# CHECK-NEXT: 2 5 3.00 * ldp x24, x25, [x4, #8]!
# CHECK-NEXT: 2 5 3.00 * ldp s29, s28, [sp, #252]!
-# CHECK-NEXT: 2 4 1.00 * stp s27, s26, [sp, #-256]!
+# CHECK-NEXT: 2 1 1.00 * stp s27, s26, [sp, #-256]!
# CHECK-NEXT: 2 5 3.00 * ldp s1, s2, [x3, #44]!
-# CHECK-NEXT: 2 4 1.00 * stp d3, d5, [x9, #504]!
-# CHECK-NEXT: 2 4 1.00 * stp d7, d11, [x10, #-512]!
+# CHECK-NEXT: 2 1 1.00 * stp d3, d5, [x9, #504]!
+# CHECK-NEXT: 2 1 1.00 * stp d7, d11, [x10, #-512]!
# CHECK-NEXT: 2 5 3.00 * ldp d2, d3, [x30, #-8]!
-# CHECK-NEXT: 2 4 1.00 * stp q3, q5, [sp, #0]!
-# CHECK-NEXT: 2 4 1.00 * stp q17, q19, [sp, #1008]!
+# CHECK-NEXT: 2 1 1.00 * stp q3, q5, [sp, #0]!
+# CHECK-NEXT: 2 1 1.00 * stp q17, q19, [sp, #1008]!
# CHECK-NEXT: 2 5 3.00 * ldp q23, q29, [x1, #-1024]!
# CHECK-NEXT: 2 5 2.00 * ldnp w3, w5, [sp]
-# CHECK-NEXT: 1 4 1.00 * stnp wzr, w9, [sp, #252]
+# CHECK-NEXT: 1 1 1.00 * stnp wzr, w9, [sp, #252]
# CHECK-NEXT: 2 5 2.00 * ldnp w2, wzr, [sp, #-256]
# CHECK-NEXT: 2 5 2.00 * ldnp w9, w10, [sp, #4]
# CHECK-NEXT: 2 5 2.00 * ldnp x21, x29, [x2, #504]
# CHECK-NEXT: 2 5 2.00 * ldnp x22, x23, [x3, #-512]
# CHECK-NEXT: 2 5 2.00 * ldnp x24, x25, [x4, #8]
# CHECK-NEXT: 2 5 2.00 * ldnp s29, s28, [sp, #252]
-# CHECK-NEXT: 1 4 1.00 * stnp s27, s26, [sp, #-256]
+# CHECK-NEXT: 1 1 1.00 * stnp s27, s26, [sp, #-256]
# CHECK-NEXT: 2 5 2.00 * ldnp s1, s2, [x3, #44]
-# CHECK-NEXT: 1 4 1.00 * stnp d3, d5, [x9, #504]
-# CHECK-NEXT: 1 4 1.00 * stnp d7, d11, [x10, #-512]
+# CHECK-NEXT: 1 1 1.00 * stnp d3, d5, [x9, #504]
+# CHECK-NEXT: 1 1 1.00 * stnp d7, d11, [x10, #-512]
# CHECK-NEXT: 2 5 2.00 * ldnp d2, d3, [x30, #-8]
-# CHECK-NEXT: 1 4 1.00 * stnp q3, q5, [sp]
-# CHECK-NEXT: 1 4 1.00 * stnp q17, q19, [sp, #1008]
+# CHECK-NEXT: 1 1 1.00 * stnp q3, q5, [sp]
+# CHECK-NEXT: 1 1 1.00 * stnp q17, q19, [sp, #1008]
# CHECK-NEXT: 2 5 2.00 * ldnp q23, q29, [x1, #-1024]
# CHECK-NEXT: 1 3 0.50 mov w3, #983055
# CHECK-NEXT: 1 3 0.50 mov x10, #-6148914691236517206
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
index c353324205490..8502d0950fec1 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
@@ -41,6 +41,7 @@ add w7, w9, w0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 1 (5.3%)
+# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-alias.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-alias.s
index 207b0358cfb6c..d9a5b2da9c937 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-alias.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-alias.s
@@ -8,12 +8,12 @@ ldr x2, [x4]
# CHECK: Iterations: 5
# CHECK-NEXT: Instructions: 10
-# CHECK-NEXT: Total Cycles: 9
+# CHECK-NEXT: Total Cycles: 8
# CHECK-NEXT: Total uOps: 10
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.11
-# CHECK-NEXT: IPC: 1.11
+# CHECK-NEXT: uOps Per Cycle: 1.25
+# CHECK-NEXT: IPC: 1.25
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
@@ -25,7 +25,7 @@ ldr x2, [x4]
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 4 1.00 * str x1, [x4]
+# CHECK-NEXT: 1 1 1.00 * str x1, [x4]
# CHECK-NEXT: 1 3 1.00 * ldr x2, [x4]
# CHECK: Resources:
@@ -52,18 +52,18 @@ ldr x2, [x4]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldr x2, [x4]
# CHECK: Timeline view:
-# CHECK-NEXT: Index 012345678
+# CHECK-NEXT: Index 01234567
-# CHECK: [0,0] DeeeE. . str x1, [x4]
-# CHECK-NEXT: [0,1] .DeeE. . ldr x2, [x4]
-# CHECK-NEXT: [1,0] .DeeeE . str x1, [x4]
-# CHECK-NEXT: [1,1] . DeeE . ldr x2, [x4]
-# CHECK-NEXT: [2,0] . DeeeE . str x1, [x4]
-# CHECK-NEXT: [2,1] . DeeE . ldr x2, [x4]
-# CHECK-NEXT: [3,0] . DeeeE. str x1, [x4]
-# CHECK-NEXT: [3,1] . DeeE. ldr x2, [x4]
-# CHECK-NEXT: [4,0] . DeeeE str x1, [x4]
-# CHECK-NEXT: [4,1] . DeeE ldr x2, [x4]
+# CHECK: [0,0] DE . . str x1, [x4]
+# CHECK-NEXT: [0,1] DeeE . . ldr x2, [x4]
+# CHECK-NEXT: [1,0] .DE . . str x1, [x4]
+# CHECK-NEXT: [1,1] .DeeE. . ldr x2, [x4]
+# CHECK-NEXT: [2,0] . DE . . str x1, [x4]
+# CHECK-NEXT: [2,1] . DeeE . ldr x2, [x4]
+# CHECK-NEXT: [3,0] . DE. . str x1, [x4]
+# CHECK-NEXT: [3,1] . DeeE. ldr x2, [x4]
+# CHECK-NEXT: [4,0] . DE . str x1, [x4]
+# CHECK-NEXT: [4,1] . DeeE ldr x2, [x4]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s
index 8935d254cd98d..7c51ceef1489d 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s
@@ -41,6 +41,7 @@ add w7, w9, w0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 7 (29.2%)
+# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
More information about the llvm-commits
mailing list