[llvm] [AArch64] C1-Ultra Scheduling model (PR #182251)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 20 07:54:17 PDT 2026
================
@@ -0,0 +1,3167 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=c1-ultra -mattr=+aes -instruction-tables < %p/../Inputs/neon-instructions.s | FileCheck %s
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 2 0.17 abs d29, d24
+# CHECK-NEXT: 1 2 0.17 abs v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.17 abs v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.17 abs v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 abs v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.17 abs v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 abs v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.17 abs v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.17 add d17, d31, d29
+# CHECK-NEXT: 1 2 0.17 add v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.17 addhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.17 addhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 addhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.17 addhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.17 addhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.17 addhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 addp v7.2s, v1.2s, v2.2s
+# CHECK-NEXT: 1 2 0.17 addp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.17 addp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.17 addp d1, v14.2d
+# CHECK-NEXT: 1 2 0.25 addv s0, v0.4s
+# CHECK-NEXT: 1 2 0.25 addv h0, v0.4h
+# CHECK-NEXT: 1 4 0.33 addv h0, v0.8h
+# CHECK-NEXT: 1 4 0.33 addv b0, v0.8b
+# CHECK-NEXT: 1 4 0.50 addv b0, v0.16b
+# CHECK-NEXT: 1 2 0.25 aesd v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 aese v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 aesimc v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 aesmc v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.17 and v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.17 bic v0.4h, #15, lsl #8
+# CHECK-NEXT: 1 2 0.17 bic v23.8h, #101
+# CHECK-NEXT: 1 2 0.17 bic v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.17 bic v25.16b, v10.16b, v9.16b
+# CHECK-NEXT: 1 2 0.17 bic v24.2s, #70
+# CHECK-NEXT: 1 2 0.17 bit v5.8b, v12.8b, v22.8b
+# CHECK-NEXT: 1 2 0.17 bif v0.8b, v25.8b, v4.8b
+# CHECK-NEXT: 1 2 0.17 bif v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.17 bit v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.17 bsl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.17 bsl v27.16b, v13.16b, v21.16b
+# CHECK-NEXT: 1 2 0.17 cls v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.17 cls v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 cls v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.17 cls v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 cls v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.17 cls v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.17 clz v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.17 clz v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 clz v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.17 clz v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 clz v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.17 clz v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.17 cmeq v9.8h, v16.8h, v24.8h
+# CHECK-NEXT: 1 2 0.17 cmeq v14.4h, v18.4h, #0
+# CHECK-NEXT: 1 2 0.17 cmeq d20, d21, #0
+# CHECK-NEXT: 1 2 0.17 cmeq d20, d21, d22
+# CHECK-NEXT: 1 2 0.17 cmeq v0.16b, v0.16b, #0
+# CHECK-NEXT: 1 2 0.17 cmeq v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.17 cmge v22.8h, v16.8h, v3.8h
+# CHECK-NEXT: 1 2 0.17 cmge v22.16b, v30.16b, #0
+# CHECK-NEXT: 1 2 0.17 cmge d20, d21, #0
+# CHECK-NEXT: 1 2 0.17 cmge d20, d21, d22
+# CHECK-NEXT: 1 2 0.17 cmge v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.17 cmge v0.8b, v0.8b, #0
+# CHECK-NEXT: 1 2 0.17 cmgt v3.2d, v29.2d, v11.2d
+# CHECK-NEXT: 1 2 0.17 cmgt d20, d21, #0
+# CHECK-NEXT: 1 2 0.17 cmgt d20, d21, d22
+# CHECK-NEXT: 1 2 0.17 cmgt v0.2s, v0.2s, #0
+# CHECK-NEXT: 1 2 0.17 cmgt v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 cmhi v28.4h, v25.4h, v21.4h
+# CHECK-NEXT: 1 2 0.17 cmhi d20, d21, d22
+# CHECK-NEXT: 1 2 0.17 cmhi v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.17 cmhs d20, d21, d22
+# CHECK-NEXT: 1 2 0.17 cmhs v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.17 cmle v21.2s, v19.2s, #0
+# CHECK-NEXT: 1 2 0.17 cmle d20, d21, #0
+# CHECK-NEXT: 1 2 0.17 cmle v0.2d, v0.2d, #0
+# CHECK-NEXT: 1 2 0.17 cmlt v26.4h, v12.4h, #0
+# CHECK-NEXT: 1 2 0.17 cmlt d20, d21, #0
+# CHECK-NEXT: 1 2 0.17 cmlt v0.8h, v0.8h, #0
+# CHECK-NEXT: 1 2 0.17 cmtst d20, d21, d22
+# CHECK-NEXT: 1 2 0.17 cmtst v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 cmtst v13.2d, v13.2d, v13.2d
+# CHECK-NEXT: 1 2 0.17 cnt v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.17 cnt v0.8b, v0.8b
+# CHECK-NEXT: 1 3 1.00 dup v0.16b, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.2d, x28
+# CHECK-NEXT: 1 3 1.00 dup v0.2s, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.4h, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.4s, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.8b, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.8h, w28
+# CHECK-NEXT: 1 2 0.17 mov b0, v0.b[1]
+# CHECK-NEXT: 1 2 0.17 mov d0, v0.d[1]
+# CHECK-NEXT: 1 2 0.17 mov h0, v0.h[1]
+# CHECK-NEXT: 1 2 0.17 mov s0, v0.s[1]
+# CHECK-NEXT: 1 2 0.17 dup v0.16b, v0.b[1]
+# CHECK-NEXT: 1 2 0.17 dup v0.2d, v0.d[1]
+# CHECK-NEXT: 1 2 0.17 dup v0.2s, v0.s[1]
+# CHECK-NEXT: 1 2 0.17 dup v0.4h, v0.h[1]
+# CHECK-NEXT: 1 2 0.17 dup v0.4s, v0.s[1]
+# CHECK-NEXT: 1 2 0.17 dup v0.8b, v0.b[1]
+# CHECK-NEXT: 1 2 0.17 dup v0.8h, v0.h[1]
+# CHECK-NEXT: 1 2 0.17 eor v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.17 ext v0.16b, v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.17 ext v0.8b, v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 2 0.17 fabd d29, d24, d20
+# CHECK-NEXT: 1 2 0.17 fabd s29, s24, s20
+# CHECK-NEXT: 1 2 0.17 fabd h27, h20, h17
+# CHECK-NEXT: 1 2 0.17 fabd v13.8h, v28.8h, v12.8h
+# CHECK-NEXT: 1 2 0.17 fabd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 fabs h25, h7
+# CHECK-NEXT: 1 2 0.17 fabs v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.17 fabs v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 fabs v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.17 fabs v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 fabs v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.17 facge d20, d21, d22
+# CHECK-NEXT: 1 2 0.17 facge s10, s11, s12
+# CHECK-NEXT: 1 2 0.17 facge h24, h26, h29
+# CHECK-NEXT: 1 2 0.17 facge v25.4h, v16.4h, v11.4h
+# CHECK-NEXT: 1 2 0.17 facge v19.2s, v24.2s, v5.2s
+# CHECK-NEXT: 1 2 0.17 facge v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 facgt d20, d21, d22
+# CHECK-NEXT: 1 2 0.17 facgt s10, s11, s12
+# CHECK-NEXT: 1 2 0.17 facgt h0, h4, h10
+# CHECK-NEXT: 1 2 0.17 facgt v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.17 facgt v22.8h, v14.8h, v31.8h
+# CHECK-NEXT: 1 2 0.17 facgt v22.4s, v8.4s, v2.4s
+# CHECK-NEXT: 1 2 0.17 fadd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 faddp h10, v19.2h
+# CHECK-NEXT: 1 2 0.17 faddp d11, v28.2d
+# CHECK-NEXT: 1 2 0.17 faddp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 faddp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 faddp v16.2d, v11.2d, v5.2d
+# CHECK-NEXT: 1 2 0.17 fcmeq h30, h6, h1
+# CHECK-NEXT: 1 2 0.17 fcmeq h19, h23, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmeq d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmeq d20, d21, d22
+# CHECK-NEXT: 1 2 0.17 fcmeq s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmeq s10, s11, s12
+# CHECK-NEXT: 1 2 0.17 fcmeq v0.2s, v0.2s, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmeq v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 fcmeq v12.4s, v11.4s, v26.4s
+# CHECK-NEXT: 1 2 0.17 fcmeq v18.2d, v17.2d, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmge h10, h23, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmge h1, h16, h12
+# CHECK-NEXT: 1 2 0.17 fcmge d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmge d20, d21, d22
+# CHECK-NEXT: 1 2 0.17 fcmge s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmge s10, s11, s12
+# CHECK-NEXT: 1 2 0.17 fcmge v0.2d, v0.2d, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmge v17.2d, v11.2d, v13.2d
+# CHECK-NEXT: 1 2 0.17 fcmge v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 fcmge v18.4h, v27.4h, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmge v20.8h, v19.8h, v22.8h
+# CHECK-NEXT: 1 2 0.17 fcmge v17.2s, v11.2s, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmgt h4, h5, h0
+# CHECK-NEXT: 1 2 0.17 fcmgt h0, h18, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmgt d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmgt d20, d21, d22
+# CHECK-NEXT: 1 2 0.17 fcmgt s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmgt s10, s11, s12
+# CHECK-NEXT: 1 2 0.17 fcmgt v0.4s, v0.4s, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmgt v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 fcmgt v24.8h, v24.8h, v28.8h
+# CHECK-NEXT: 1 2 0.17 fcmgt v0.8h, v11.8h, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmgt v19.2d, v31.2d, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmle v16.8h, v11.8h, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmle v22.4s, v30.4s, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmle d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmle s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmle v0.2d, v0.2d, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmle h18, h28, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmlt h23, h7, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmlt d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmlt s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmlt v0.4s, v0.4s, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmlt v8.4h, v2.4h, #0.0
+# CHECK-NEXT: 1 2 0.17 fcmlt v7.2d, v16.2d, #0.0
+# CHECK-NEXT: 1 3 0.25 fcvtas d21, d14
+# CHECK-NEXT: 1 3 0.25 fcvtas s12, s13
+# CHECK-NEXT: 1 3 0.25 fcvtas h12, h13
+# CHECK-NEXT: 1 3 0.25 fcvtas v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtas v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fcvtas v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 fcvtas v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtas v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 fcvtau d21, d14
+# CHECK-NEXT: 1 3 0.25 fcvtau s12, s13
+# CHECK-NEXT: 1 3 0.25 fcvtau h12, h13
+# CHECK-NEXT: 1 3 0.25 fcvtau v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtau v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fcvtau v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 fcvtau v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtau v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 fcvtl v0.2d, v0.2s
+# CHECK-NEXT: 1 3 0.25 fcvtl v0.4s, v0.4h
+# CHECK-NEXT: 1 3 0.25 fcvtl2 v0.2d, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtl2 v0.4s, v0.8h
+# CHECK-NEXT: 1 3 0.25 fcvtms d21, d14
+# CHECK-NEXT: 1 3 0.25 fcvtms s22, s13
+# CHECK-NEXT: 1 3 0.25 fcvtms h22, h13
+# CHECK-NEXT: 1 3 0.25 fcvtms v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtms v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fcvtms v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 fcvtms v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtms v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 fcvtmu d21, d14
+# CHECK-NEXT: 1 3 0.25 fcvtmu s12, s13
+# CHECK-NEXT: 1 3 0.25 fcvtmu h12, h13
+# CHECK-NEXT: 1 3 0.25 fcvtmu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtmu v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fcvtmu v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 fcvtmu v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtmu v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 fcvtn v0.2s, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtn v0.4h, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtn2 v0.4s, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtns d21, d14
+# CHECK-NEXT: 1 3 0.25 fcvtns s22, s13
+# CHECK-NEXT: 1 3 0.25 fcvtns h22, h13
+# CHECK-NEXT: 1 3 0.25 fcvtns v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtns v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fcvtns v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 fcvtns v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtns v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 fcvtnu d21, d14
+# CHECK-NEXT: 1 3 0.25 fcvtnu s12, s13
+# CHECK-NEXT: 1 3 0.25 fcvtnu h12, h13
+# CHECK-NEXT: 1 3 0.25 fcvtnu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtnu v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fcvtnu v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 fcvtnu v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtnu v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 fcvtps d21, d14
+# CHECK-NEXT: 1 3 0.25 fcvtps s22, s13
+# CHECK-NEXT: 1 3 0.25 fcvtps h22, h13
+# CHECK-NEXT: 1 3 0.25 fcvtps v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtps v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fcvtps v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 fcvtps v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtps v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 fcvtpu d21, d14
+# CHECK-NEXT: 1 3 0.25 fcvtpu s12, s13
+# CHECK-NEXT: 1 3 0.25 fcvtpu h12, h13
+# CHECK-NEXT: 1 3 0.25 fcvtpu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtpu v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fcvtpu v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 fcvtpu v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtpu v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 fcvtxn s22, d13
+# CHECK-NEXT: 1 3 0.25 fcvtxn v0.2s, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtxn2 v0.4s, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtzs d21, d12, #1
+# CHECK-NEXT: 1 3 0.25 fcvtzs d21, d14
+# CHECK-NEXT: 1 3 0.25 fcvtzs s12, s13
+# CHECK-NEXT: 1 3 0.25 fcvtzs s21, s12, #1
+# CHECK-NEXT: 1 3 0.25 fcvtzs h21, h14
+# CHECK-NEXT: 1 3 0.25 fcvtzs h21, h12, #1
+# CHECK-NEXT: 1 3 0.25 fcvtzs v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtzs v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.25 fcvtzs v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fcvtzs v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 0.25 fcvtzs v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 fcvtzs v20.4h, v24.4h, #11
+# CHECK-NEXT: 1 3 0.25 fcvtzs v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtzs v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 3 0.25 fcvtzs v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 fcvtzs v18.8h, v10.8h, #7
+# CHECK-NEXT: 1 3 0.25 fcvtzu d21, d12, #1
+# CHECK-NEXT: 1 3 0.25 fcvtzu d21, d14
+# CHECK-NEXT: 1 3 0.25 fcvtzu s12, s13
+# CHECK-NEXT: 1 3 0.25 fcvtzu s21, s12, #1
+# CHECK-NEXT: 1 3 0.25 fcvtzu h12, h13
+# CHECK-NEXT: 1 3 0.25 fcvtzu h21, h12, #1
+# CHECK-NEXT: 1 3 0.25 fcvtzu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 fcvtzu v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.25 fcvtzu v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fcvtzu v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 0.25 fcvtzu v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 fcvtzu v19.4h, v26.4h, #9
+# CHECK-NEXT: 1 3 0.25 fcvtzu v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 fcvtzu v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 3 0.25 fcvtzu v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 fcvtzu v27.8h, v6.8h, #11
+# CHECK-NEXT: 1 13 2.00 fdiv v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 9 2.00 fdiv v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 8 4.00 fdiv v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 11 4.00 fdiv v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 12 8.00 fdiv v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.17 fmax v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.17 fmax v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 fmax v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 fmaxnm v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.17 fmaxnm v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 fmaxnm v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.17 fmaxnmp h25, v19.2h
+# CHECK-NEXT: 1 3 0.17 fmaxnmp d17, v29.2d
+# CHECK-NEXT: 1 3 0.17 fmaxnmp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.17 fmaxnmp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.17 fmaxnmp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.17 fmaxnmv h0, v13.4h
+# CHECK-NEXT: 1 6 0.17 fmaxnmv h12, v11.8h
+# CHECK-NEXT: 1 4 0.17 fmaxnmv s28, v31.4s
+# CHECK-NEXT: 1 3 0.17 fmaxp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.17 fmaxp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.17 fmaxp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.17 fmaxp h15, v25.2h
+# CHECK-NEXT: 1 3 0.17 fmaxp s6, v2.2s
+# CHECK-NEXT: 1 4 0.17 fmaxv h0, v0.4h
+# CHECK-NEXT: 1 6 0.17 fmaxv h0, v0.8h
+# CHECK-NEXT: 1 4 0.17 fmaxv s0, v0.4s
+# CHECK-NEXT: 1 2 0.17 fmin v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.17 fmin v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 fmin v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 fminnm v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.17 fminnm v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 fminnm v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.17 fminnmp h20, v14.2h
+# CHECK-NEXT: 1 3 0.17 fminnmp d15, v8.2d
+# CHECK-NEXT: 1 3 0.17 fminnmp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.17 fminnmp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.17 fminnmp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.17 fminnmv h19, v25.4h
+# CHECK-NEXT: 1 6 0.17 fminnmv h23, v17.8h
+# CHECK-NEXT: 1 4 0.17 fminnmv s29, v17.4s
+# CHECK-NEXT: 1 3 0.17 fminp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.17 fminp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.17 fminp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.17 fminp h7, v10.2h
+# CHECK-NEXT: 1 3 0.17 fminp s17, v7.2s
+# CHECK-NEXT: 1 4 0.17 fminv h3, v30.4h
+# CHECK-NEXT: 1 6 0.17 fminv h29, v12.8h
+# CHECK-NEXT: 1 4 0.17 fminv s16, v19.4s
+# CHECK-NEXT: 1 4 0.17 fmla d0, d1, v0.d[1]
+# CHECK-NEXT: 1 4 0.17 fmla h23, h24, v15.h[4]
+# CHECK-NEXT: 1 4 0.17 fmla s0, s1, v0.s[3]
+# CHECK-NEXT: 1 4 0.17 fmla v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.17 fmla v29.8h, v15.8h, v10.h[4]
+# CHECK-NEXT: 1 4 0.17 fmla v2.2s, v16.2s, v28.s[0]
+# CHECK-NEXT: 1 4 0.17 fmla v14.4s, v14.4s, v5.s[3]
+# CHECK-NEXT: 1 4 0.17 fmla v1.4s, v24.4s, v12.4s
+# CHECK-NEXT: 1 4 0.17 fmla v10.2d, v14.2d, v21.d[1]
+# CHECK-NEXT: 1 4 0.17 fmls d0, d4, v0.d[1]
+# CHECK-NEXT: 1 4 0.17 fmls h8, h14, v7.h[4]
+# CHECK-NEXT: 1 4 0.17 fmls s3, s5, v0.s[3]
+# CHECK-NEXT: 1 4 0.17 fmls v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.17 fmls v30.8h, v18.8h, v4.h[6]
+# CHECK-NEXT: 1 4 0.17 fmls v10.2s, v27.2s, v0.s[0]
+# CHECK-NEXT: 1 4 0.17 fmls v27.4s, v7.4s, v24.s[0]
+# CHECK-NEXT: 1 4 0.17 fmls v10.2d, v22.2d, v29.d[0]
+# CHECK-NEXT: 1 4 0.17 fmls v6.8h, v15.8h, v23.8h
+# CHECK-NEXT: 1 2 0.17 fmov v0.2d, #-1.25000000
+# CHECK-NEXT: 1 2 0.17 fmov v0.2s, #13.00000000
+# CHECK-NEXT: 1 2 0.17 fmov v0.4s, #1.00000000
+# CHECK-NEXT: 1 3 0.17 fmul h18, h4, v7.h[3]
+# CHECK-NEXT: 1 3 0.17 fmul v10.4h, v2.4h, v7.h[5]
+# CHECK-NEXT: 1 3 0.17 fmul v5.2s, v12.2s, v9.s[0]
+# CHECK-NEXT: 1 3 0.17 fmul v15.4s, v30.4s, v2.s[3]
+# CHECK-NEXT: 1 3 0.17 fmul v11.2d, v31.2d, v24.d[1]
+# CHECK-NEXT: 1 3 0.17 fmul h28, h14, h3
+# CHECK-NEXT: 1 3 0.17 fmul d0, d1, v0.d[1]
+# CHECK-NEXT: 1 3 0.17 fmul s0, s1, v0.s[3]
+# CHECK-NEXT: 1 3 0.17 fmul v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.17 fmulx d0, d4, v0.d[1]
+# CHECK-NEXT: 1 2 0.17 fmulx d23, d11, d1
+# CHECK-NEXT: 1 2 0.17 fmulx s20, s22, s15
+# CHECK-NEXT: 1 3 0.17 fmulx h18, h17, v7.h[1]
+# CHECK-NEXT: 1 2 0.17 fmulx h20, h25, h0
+# CHECK-NEXT: 1 3 0.17 fmulx s3, s5, v0.s[3]
+# CHECK-NEXT: 1 3 0.17 fmulx v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.17 fmulx v28.4h, v25.4h, v15.h[1]
+# CHECK-NEXT: 1 3 0.17 fmulx v3.2s, v22.2s, v23.s[3]
+# CHECK-NEXT: 1 3 0.17 fmulx v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.17 fmulx v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.17 fmulx v5.4s, v28.4s, v15.s[3]
+# CHECK-NEXT: 1 3 0.17 fmulx v22.2d, v18.2d, v25.d[1]
+# CHECK-NEXT: 1 2 0.17 fneg v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.17 fneg v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.17 fneg v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.17 fneg v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.17 fneg v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 frecpe h20, h8
+# CHECK-NEXT: 1 3 0.25 frecpe d13, d13
+# CHECK-NEXT: 1 3 0.25 frecpe s19, s14
+# CHECK-NEXT: 1 2 0.17 frecpe v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 frecpe v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 frecpe v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 frecpe v0.4s, v0.4s
+# CHECK-NEXT: 1 6 1.00 frecpe v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.17 frecps h29, h19, h8
+# CHECK-NEXT: 1 3 0.25 frecpx h18, h11
+# CHECK-NEXT: 1 4 0.17 frecps v12.8h, v25.8h, v4.8h
+# CHECK-NEXT: 1 4 0.17 frecps v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.17 frecps d22, d30, d21
+# CHECK-NEXT: 1 4 0.17 frecps s21, s16, s13
+# CHECK-NEXT: 1 4 0.17 frecps v7.2d, v29.2d, v18.2d
+# CHECK-NEXT: 1 3 0.25 frecpx d16, d19
+# CHECK-NEXT: 1 3 0.25 frecpx s18, s10
+# CHECK-NEXT: 1 3 0.25 frinta v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 frinta v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 frinta v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 frinta v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 frinta v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 frinti v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 frinti v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 frinti v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 frinti v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 frinti v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 frintm v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 frintm v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 frintm v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 frintm v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 frintm v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 frintn v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 frintn v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 frintn v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 frintn v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 frintn v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 frintp v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 frintp v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 frintp v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 frintp v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 frintp v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 frintx v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 frintx v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 frintx v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 frintx v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 frintx v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 frintz v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 frintz v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 frintz v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.25 frintz v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.25 frintz v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 frsqrte h23, h26
+# CHECK-NEXT: 1 3 0.25 frsqrte d21, d12
+# CHECK-NEXT: 1 3 0.25 frsqrte s22, s13
+# CHECK-NEXT: 1 3 0.25 frsqrte v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 frsqrte v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 frsqrte v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 frsqrte v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.17 frsqrts v20.4s, v26.4s, v27.4s
+# CHECK-NEXT: 1 4 0.17 frsqrts v8.4h, v9.4h, v30.4h
+# CHECK-NEXT: 1 6 1.00 frsqrte v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.17 frsqrts h28, h26, h1
+# CHECK-NEXT: 1 4 0.17 frsqrts d8, d22, d18
+# CHECK-NEXT: 1 4 0.17 frsqrts s21, s5, s12
+# CHECK-NEXT: 1 4 0.17 frsqrts v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 13 2.00 fsqrt v0.2d, v0.2d
+# CHECK-NEXT: 1 9 2.00 fsqrt v0.2s, v0.2s
+# CHECK-NEXT: 1 8 4.00 fsqrt v0.4h, v0.4h
+# CHECK-NEXT: 1 11 4.00 fsqrt v0.4s, v0.4s
+# CHECK-NEXT: 1 12 8.00 fsqrt v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.17 fsub v13.8h, v15.8h, v17.8h
+# CHECK-NEXT: 1 2 0.17 fsub v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 6 0.25 * ld1 { v0.16b }, [x0]
+# CHECK-NEXT: 2 6 0.50 * ld1 { v0.16b, v1.16b }, [x14]
+# CHECK-NEXT: 3 6 0.75 * ld1 { v19.16b, v20.16b, v21.16b }, [x10]
+# CHECK-NEXT: 4 7 1.00 * ld1 { v13.16b, v14.16b, v15.16b, v16.16b }, [x9]
+# CHECK-NEXT: 1 6 0.25 * ld1 { v24.8h }, [x27]
+# CHECK-NEXT: 2 6 0.50 * ld1 { v1.8h, v2.8h }, [x27]
+# CHECK-NEXT: 3 6 0.50 * ld1 { v0.8h, v1.8h }, [sp], #32
+# CHECK-NEXT: 3 6 0.75 * ld1 { v21.8h, v22.8h, v23.8h }, [x22]
+# CHECK-NEXT: 4 7 1.00 * ld1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x21]
+# CHECK-NEXT: 1 6 0.25 * ld1 { v3.4s }, [x4]
+# CHECK-NEXT: 2 6 0.50 * ld1 { v11.4s, v12.4s }, [x30]
+# CHECK-NEXT: 3 6 0.75 * ld1 { v0.4s, v1.4s, v2.4s }, [x24]
+# CHECK-NEXT: 4 7 1.00 * ld1 { v15.4s, v16.4s, v17.4s, v18.4s }, [x28]
+# CHECK-NEXT: 4 6 0.75 * ld1 { v0.4s, v1.4s, v2.4s }, [x0], #48
+# CHECK-NEXT: 1 6 0.25 * ld1 { v3.2d }, [x28]
+# CHECK-NEXT: 2 6 0.50 * ld1 { v13.2d, v14.2d }, [x13]
+# CHECK-NEXT: 3 6 0.75 * ld1 { v12.2d, v13.2d, v14.2d }, [x15]
+# CHECK-NEXT: 4 6 0.75 * ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK-NEXT: 4 7 1.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT: 2 6 0.25 * ld1 { v0.1d }, [x15], x2
+# CHECK-NEXT: 2 6 0.50 * ld1 { v27.1d, v28.1d }, [x7]
+# CHECK-NEXT: 3 6 0.75 * ld1 { v14.1d, v15.1d, v16.1d }, [x3]
+# CHECK-NEXT: 4 7 1.00 * ld1 { v22.1d, v23.1d, v24.1d, v25.1d }, [x4]
+# CHECK-NEXT: 2 6 0.50 * ld1 { v0.2s, v1.2s }, [x15]
+# CHECK-NEXT: 3 6 0.75 * ld1 { v16.2s, v17.2s, v18.2s }, [x27]
+# CHECK-NEXT: 4 7 1.00 * ld1 { v21.2s, v22.2s, v23.2s, v24.2s }, [x21]
+# CHECK-NEXT: 2 6 0.50 * ld1 { v25.4h, v26.4h }, [x3]
+# CHECK-NEXT: 4 7 1.00 * ld1 { v20.4h, v21.4h, v22.4h, v23.4h }, [x15]
+# CHECK-NEXT: 3 6 0.75 * ld1 { v0.4h, v1.4h, v2.4h }, [sp]
+# CHECK-NEXT: 2 6 0.50 * ld1 { v24.8b, v25.8b }, [x6]
+# CHECK-NEXT: 3 6 0.75 * ld1 { v7.8b, v8.8b, v9.8b }, [x12]
+# CHECK-NEXT: 4 7 1.00 * ld1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x13]
+# CHECK-NEXT: 3 6 0.50 * ld1 { v0.4s, v1.4s }, [sp], #32
+# CHECK-NEXT: 3 6 0.75 * ld1 { v0.4s, v1.4s, v2.4s }, [sp]
+# CHECK-NEXT: 5 7 1.00 * ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT: 2 8 0.25 * ld1 { v0.b }[7], [x0]
+# CHECK-NEXT: 3 8 0.25 * ld1 { v0.h }[3], [x0], #2
+# CHECK-NEXT: 2 8 0.25 * ld1 { v18.h }[3], [x1]
+# CHECK-NEXT: 2 8 0.25 * ld1 { v0.s }[1], [x15]
+# CHECK-NEXT: 3 8 0.25 * ld1 { v0.d }[0], [x15], #8
+# CHECK-NEXT: 2 8 0.25 * ld1 { v11.d }[0], [x13]
+# CHECK-NEXT: 2 6 0.25 * ld1 { v0.8h }, [x15], x2
+# CHECK-NEXT: 2 6 0.50 * ld1 { v0.8h, v1.8h }, [x15]
+# CHECK-NEXT: 2 8 0.25 * ld1 { v0.b }[9], [x0]
+# CHECK-NEXT: 3 8 0.25 * ld1 { v0.b }[9], [x0], #1
+# CHECK-NEXT: 2 6 0.25 * ld1r { v0.16b }, [x0]
+# CHECK-NEXT: 3 6 0.25 * ld1r { v0.8h }, [x0], #2
+# CHECK-NEXT: 2 6 0.25 * ld1r { v0.4s }, [x15]
+# CHECK-NEXT: 2 6 0.25 * ld1r { v3.1d }, [x15]
+# CHECK-NEXT: 3 6 0.25 * ld1r { v0.2d }, [x15], x16
+# CHECK-NEXT: 2 6 0.25 * ld1r { v18.2d }, [x0]
+# CHECK-NEXT: 2 6 0.25 * ld1r { v8.8b }, [x23]
+# CHECK-NEXT: 2 6 0.25 * ld1r { v28.4h }, [x9]
+# CHECK-NEXT: 2 6 0.25 * ld1r { v3.8h }, [x16]
+# CHECK-NEXT: 2 6 0.25 * ld1r { v10.2s }, [x20]
+# CHECK-NEXT: 3 8 0.33 * ld2 { v0.4h, v1.4h }, [x21]
+# CHECK-NEXT: 4 8 0.50 * ld2 { v8.8h, v9.8h }, [x28]
+# CHECK-NEXT: 3 8 0.33 * ld2 { v2.2s, v3.2s }, [x16]
+# CHECK-NEXT: 4 8 0.50 * ld2 { v22.4s, v23.4s }, [x4]
+# CHECK-NEXT: 4 8 0.50 * ld2 { v22.2d, v23.2d }, [x17]
+# CHECK-NEXT: 3 8 0.33 * ld2 { v29.b, v30.b }[3], [x1]
+# CHECK-NEXT: 3 8 0.33 * ld2 { v26.s, v27.s }[1], [x17]
+# CHECK-NEXT: 3 8 0.33 * ld2 { v1.d, v2.d }[0], [x10]
+# CHECK-NEXT: 4 8 0.50 * ld2 { v0.16b, v1.16b }, [x0]
+# CHECK-NEXT: 3 8 0.33 * ld2 { v13.8b, v14.8b }, [x4]
+# CHECK-NEXT: 4 8 0.33 * ld2 { v0.8b, v1.8b }, [x0], #16
+# CHECK-NEXT: 3 6 0.25 * ld1r { v0.16b }, [x0], #1
+# CHECK-NEXT: 2 6 0.25 * ld1r { v0.8h }, [x15]
+# CHECK-NEXT: 3 6 0.25 * ld1r { v0.8h }, [x15], #2
+# CHECK-NEXT: 5 8 0.50 * ld2 { v0.16b, v1.16b }, [x0], x1
+# CHECK-NEXT: 3 8 0.33 * ld2 { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: 3 8 0.33 * ld2 { v0.h, v1.h }[7], [x15]
+# CHECK-NEXT: 4 8 0.33 * ld2 { v0.h, v1.h }[7], [x15], x8
+# CHECK-NEXT: 4 8 0.33 * ld2 { v0.h, v1.h }[7], [x15], #4
+# CHECK-NEXT: 3 8 0.33 * ld2r { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: 3 8 0.33 * ld2r { v10.16b, v11.16b }, [x23]
+# CHECK-NEXT: 4 8 0.33 * ld2r { v0.4h, v1.4h }, [x0], #4
+# CHECK-NEXT: 3 8 0.33 * ld2r { v25.4h, v26.4h }, [x11]
+# CHECK-NEXT: 3 8 0.33 * ld2r { v23.8h, v24.8h }, [x10]
+# CHECK-NEXT: 3 8 0.33 * ld2r { v0.2s, v1.2s }, [sp]
+# CHECK-NEXT: 3 8 0.33 * ld2r { v8.4s, v9.4s }, [x17]
+# CHECK-NEXT: 4 8 0.33 * ld2r { v0.1d, v1.1d }, [sp], x8
+# CHECK-NEXT: 3 8 0.33 * ld2r { v9.1d, v10.1d }, [x25]
+# CHECK-NEXT: 3 8 0.33 * ld2r { v26.2d, v27.2d }, [x8]
+# CHECK-NEXT: 5 8 0.50 * ld3 { v8.8b, v9.8b, v10.8b }, [x0]
+# CHECK-NEXT: 6 9 0.75 * ld3 { v15.16b, v16.16b, v17.16b }, [x5]
+# CHECK-NEXT: 3 8 0.33 * ld2r { v0.2d, v1.2d }, [x0]
+# CHECK-NEXT: 4 8 0.33 * ld2r { v0.2d, v1.2d }, [x0], #16
+# CHECK-NEXT: 3 8 0.33 * ld2r { v0.4s, v1.4s }, [sp]
+# CHECK-NEXT: 4 8 0.33 * ld2r { v0.4s, v1.4s }, [sp], #8
+# CHECK-NEXT: 5 8 0.50 * ld3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: 7 9 0.75 * ld3 { v0.8h, v1.8h, v2.8h }, [x15], #48
+# CHECK-NEXT: 6 9 0.75 * ld3 { v7.8h, v8.8h, v9.8h }, [x21]
+# CHECK-NEXT: 5 8 0.50 * ld3 { v16.2s, v17.2s, v18.2s }, [x0]
+# CHECK-NEXT: 6 9 0.75 * ld3 { v12.4s, v13.4s, v14.4s }, [x25]
+# CHECK-NEXT: 5 8 0.50 * ld3 { v17.b, v18.b, v19.b }[2], [x27]
+# CHECK-NEXT: 5 8 0.50 * ld3 { v18.h, v19.h, v20.h }[5], [x16]
+# CHECK-NEXT: 6 9 0.75 * ld3 { v10.2d, v11.2d, v12.2d }, [x18]
+# CHECK-NEXT: 7 9 0.75 * ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+# CHECK-NEXT: 5 8 0.50 * ld3 { v0.s, v1.s, v2.s }[3], [sp]
+# CHECK-NEXT: 6 8 0.50 * ld3 { v0.s, v1.s, v2.s }[3], [sp], x3
+# CHECK-NEXT: 5 8 0.50 * ld3 { v5.d, v6.d, v7.d }[1], [x14]
+# CHECK-NEXT: 5 8 0.50 * ld3r { v0.8b, v1.8b, v2.8b }, [x15]
+# CHECK-NEXT: 6 8 0.75 * ld3r { v17.16b, v18.16b, v19.16b }, [x3]
+# CHECK-NEXT: 5 8 0.50 * ld3r { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: 6 8 0.50 * ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6
+# CHECK-NEXT: 5 8 0.50 * ld3r { v3.4h, v4.4h, v5.4h }, [x1]
+# CHECK-NEXT: 6 8 0.75 * ld3r { v6.8h, v7.8h, v8.8h }, [x28]
+# CHECK-NEXT: 5 8 0.50 * ld3r { v0.2s, v1.2s, v2.2s }, [x0]
+# CHECK-NEXT: 6 8 0.75 * ld3r { v28.4s, v29.4s, v30.4s }, [x2]
+# CHECK-NEXT: 6 8 0.50 * ld3r { v0.1d, v1.1d, v2.1d }, [x0], x0
+# CHECK-NEXT: 5 8 0.50 * ld3r { v1.1d, v2.1d, v3.1d }, [x28]
+# CHECK-NEXT: 6 8 0.75 * ld3r { v8.2d, v9.2d, v10.2d }, [x3]
+# CHECK-NEXT: 7 8 0.75 * ld4 { v6.8b, v7.8b, v8.8b, v9.8b }, [x27]
+# CHECK-NEXT: 10 9 1.50 * ld4 { v11.16b, v12.16b, v13.16b, v14.16b }, [x5]
+# CHECK-NEXT: 7 8 0.75 * ld4 { v21.4h, v22.4h, v23.4h, v24.4h }, [x14]
+# CHECK-NEXT: 10 9 1.50 * ld4 { v9.8h, v10.8h, v11.8h, v12.8h }, [x1]
+# CHECK-NEXT: 10 9 1.50 * ld4 { v17.4s, v18.4s, v19.4s, v20.4s }, [x4]
+# CHECK-NEXT: 5 8 0.50 * ld3r { v0.8b, v1.8b, v2.8b }, [x0]
+# CHECK-NEXT: 6 8 0.50 * ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3
+# CHECK-NEXT: 7 8 0.75 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: 11 9 1.50 * ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: 7 8 0.75 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+# CHECK-NEXT: 10 9 1.50 * ld4 { v2.2d, v3.2d, v4.2d, v5.2d }, [x24]
+# CHECK-NEXT: 7 8 0.75 * ld4 { v4.b, v5.b, v6.b, v7.b }[12], [x27]
+# CHECK-NEXT: 7 8 0.75 * ld4 { v5.h, v6.h, v7.h, v8.h }[0], [x4]
+# CHECK-NEXT: 8 8 0.75 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32
+# CHECK-NEXT: 8 8 0.75 * ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
+# CHECK-NEXT: 7 8 0.75 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [x26]
+# CHECK-NEXT: 7 8 0.75 * ld4r { v20.8b, v21.8b, v22.8b, v23.8b }, [x23]
+# CHECK-NEXT: 8 8 1.00 * ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x25]
+# CHECK-NEXT: 7 8 0.75 * ld4r { v16.4h, v17.4h, v18.4h, v19.4h }, [x6]
+# CHECK-NEXT: 7 8 0.75 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
+# CHECK-NEXT: 8 8 1.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# CHECK-NEXT: 8 8 1.00 * ld4r { v4.8h, v5.8h, v6.8h, v7.8h }, [x23]
+# CHECK-NEXT: 7 8 0.75 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x30]
+# CHECK-NEXT: 8 8 0.75 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
+# CHECK-NEXT: 8 8 1.00 * ld4r { v7.4s, v8.4s, v9.4s, v10.4s }, [x23]
+# CHECK-NEXT: 9 8 1.00 * ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], x8
+# CHECK-NEXT: 8 8 0.75 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7
+# CHECK-NEXT: 7 8 0.75 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: 8 8 0.75 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30
+# CHECK-NEXT: 1 4 0.25 mla v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.25 mla v15.8h, v22.8h, v4.h[3]
+# CHECK-NEXT: 1 4 0.25 mla v28.2s, v10.2s, v2.s[0]
+# CHECK-NEXT: 1 4 0.25 mls v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.25 mls v25.8h, v29.8h, v0.h[4]
+# CHECK-NEXT: 1 4 0.25 mls v22.2s, v29.2s, v0.s[3]
+# CHECK-NEXT: 1 4 0.25 mls v26.4s, v5.4s, v28.4s
+# CHECK-NEXT: 1 2 0.17 mov b0, v0.b[15]
+# CHECK-NEXT: 1 2 0.17 mov d6, v0.d[1]
+# CHECK-NEXT: 1 2 0.17 mov h2, v0.h[5]
+# CHECK-NEXT: 1 2 0.17 mov s17, v0.s[2]
+# CHECK-NEXT: 1 2 0.17 mov w8, v8.s[0]
+# CHECK-NEXT: 1 2 0.17 mov x30, v18.d[0]
+# CHECK-NEXT: 1 2 0.17 mov v2.b[0], v0.b[0]
+# CHECK-NEXT: 1 2 0.17 mov v2.h[1], v0.h[1]
+# CHECK-NEXT: 1 2 0.17 mov v2.s[2], v0.s[2]
+# CHECK-NEXT: 1 2 0.17 mov v2.d[1], v0.d[1]
+# CHECK-NEXT: 2 5 1.00 mov v0.b[0], w8
----------------
walkerkd wrote:
Per SWOG Table 2-15 ("ASIMD logical"), this should have latency 2 (and I think the throughput value is also incorrect).
https://github.com/llvm/llvm-project/pull/182251
More information about the llvm-commits mailing list