[llvm] [AArch64] C1-Ultra Scheduling model (PR #182251)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 20 07:54:17 PDT 2026


================
@@ -0,0 +1,3167 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=c1-ultra -mattr=+aes -instruction-tables < %p/../Inputs/neon-instructions.s | FileCheck %s
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      2     0.17                        abs	d29, d24
+# CHECK-NEXT:  1      2     0.17                        abs	v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.17                        abs	v0.2d, v0.2d
+# CHECK-NEXT:  1      2     0.17                        abs	v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        abs	v0.4h, v0.4h
+# CHECK-NEXT:  1      2     0.17                        abs	v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        abs	v0.8b, v0.8b
+# CHECK-NEXT:  1      2     0.17                        abs	v0.8h, v0.8h
+# CHECK-NEXT:  1      2     0.17                        add	d17, d31, d29
+# CHECK-NEXT:  1      2     0.17                        add	v0.8b, v0.8b, v0.8b
+# CHECK-NEXT:  1      2     0.17                        addhn	v0.2s, v0.2d, v0.2d
+# CHECK-NEXT:  1      2     0.17                        addhn	v0.4h, v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        addhn	v0.8b, v0.8h, v0.8h
+# CHECK-NEXT:  1      2     0.17                        addhn2	v0.16b, v0.8h, v0.8h
+# CHECK-NEXT:  1      2     0.17                        addhn2	v0.4s, v0.2d, v0.2d
+# CHECK-NEXT:  1      2     0.17                        addhn2	v0.8h, v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        addp	v7.2s, v1.2s, v2.2s
+# CHECK-NEXT:  1      2     0.17                        addp	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      2     0.17                        addp	v0.8b, v0.8b, v0.8b
+# CHECK-NEXT:  1      2     0.17                        addp	d1, v14.2d
+# CHECK-NEXT:  1      2     0.25                        addv	s0, v0.4s
+# CHECK-NEXT:  1      2     0.25                        addv	h0, v0.4h
+# CHECK-NEXT:  1      4     0.33                        addv	h0, v0.8h
+# CHECK-NEXT:  1      4     0.33                        addv	b0, v0.8b
+# CHECK-NEXT:  1      4     0.50                        addv	b0, v0.16b
+# CHECK-NEXT:  1      2     0.25                        aesd	v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.25                        aese	v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.25                        aesimc	v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.25                        aesmc	v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.17                        and	v0.8b, v0.8b, v0.8b
+# CHECK-NEXT:  1      2     0.17                        bic	v0.4h, #15, lsl #8
+# CHECK-NEXT:  1      2     0.17                        bic	v23.8h, #101
+# CHECK-NEXT:  1      2     0.17                        bic	v0.8b, v0.8b, v0.8b
+# CHECK-NEXT:  1      2     0.17                        bic	v25.16b, v10.16b, v9.16b
+# CHECK-NEXT:  1      2     0.17                        bic	v24.2s, #70
+# CHECK-NEXT:  1      2     0.17                        bit	v5.8b, v12.8b, v22.8b
+# CHECK-NEXT:  1      2     0.17                        bif	v0.8b, v25.8b, v4.8b
+# CHECK-NEXT:  1      2     0.17                        bif	v0.16b, v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.17                        bit	v0.16b, v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.17                        bsl	v0.8b, v0.8b, v0.8b
+# CHECK-NEXT:  1      2     0.17                        bsl	v27.16b, v13.16b, v21.16b
+# CHECK-NEXT:  1      2     0.17                        cls	v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.17                        cls	v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        cls	v0.4h, v0.4h
+# CHECK-NEXT:  1      2     0.17                        cls	v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        cls	v0.8b, v0.8b
+# CHECK-NEXT:  1      2     0.17                        cls	v0.8h, v0.8h
+# CHECK-NEXT:  1      2     0.17                        clz	v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.17                        clz	v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        clz	v0.4h, v0.4h
+# CHECK-NEXT:  1      2     0.17                        clz	v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        clz	v0.8b, v0.8b
+# CHECK-NEXT:  1      2     0.17                        clz	v0.8h, v0.8h
+# CHECK-NEXT:  1      2     0.17                        cmeq	v9.8h, v16.8h, v24.8h
+# CHECK-NEXT:  1      2     0.17                        cmeq	v14.4h, v18.4h, #0
+# CHECK-NEXT:  1      2     0.17                        cmeq	d20, d21, #0
+# CHECK-NEXT:  1      2     0.17                        cmeq	d20, d21, d22
+# CHECK-NEXT:  1      2     0.17                        cmeq	v0.16b, v0.16b, #0
+# CHECK-NEXT:  1      2     0.17                        cmeq	v0.16b, v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.17                        cmge	v22.8h, v16.8h, v3.8h
+# CHECK-NEXT:  1      2     0.17                        cmge	v22.16b, v30.16b, #0
+# CHECK-NEXT:  1      2     0.17                        cmge	d20, d21, #0
+# CHECK-NEXT:  1      2     0.17                        cmge	d20, d21, d22
+# CHECK-NEXT:  1      2     0.17                        cmge	v0.4h, v0.4h, v0.4h
+# CHECK-NEXT:  1      2     0.17                        cmge	v0.8b, v0.8b, #0
+# CHECK-NEXT:  1      2     0.17                        cmgt	v3.2d, v29.2d, v11.2d
+# CHECK-NEXT:  1      2     0.17                        cmgt	d20, d21, #0
+# CHECK-NEXT:  1      2     0.17                        cmgt	d20, d21, d22
+# CHECK-NEXT:  1      2     0.17                        cmgt	v0.2s, v0.2s, #0
+# CHECK-NEXT:  1      2     0.17                        cmgt	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        cmhi	v28.4h, v25.4h, v21.4h
+# CHECK-NEXT:  1      2     0.17                        cmhi	d20, d21, d22
+# CHECK-NEXT:  1      2     0.17                        cmhi	v0.8h, v0.8h, v0.8h
+# CHECK-NEXT:  1      2     0.17                        cmhs	d20, d21, d22
+# CHECK-NEXT:  1      2     0.17                        cmhs	v0.8b, v0.8b, v0.8b
+# CHECK-NEXT:  1      2     0.17                        cmle	v21.2s, v19.2s, #0
+# CHECK-NEXT:  1      2     0.17                        cmle	d20, d21, #0
+# CHECK-NEXT:  1      2     0.17                        cmle	v0.2d, v0.2d, #0
+# CHECK-NEXT:  1      2     0.17                        cmlt	v26.4h, v12.4h, #0
+# CHECK-NEXT:  1      2     0.17                        cmlt	d20, d21, #0
+# CHECK-NEXT:  1      2     0.17                        cmlt	v0.8h, v0.8h, #0
+# CHECK-NEXT:  1      2     0.17                        cmtst	d20, d21, d22
+# CHECK-NEXT:  1      2     0.17                        cmtst	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        cmtst	v13.2d, v13.2d, v13.2d
+# CHECK-NEXT:  1      2     0.17                        cnt	v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.17                        cnt	v0.8b, v0.8b
+# CHECK-NEXT:  1      3     1.00                        dup	v0.16b, w28
+# CHECK-NEXT:  1      3     1.00                        dup	v0.2d, x28
+# CHECK-NEXT:  1      3     1.00                        dup	v0.2s, w28
+# CHECK-NEXT:  1      3     1.00                        dup	v0.4h, w28
+# CHECK-NEXT:  1      3     1.00                        dup	v0.4s, w28
+# CHECK-NEXT:  1      3     1.00                        dup	v0.8b, w28
+# CHECK-NEXT:  1      3     1.00                        dup	v0.8h, w28
+# CHECK-NEXT:  1      2     0.17                        mov	b0, v0.b[1]
+# CHECK-NEXT:  1      2     0.17                        mov	d0, v0.d[1]
+# CHECK-NEXT:  1      2     0.17                        mov	h0, v0.h[1]
+# CHECK-NEXT:  1      2     0.17                        mov	s0, v0.s[1]
+# CHECK-NEXT:  1      2     0.17                        dup	v0.16b, v0.b[1]
+# CHECK-NEXT:  1      2     0.17                        dup	v0.2d, v0.d[1]
+# CHECK-NEXT:  1      2     0.17                        dup	v0.2s, v0.s[1]
+# CHECK-NEXT:  1      2     0.17                        dup	v0.4h, v0.h[1]
+# CHECK-NEXT:  1      2     0.17                        dup	v0.4s, v0.s[1]
+# CHECK-NEXT:  1      2     0.17                        dup	v0.8b, v0.b[1]
+# CHECK-NEXT:  1      2     0.17                        dup	v0.8h, v0.h[1]
+# CHECK-NEXT:  1      2     0.17                        eor	v0.16b, v0.16b, v0.16b
+# CHECK-NEXT:  1      2     0.17                        ext	v0.16b, v0.16b, v0.16b, #3
+# CHECK-NEXT:  1      2     0.17                        ext	v0.8b, v0.8b, v0.8b, #3
+# CHECK-NEXT:  1      2     0.17                        fabd	d29, d24, d20
+# CHECK-NEXT:  1      2     0.17                        fabd	s29, s24, s20
+# CHECK-NEXT:  1      2     0.17                        fabd	h27, h20, h17
+# CHECK-NEXT:  1      2     0.17                        fabd	v13.8h, v28.8h, v12.8h
+# CHECK-NEXT:  1      2     0.17                        fabd	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        fabs	h25, h7
+# CHECK-NEXT:  1      2     0.17                        fabs	v0.2d, v0.2d
+# CHECK-NEXT:  1      2     0.17                        fabs	v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        fabs	v0.4h, v0.4h
+# CHECK-NEXT:  1      2     0.17                        fabs	v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        fabs	v0.8h, v0.8h
+# CHECK-NEXT:  1      2     0.17                        facge	d20, d21, d22
+# CHECK-NEXT:  1      2     0.17                        facge	s10, s11, s12
+# CHECK-NEXT:  1      2     0.17                        facge	h24, h26, h29
+# CHECK-NEXT:  1      2     0.17                        facge	v25.4h, v16.4h, v11.4h
+# CHECK-NEXT:  1      2     0.17                        facge	v19.2s, v24.2s, v5.2s
+# CHECK-NEXT:  1      2     0.17                        facge	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        facgt	d20, d21, d22
+# CHECK-NEXT:  1      2     0.17                        facgt	s10, s11, s12
+# CHECK-NEXT:  1      2     0.17                        facgt	h0, h4, h10
+# CHECK-NEXT:  1      2     0.17                        facgt	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      2     0.17                        facgt	v22.8h, v14.8h, v31.8h
+# CHECK-NEXT:  1      2     0.17                        facgt	v22.4s, v8.4s, v2.4s
+# CHECK-NEXT:  1      2     0.17                        fadd	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        faddp	h10, v19.2h
+# CHECK-NEXT:  1      2     0.17                        faddp	d11, v28.2d
+# CHECK-NEXT:  1      2     0.17                        faddp	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        faddp	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        faddp	v16.2d, v11.2d, v5.2d
+# CHECK-NEXT:  1      2     0.17                        fcmeq	h30, h6, h1
+# CHECK-NEXT:  1      2     0.17                        fcmeq	h19, h23, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmeq	d20, d21, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmeq	d20, d21, d22
+# CHECK-NEXT:  1      2     0.17                        fcmeq	s10, s11, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmeq	s10, s11, s12
+# CHECK-NEXT:  1      2     0.17                        fcmeq	v0.2s, v0.2s, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmeq	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        fcmeq	v12.4s, v11.4s, v26.4s
+# CHECK-NEXT:  1      2     0.17                        fcmeq	v18.2d, v17.2d, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmge	h10, h23, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmge	h1, h16, h12
+# CHECK-NEXT:  1      2     0.17                        fcmge	d20, d21, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmge	d20, d21, d22
+# CHECK-NEXT:  1      2     0.17                        fcmge	s10, s11, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmge	s10, s11, s12
+# CHECK-NEXT:  1      2     0.17                        fcmge	v0.2d, v0.2d, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmge	v17.2d, v11.2d, v13.2d
+# CHECK-NEXT:  1      2     0.17                        fcmge	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        fcmge	v18.4h, v27.4h, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmge	v20.8h, v19.8h, v22.8h
+# CHECK-NEXT:  1      2     0.17                        fcmge	v17.2s, v11.2s, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmgt	h4, h5, h0
+# CHECK-NEXT:  1      2     0.17                        fcmgt	h0, h18, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmgt	d20, d21, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmgt	d20, d21, d22
+# CHECK-NEXT:  1      2     0.17                        fcmgt	s10, s11, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmgt	s10, s11, s12
+# CHECK-NEXT:  1      2     0.17                        fcmgt	v0.4s, v0.4s, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmgt	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        fcmgt	v24.8h, v24.8h, v28.8h
+# CHECK-NEXT:  1      2     0.17                        fcmgt	v0.8h, v11.8h, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmgt	v19.2d, v31.2d, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmle	v16.8h, v11.8h, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmle	v22.4s, v30.4s, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmle	d20, d21, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmle	s10, s11, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmle	v0.2d, v0.2d, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmle	h18, h28, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmlt	h23, h7, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmlt	d20, d21, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmlt	s10, s11, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmlt	v0.4s, v0.4s, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmlt	v8.4h, v2.4h, #0.0
+# CHECK-NEXT:  1      2     0.17                        fcmlt	v7.2d, v16.2d, #0.0
+# CHECK-NEXT:  1      3     0.25                        fcvtas	d21, d14
+# CHECK-NEXT:  1      3     0.25                        fcvtas	s12, s13
+# CHECK-NEXT:  1      3     0.25                        fcvtas	h12, h13
+# CHECK-NEXT:  1      3     0.25                        fcvtas	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtas	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        fcvtas	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        fcvtas	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtas	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        fcvtau	d21, d14
+# CHECK-NEXT:  1      3     0.25                        fcvtau	s12, s13
+# CHECK-NEXT:  1      3     0.25                        fcvtau	h12, h13
+# CHECK-NEXT:  1      3     0.25                        fcvtau	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtau	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        fcvtau	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        fcvtau	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtau	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        fcvtl	v0.2d, v0.2s
+# CHECK-NEXT:  1      3     0.25                        fcvtl	v0.4s, v0.4h
+# CHECK-NEXT:  1      3     0.25                        fcvtl2	v0.2d, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtl2	v0.4s, v0.8h
+# CHECK-NEXT:  1      3     0.25                        fcvtms	d21, d14
+# CHECK-NEXT:  1      3     0.25                        fcvtms	s22, s13
+# CHECK-NEXT:  1      3     0.25                        fcvtms	h22, h13
+# CHECK-NEXT:  1      3     0.25                        fcvtms	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtms	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        fcvtms	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        fcvtms	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtms	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        fcvtmu	d21, d14
+# CHECK-NEXT:  1      3     0.25                        fcvtmu	s12, s13
+# CHECK-NEXT:  1      3     0.25                        fcvtmu	h12, h13
+# CHECK-NEXT:  1      3     0.25                        fcvtmu	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtmu	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        fcvtmu	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        fcvtmu	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtmu	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        fcvtn	v0.2s, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtn	v0.4h, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtn2	v0.4s, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtn2	v0.8h, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtns	d21, d14
+# CHECK-NEXT:  1      3     0.25                        fcvtns	s22, s13
+# CHECK-NEXT:  1      3     0.25                        fcvtns	h22, h13
+# CHECK-NEXT:  1      3     0.25                        fcvtns	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtns	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        fcvtns	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        fcvtns	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtns	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        fcvtnu	d21, d14
+# CHECK-NEXT:  1      3     0.25                        fcvtnu	s12, s13
+# CHECK-NEXT:  1      3     0.25                        fcvtnu	h12, h13
+# CHECK-NEXT:  1      3     0.25                        fcvtnu	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtnu	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        fcvtnu	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        fcvtnu	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtnu	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        fcvtps	d21, d14
+# CHECK-NEXT:  1      3     0.25                        fcvtps	s22, s13
+# CHECK-NEXT:  1      3     0.25                        fcvtps	h22, h13
+# CHECK-NEXT:  1      3     0.25                        fcvtps	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtps	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        fcvtps	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        fcvtps	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtps	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        fcvtpu	d21, d14
+# CHECK-NEXT:  1      3     0.25                        fcvtpu	s12, s13
+# CHECK-NEXT:  1      3     0.25                        fcvtpu	h12, h13
+# CHECK-NEXT:  1      3     0.25                        fcvtpu	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtpu	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        fcvtpu	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        fcvtpu	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtpu	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        fcvtxn	s22, d13
+# CHECK-NEXT:  1      3     0.25                        fcvtxn	v0.2s, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtxn2	v0.4s, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	d21, d12, #1
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	d21, d14
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	s12, s13
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	s21, s12, #1
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	h21, h14
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	h21, h12, #1
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	v0.2d, v0.2d, #3
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	v0.2s, v0.2s, #3
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	v20.4h, v24.4h, #11
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	v0.4s, v0.4s, #3
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        fcvtzs	v18.8h, v10.8h, #7
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	d21, d12, #1
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	d21, d14
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	s12, s13
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	s21, s12, #1
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	h12, h13
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	h21, h12, #1
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	v0.2d, v0.2d, #3
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	v0.2s, v0.2s, #3
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	v19.4h, v26.4h, #9
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	v0.4s, v0.4s, #3
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        fcvtzu	v27.8h, v6.8h, #11
+# CHECK-NEXT:  1      13    2.00                        fdiv	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      9     2.00                        fdiv	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      8     4.00                        fdiv	v0.4h, v0.4h, v0.4h
+# CHECK-NEXT:  1      11    4.00                        fdiv	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      12    8.00                        fdiv	v0.8h, v0.8h, v0.8h
+# CHECK-NEXT:  1      2     0.17                        fmax	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      2     0.17                        fmax	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        fmax	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        fmaxnm	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      2     0.17                        fmaxnm	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        fmaxnm	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.17                        fmaxnmp	h25, v19.2h
+# CHECK-NEXT:  1      3     0.17                        fmaxnmp	d17, v29.2d
+# CHECK-NEXT:  1      3     0.17                        fmaxnmp	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.17                        fmaxnmp	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.17                        fmaxnmp	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      4     0.17                        fmaxnmv	h0, v13.4h
+# CHECK-NEXT:  1      6     0.17                        fmaxnmv	h12, v11.8h
+# CHECK-NEXT:  1      4     0.17                        fmaxnmv	s28, v31.4s
+# CHECK-NEXT:  1      3     0.17                        fmaxp	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.17                        fmaxp	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.17                        fmaxp	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.17                        fmaxp	h15, v25.2h
+# CHECK-NEXT:  1      3     0.17                        fmaxp	s6, v2.2s
+# CHECK-NEXT:  1      4     0.17                        fmaxv	h0, v0.4h
+# CHECK-NEXT:  1      6     0.17                        fmaxv	h0, v0.8h
+# CHECK-NEXT:  1      4     0.17                        fmaxv	s0, v0.4s
+# CHECK-NEXT:  1      2     0.17                        fmin	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      2     0.17                        fmin	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        fmin	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        fminnm	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      2     0.17                        fminnm	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        fminnm	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.17                        fminnmp	h20, v14.2h
+# CHECK-NEXT:  1      3     0.17                        fminnmp	d15, v8.2d
+# CHECK-NEXT:  1      3     0.17                        fminnmp	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.17                        fminnmp	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.17                        fminnmp	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      4     0.17                        fminnmv	h19, v25.4h
+# CHECK-NEXT:  1      6     0.17                        fminnmv	h23, v17.8h
+# CHECK-NEXT:  1      4     0.17                        fminnmv	s29, v17.4s
+# CHECK-NEXT:  1      3     0.17                        fminp	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.17                        fminp	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.17                        fminp	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.17                        fminp	h7, v10.2h
+# CHECK-NEXT:  1      3     0.17                        fminp	s17, v7.2s
+# CHECK-NEXT:  1      4     0.17                        fminv	h3, v30.4h
+# CHECK-NEXT:  1      6     0.17                        fminv	h29, v12.8h
+# CHECK-NEXT:  1      4     0.17                        fminv	s16, v19.4s
+# CHECK-NEXT:  1      4     0.17                        fmla	d0, d1, v0.d[1]
+# CHECK-NEXT:  1      4     0.17                        fmla	h23, h24, v15.h[4]
+# CHECK-NEXT:  1      4     0.17                        fmla	s0, s1, v0.s[3]
+# CHECK-NEXT:  1      4     0.17                        fmla	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      4     0.17                        fmla	v29.8h, v15.8h, v10.h[4]
+# CHECK-NEXT:  1      4     0.17                        fmla	v2.2s, v16.2s, v28.s[0]
+# CHECK-NEXT:  1      4     0.17                        fmla	v14.4s, v14.4s, v5.s[3]
+# CHECK-NEXT:  1      4     0.17                        fmla	v1.4s, v24.4s, v12.4s
+# CHECK-NEXT:  1      4     0.17                        fmla	v10.2d, v14.2d, v21.d[1]
+# CHECK-NEXT:  1      4     0.17                        fmls	d0, d4, v0.d[1]
+# CHECK-NEXT:  1      4     0.17                        fmls	h8, h14, v7.h[4]
+# CHECK-NEXT:  1      4     0.17                        fmls	s3, s5, v0.s[3]
+# CHECK-NEXT:  1      4     0.17                        fmls	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      4     0.17                        fmls	v30.8h, v18.8h, v4.h[6]
+# CHECK-NEXT:  1      4     0.17                        fmls	v10.2s, v27.2s, v0.s[0]
+# CHECK-NEXT:  1      4     0.17                        fmls	v27.4s, v7.4s, v24.s[0]
+# CHECK-NEXT:  1      4     0.17                        fmls	v10.2d, v22.2d, v29.d[0]
+# CHECK-NEXT:  1      4     0.17                        fmls	v6.8h, v15.8h, v23.8h
+# CHECK-NEXT:  1      2     0.17                        fmov	v0.2d, #-1.25000000
+# CHECK-NEXT:  1      2     0.17                        fmov	v0.2s, #13.00000000
+# CHECK-NEXT:  1      2     0.17                        fmov	v0.4s, #1.00000000
+# CHECK-NEXT:  1      3     0.17                        fmul	h18, h4, v7.h[3]
+# CHECK-NEXT:  1      3     0.17                        fmul	v10.4h, v2.4h, v7.h[5]
+# CHECK-NEXT:  1      3     0.17                        fmul	v5.2s, v12.2s, v9.s[0]
+# CHECK-NEXT:  1      3     0.17                        fmul	v15.4s, v30.4s, v2.s[3]
+# CHECK-NEXT:  1      3     0.17                        fmul	v11.2d, v31.2d, v24.d[1]
+# CHECK-NEXT:  1      3     0.17                        fmul	h28, h14, h3
+# CHECK-NEXT:  1      3     0.17                        fmul	d0, d1, v0.d[1]
+# CHECK-NEXT:  1      3     0.17                        fmul	s0, s1, v0.s[3]
+# CHECK-NEXT:  1      3     0.17                        fmul	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.17                        fmulx	d0, d4, v0.d[1]
+# CHECK-NEXT:  1      2     0.17                        fmulx	d23, d11, d1
+# CHECK-NEXT:  1      2     0.17                        fmulx	s20, s22, s15
+# CHECK-NEXT:  1      3     0.17                        fmulx	h18, h17, v7.h[1]
+# CHECK-NEXT:  1      2     0.17                        fmulx	h20, h25, h0
+# CHECK-NEXT:  1      3     0.17                        fmulx	s3, s5, v0.s[3]
+# CHECK-NEXT:  1      3     0.17                        fmulx	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.17                        fmulx	v28.4h, v25.4h, v15.h[1]
+# CHECK-NEXT:  1      3     0.17                        fmulx	v3.2s, v22.2s, v23.s[3]
+# CHECK-NEXT:  1      3     0.17                        fmulx	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.17                        fmulx	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.17                        fmulx	v5.4s, v28.4s, v15.s[3]
+# CHECK-NEXT:  1      3     0.17                        fmulx	v22.2d, v18.2d, v25.d[1]
+# CHECK-NEXT:  1      2     0.17                        fneg	v0.2d, v0.2d
+# CHECK-NEXT:  1      2     0.17                        fneg	v0.2s, v0.2s
+# CHECK-NEXT:  1      2     0.17                        fneg	v0.4h, v0.4h
+# CHECK-NEXT:  1      2     0.17                        fneg	v0.4s, v0.4s
+# CHECK-NEXT:  1      2     0.17                        fneg	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        frecpe	h20, h8
+# CHECK-NEXT:  1      3     0.25                        frecpe	d13, d13
+# CHECK-NEXT:  1      3     0.25                        frecpe	s19, s14
+# CHECK-NEXT:  1      2     0.17                        frecpe	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        frecpe	v0.2s, v0.2s
+# CHECK-NEXT:  1      4     0.50                        frecpe	v0.4h, v0.4h
+# CHECK-NEXT:  1      4     0.50                        frecpe	v0.4s, v0.4s
+# CHECK-NEXT:  1      6     1.00                        frecpe	v0.8h, v0.8h
+# CHECK-NEXT:  1      2     0.17                        frecps	h29, h19, h8
+# CHECK-NEXT:  1      3     0.25                        frecpx	h18, h11
+# CHECK-NEXT:  1      4     0.17                        frecps	v12.8h, v25.8h, v4.8h
+# CHECK-NEXT:  1      4     0.17                        frecps	v0.4s, v0.4s, v0.4s
+# CHECK-NEXT:  1      4     0.17                        frecps	d22, d30, d21
+# CHECK-NEXT:  1      4     0.17                        frecps	s21, s16, s13
+# CHECK-NEXT:  1      4     0.17                        frecps	v7.2d, v29.2d, v18.2d
+# CHECK-NEXT:  1      3     0.25                        frecpx	d16, d19
+# CHECK-NEXT:  1      3     0.25                        frecpx	s18, s10
+# CHECK-NEXT:  1      3     0.25                        frinta	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        frinta	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        frinta	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        frinta	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        frinta	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        frinti	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        frinti	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        frinti	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        frinti	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        frinti	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        frintm	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        frintm	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        frintm	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        frintm	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        frintm	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        frintn	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        frintn	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        frintn	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        frintn	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        frintn	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        frintp	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        frintp	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        frintp	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        frintp	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        frintp	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        frintx	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        frintx	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        frintx	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        frintx	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        frintx	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        frintz	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        frintz	v0.2s, v0.2s
+# CHECK-NEXT:  1      3     0.25                        frintz	v0.4h, v0.4h
+# CHECK-NEXT:  1      3     0.25                        frintz	v0.4s, v0.4s
+# CHECK-NEXT:  1      3     0.25                        frintz	v0.8h, v0.8h
+# CHECK-NEXT:  1      3     0.25                        frsqrte	h23, h26
+# CHECK-NEXT:  1      3     0.25                        frsqrte	d21, d12
+# CHECK-NEXT:  1      3     0.25                        frsqrte	s22, s13
+# CHECK-NEXT:  1      3     0.25                        frsqrte	v0.2d, v0.2d
+# CHECK-NEXT:  1      3     0.25                        frsqrte	v0.2s, v0.2s
+# CHECK-NEXT:  1      4     0.50                        frsqrte	v0.4h, v0.4h
+# CHECK-NEXT:  1      4     0.50                        frsqrte	v0.4s, v0.4s
+# CHECK-NEXT:  1      4     0.17                        frsqrts	v20.4s, v26.4s, v27.4s
+# CHECK-NEXT:  1      4     0.17                        frsqrts	v8.4h, v9.4h, v30.4h
+# CHECK-NEXT:  1      6     1.00                        frsqrte	v0.8h, v0.8h
+# CHECK-NEXT:  1      2     0.17                        frsqrts	h28, h26, h1
+# CHECK-NEXT:  1      4     0.17                        frsqrts	d8, d22, d18
+# CHECK-NEXT:  1      4     0.17                        frsqrts	s21, s5, s12
+# CHECK-NEXT:  1      4     0.17                        frsqrts	v0.2d, v0.2d, v0.2d
+# CHECK-NEXT:  1      13    2.00                        fsqrt	v0.2d, v0.2d
+# CHECK-NEXT:  1      9     2.00                        fsqrt	v0.2s, v0.2s
+# CHECK-NEXT:  1      8     4.00                        fsqrt	v0.4h, v0.4h
+# CHECK-NEXT:  1      11    4.00                        fsqrt	v0.4s, v0.4s
+# CHECK-NEXT:  1      12    8.00                        fsqrt	v0.8h, v0.8h
+# CHECK-NEXT:  1      2     0.17                        fsub	v13.8h, v15.8h, v17.8h
+# CHECK-NEXT:  1      2     0.17                        fsub	v0.2s, v0.2s, v0.2s
+# CHECK-NEXT:  1      6     0.25    *                   ld1	{ v0.16b }, [x0]
+# CHECK-NEXT:  2      6     0.50    *                   ld1	{ v0.16b, v1.16b }, [x14]
+# CHECK-NEXT:  3      6     0.75    *                   ld1	{ v19.16b, v20.16b, v21.16b }, [x10]
+# CHECK-NEXT:  4      7     1.00    *                   ld1	{ v13.16b, v14.16b, v15.16b, v16.16b }, [x9]
+# CHECK-NEXT:  1      6     0.25    *                   ld1	{ v24.8h }, [x27]
+# CHECK-NEXT:  2      6     0.50    *                   ld1	{ v1.8h, v2.8h }, [x27]
+# CHECK-NEXT:  3      6     0.50    *                   ld1	{ v0.8h, v1.8h }, [sp], #32
+# CHECK-NEXT:  3      6     0.75    *                   ld1	{ v21.8h, v22.8h, v23.8h }, [x22]
+# CHECK-NEXT:  4      7     1.00    *                   ld1	{ v0.8h, v1.8h, v2.8h, v3.8h }, [x21]
+# CHECK-NEXT:  1      6     0.25    *                   ld1	{ v3.4s }, [x4]
+# CHECK-NEXT:  2      6     0.50    *                   ld1	{ v11.4s, v12.4s }, [x30]
+# CHECK-NEXT:  3      6     0.75    *                   ld1	{ v0.4s, v1.4s, v2.4s }, [x24]
+# CHECK-NEXT:  4      7     1.00    *                   ld1	{ v15.4s, v16.4s, v17.4s, v18.4s }, [x28]
+# CHECK-NEXT:  4      6     0.75    *                   ld1	{ v0.4s, v1.4s, v2.4s }, [x0], #48
+# CHECK-NEXT:  1      6     0.25    *                   ld1	{ v3.2d }, [x28]
+# CHECK-NEXT:  2      6     0.50    *                   ld1	{ v13.2d, v14.2d }, [x13]
+# CHECK-NEXT:  3      6     0.75    *                   ld1	{ v12.2d, v13.2d, v14.2d }, [x15]
+# CHECK-NEXT:  4      6     0.75    *                   ld1	{ v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK-NEXT:  4      7     1.00    *                   ld1	{ v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT:  2      6     0.25    *                   ld1	{ v0.1d }, [x15], x2
+# CHECK-NEXT:  2      6     0.50    *                   ld1	{ v27.1d, v28.1d }, [x7]
+# CHECK-NEXT:  3      6     0.75    *                   ld1	{ v14.1d, v15.1d, v16.1d }, [x3]
+# CHECK-NEXT:  4      7     1.00    *                   ld1	{ v22.1d, v23.1d, v24.1d, v25.1d }, [x4]
+# CHECK-NEXT:  2      6     0.50    *                   ld1	{ v0.2s, v1.2s }, [x15]
+# CHECK-NEXT:  3      6     0.75    *                   ld1	{ v16.2s, v17.2s, v18.2s }, [x27]
+# CHECK-NEXT:  4      7     1.00    *                   ld1	{ v21.2s, v22.2s, v23.2s, v24.2s }, [x21]
+# CHECK-NEXT:  2      6     0.50    *                   ld1	{ v25.4h, v26.4h }, [x3]
+# CHECK-NEXT:  4      7     1.00    *                   ld1	{ v20.4h, v21.4h, v22.4h, v23.4h }, [x15]
+# CHECK-NEXT:  3      6     0.75    *                   ld1	{ v0.4h, v1.4h, v2.4h }, [sp]
+# CHECK-NEXT:  2      6     0.50    *                   ld1	{ v24.8b, v25.8b }, [x6]
+# CHECK-NEXT:  3      6     0.75    *                   ld1	{ v7.8b, v8.8b, v9.8b }, [x12]
+# CHECK-NEXT:  4      7     1.00    *                   ld1	{ v4.8b, v5.8b, v6.8b, v7.8b }, [x13]
+# CHECK-NEXT:  3      6     0.50    *                   ld1	{ v0.4s, v1.4s }, [sp], #32
+# CHECK-NEXT:  3      6     0.75    *                   ld1	{ v0.4s, v1.4s, v2.4s }, [sp]
+# CHECK-NEXT:  5      7     1.00    *                   ld1	{ v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT:  2      8     0.25    *                   ld1	{ v0.b }[7], [x0]
+# CHECK-NEXT:  3      8     0.25    *                   ld1	{ v0.h }[3], [x0], #2
+# CHECK-NEXT:  2      8     0.25    *                   ld1	{ v18.h }[3], [x1]
+# CHECK-NEXT:  2      8     0.25    *                   ld1	{ v0.s }[1], [x15]
+# CHECK-NEXT:  3      8     0.25    *                   ld1	{ v0.d }[0], [x15], #8
+# CHECK-NEXT:  2      8     0.25    *                   ld1	{ v11.d }[0], [x13]
+# CHECK-NEXT:  2      6     0.25    *                   ld1	{ v0.8h }, [x15], x2
+# CHECK-NEXT:  2      6     0.50    *                   ld1	{ v0.8h, v1.8h }, [x15]
+# CHECK-NEXT:  2      8     0.25    *                   ld1	{ v0.b }[9], [x0]
+# CHECK-NEXT:  3      8     0.25    *                   ld1	{ v0.b }[9], [x0], #1
+# CHECK-NEXT:  2      6     0.25    *                   ld1r	{ v0.16b }, [x0]
+# CHECK-NEXT:  3      6     0.25    *                   ld1r	{ v0.8h }, [x0], #2
+# CHECK-NEXT:  2      6     0.25    *                   ld1r	{ v0.4s }, [x15]
+# CHECK-NEXT:  2      6     0.25    *                   ld1r	{ v3.1d }, [x15]
+# CHECK-NEXT:  3      6     0.25    *                   ld1r	{ v0.2d }, [x15], x16
+# CHECK-NEXT:  2      6     0.25    *                   ld1r	{ v18.2d }, [x0]
+# CHECK-NEXT:  2      6     0.25    *                   ld1r	{ v8.8b }, [x23]
+# CHECK-NEXT:  2      6     0.25    *                   ld1r	{ v28.4h }, [x9]
+# CHECK-NEXT:  2      6     0.25    *                   ld1r	{ v3.8h }, [x16]
+# CHECK-NEXT:  2      6     0.25    *                   ld1r	{ v10.2s }, [x20]
+# CHECK-NEXT:  3      8     0.33    *                   ld2	{ v0.4h, v1.4h }, [x21]
+# CHECK-NEXT:  4      8     0.50    *                   ld2	{ v8.8h, v9.8h }, [x28]
+# CHECK-NEXT:  3      8     0.33    *                   ld2	{ v2.2s, v3.2s }, [x16]
+# CHECK-NEXT:  4      8     0.50    *                   ld2	{ v22.4s, v23.4s }, [x4]
+# CHECK-NEXT:  4      8     0.50    *                   ld2	{ v22.2d, v23.2d }, [x17]
+# CHECK-NEXT:  3      8     0.33    *                   ld2	{ v29.b, v30.b }[3], [x1]
+# CHECK-NEXT:  3      8     0.33    *                   ld2	{ v26.s, v27.s }[1], [x17]
+# CHECK-NEXT:  3      8     0.33    *                   ld2	{ v1.d, v2.d }[0], [x10]
+# CHECK-NEXT:  4      8     0.50    *                   ld2	{ v0.16b, v1.16b }, [x0]
+# CHECK-NEXT:  3      8     0.33    *                   ld2	{ v13.8b, v14.8b }, [x4]
+# CHECK-NEXT:  4      8     0.33    *                   ld2	{ v0.8b, v1.8b }, [x0], #16
+# CHECK-NEXT:  3      6     0.25    *                   ld1r	{ v0.16b }, [x0], #1
+# CHECK-NEXT:  2      6     0.25    *                   ld1r	{ v0.8h }, [x15]
+# CHECK-NEXT:  3      6     0.25    *                   ld1r	{ v0.8h }, [x15], #2
+# CHECK-NEXT:  5      8     0.50    *                   ld2	{ v0.16b, v1.16b }, [x0], x1
+# CHECK-NEXT:  3      8     0.33    *                   ld2	{ v0.8b, v1.8b }, [x0]
+# CHECK-NEXT:  3      8     0.33    *                   ld2	{ v0.h, v1.h }[7], [x15]
+# CHECK-NEXT:  4      8     0.33    *                   ld2	{ v0.h, v1.h }[7], [x15], x8
+# CHECK-NEXT:  4      8     0.33    *                   ld2	{ v0.h, v1.h }[7], [x15], #4
+# CHECK-NEXT:  3      8     0.33    *                   ld2r	{ v0.8b, v1.8b }, [x0]
+# CHECK-NEXT:  3      8     0.33    *                   ld2r	{ v10.16b, v11.16b }, [x23]
+# CHECK-NEXT:  4      8     0.33    *                   ld2r	{ v0.4h, v1.4h }, [x0], #4
+# CHECK-NEXT:  3      8     0.33    *                   ld2r	{ v25.4h, v26.4h }, [x11]
+# CHECK-NEXT:  3      8     0.33    *                   ld2r	{ v23.8h, v24.8h }, [x10]
+# CHECK-NEXT:  3      8     0.33    *                   ld2r	{ v0.2s, v1.2s }, [sp]
+# CHECK-NEXT:  3      8     0.33    *                   ld2r	{ v8.4s, v9.4s }, [x17]
+# CHECK-NEXT:  4      8     0.33    *                   ld2r	{ v0.1d, v1.1d }, [sp], x8
+# CHECK-NEXT:  3      8     0.33    *                   ld2r	{ v9.1d, v10.1d }, [x25]
+# CHECK-NEXT:  3      8     0.33    *                   ld2r	{ v26.2d, v27.2d }, [x8]
+# CHECK-NEXT:  5      8     0.50    *                   ld3	{ v8.8b, v9.8b, v10.8b }, [x0]
+# CHECK-NEXT:  6      9     0.75    *                   ld3	{ v15.16b, v16.16b, v17.16b }, [x5]
+# CHECK-NEXT:  3      8     0.33    *                   ld2r	{ v0.2d, v1.2d }, [x0]
+# CHECK-NEXT:  4      8     0.33    *                   ld2r	{ v0.2d, v1.2d }, [x0], #16
+# CHECK-NEXT:  3      8     0.33    *                   ld2r	{ v0.4s, v1.4s }, [sp]
+# CHECK-NEXT:  4      8     0.33    *                   ld2r	{ v0.4s, v1.4s }, [sp], #8
+# CHECK-NEXT:  5      8     0.50    *                   ld3	{ v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT:  7      9     0.75    *                   ld3	{ v0.8h, v1.8h, v2.8h }, [x15], #48
+# CHECK-NEXT:  6      9     0.75    *                   ld3	{ v7.8h, v8.8h, v9.8h }, [x21]
+# CHECK-NEXT:  5      8     0.50    *                   ld3	{ v16.2s, v17.2s, v18.2s }, [x0]
+# CHECK-NEXT:  6      9     0.75    *                   ld3	{ v12.4s, v13.4s, v14.4s }, [x25]
+# CHECK-NEXT:  5      8     0.50    *                   ld3	{ v17.b, v18.b, v19.b }[2], [x27]
+# CHECK-NEXT:  5      8     0.50    *                   ld3	{ v18.h, v19.h, v20.h }[5], [x16]
+# CHECK-NEXT:  6      9     0.75    *                   ld3	{ v10.2d, v11.2d, v12.2d }, [x18]
+# CHECK-NEXT:  7      9     0.75    *                   ld3	{ v0.8h, v1.8h, v2.8h }, [x15], x2
+# CHECK-NEXT:  5      8     0.50    *                   ld3	{ v0.s, v1.s, v2.s }[3], [sp]
+# CHECK-NEXT:  6      8     0.50    *                   ld3	{ v0.s, v1.s, v2.s }[3], [sp], x3
+# CHECK-NEXT:  5      8     0.50    *                   ld3	{ v5.d, v6.d, v7.d }[1], [x14]
+# CHECK-NEXT:  5      8     0.50    *                   ld3r	{ v0.8b, v1.8b, v2.8b }, [x15]
+# CHECK-NEXT:  6      8     0.75    *                   ld3r	{ v17.16b, v18.16b, v19.16b }, [x3]
+# CHECK-NEXT:  5      8     0.50    *                   ld3r	{ v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT:  6      8     0.50    *                   ld3r	{ v0.4h, v1.4h, v2.4h }, [x15], #6
+# CHECK-NEXT:  5      8     0.50    *                   ld3r	{ v3.4h, v4.4h, v5.4h }, [x1]
+# CHECK-NEXT:  6      8     0.75    *                   ld3r	{ v6.8h, v7.8h, v8.8h }, [x28]
+# CHECK-NEXT:  5      8     0.50    *                   ld3r	{ v0.2s, v1.2s, v2.2s }, [x0]
+# CHECK-NEXT:  6      8     0.75    *                   ld3r	{ v28.4s, v29.4s, v30.4s }, [x2]
+# CHECK-NEXT:  6      8     0.50    *                   ld3r	{ v0.1d, v1.1d, v2.1d }, [x0], x0
+# CHECK-NEXT:  5      8     0.50    *                   ld3r	{ v1.1d, v2.1d, v3.1d }, [x28]
+# CHECK-NEXT:  6      8     0.75    *                   ld3r	{ v8.2d, v9.2d, v10.2d }, [x3]
+# CHECK-NEXT:  7      8     0.75    *                   ld4	{ v6.8b, v7.8b, v8.8b, v9.8b }, [x27]
+# CHECK-NEXT:  10     9     1.50    *                   ld4	{ v11.16b, v12.16b, v13.16b, v14.16b }, [x5]
+# CHECK-NEXT:  7      8     0.75    *                   ld4	{ v21.4h, v22.4h, v23.4h, v24.4h }, [x14]
+# CHECK-NEXT:  10     9     1.50    *                   ld4	{ v9.8h, v10.8h, v11.8h, v12.8h }, [x1]
+# CHECK-NEXT:  10     9     1.50    *                   ld4	{ v17.4s, v18.4s, v19.4s, v20.4s }, [x4]
+# CHECK-NEXT:  5      8     0.50    *                   ld3r	{ v0.8b, v1.8b, v2.8b }, [x0]
+# CHECK-NEXT:  6      8     0.50    *                   ld3r	{ v0.8b, v1.8b, v2.8b }, [x0], #3
+# CHECK-NEXT:  7      8     0.75    *                   ld4	{ v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT:  11     9     1.50    *                   ld4	{ v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT:  7      8     0.75    *                   ld4	{ v0.d, v1.d, v2.d, v3.d }[1], [x0]
+# CHECK-NEXT:  10     9     1.50    *                   ld4	{ v2.2d, v3.2d, v4.2d, v5.2d }, [x24]
+# CHECK-NEXT:  7      8     0.75    *                   ld4	{ v4.b, v5.b, v6.b, v7.b }[12], [x27]
+# CHECK-NEXT:  7      8     0.75    *                   ld4	{ v5.h, v6.h, v7.h, v8.h }[0], [x4]
+# CHECK-NEXT:  8      8     0.75    *                   ld4	{ v0.d, v1.d, v2.d, v3.d }[1], [x0], #32
+# CHECK-NEXT:  8      8     0.75    *                   ld4	{ v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
+# CHECK-NEXT:  7      8     0.75    *                   ld4	{ v0.s, v1.s, v2.s, v3.s }[0], [x26]
+# CHECK-NEXT:  7      8     0.75    *                   ld4r	{ v20.8b, v21.8b, v22.8b, v23.8b }, [x23]
+# CHECK-NEXT:  8      8     1.00    *                   ld4r	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x25]
+# CHECK-NEXT:  7      8     0.75    *                   ld4r	{ v16.4h, v17.4h, v18.4h, v19.4h }, [x6]
+# CHECK-NEXT:  7      8     0.75    *                   ld4r	{ v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
+# CHECK-NEXT:  8      8     1.00    *                   ld4r	{ v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# CHECK-NEXT:  8      8     1.00    *                   ld4r	{ v4.8h, v5.8h, v6.8h, v7.8h }, [x23]
+# CHECK-NEXT:  7      8     0.75    *                   ld4r	{ v0.2s, v1.2s, v2.2s, v3.2s }, [x30]
+# CHECK-NEXT:  8      8     0.75    *                   ld4r	{ v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
+# CHECK-NEXT:  8      8     1.00    *                   ld4r	{ v7.4s, v8.4s, v9.4s, v10.4s }, [x23]
+# CHECK-NEXT:  9      8     1.00    *                   ld4r	{ v0.4s, v1.4s, v2.4s, v3.4s }, [sp], x8
+# CHECK-NEXT:  8      8     0.75    *                   ld4r	{ v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7
+# CHECK-NEXT:  7      8     0.75    *                   ld4r	{ v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT:  8      8     0.75    *                   ld4r	{ v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30
+# CHECK-NEXT:  1      4     0.25                        mla	v0.8b, v0.8b, v0.8b
+# CHECK-NEXT:  1      4     0.25                        mla	v15.8h, v22.8h, v4.h[3]
+# CHECK-NEXT:  1      4     0.25                        mla	v28.2s, v10.2s, v2.s[0]
+# CHECK-NEXT:  1      4     0.25                        mls	v0.4h, v0.4h, v0.4h
+# CHECK-NEXT:  1      4     0.25                        mls	v25.8h, v29.8h, v0.h[4]
+# CHECK-NEXT:  1      4     0.25                        mls	v22.2s, v29.2s, v0.s[3]
+# CHECK-NEXT:  1      4     0.25                        mls	v26.4s, v5.4s, v28.4s
+# CHECK-NEXT:  1      2     0.17                        mov	b0, v0.b[15]
+# CHECK-NEXT:  1      2     0.17                        mov	d6, v0.d[1]
+# CHECK-NEXT:  1      2     0.17                        mov	h2, v0.h[5]
+# CHECK-NEXT:  1      2     0.17                        mov	s17, v0.s[2]
+# CHECK-NEXT:  1      2     0.17                        mov	w8, v8.s[0]
+# CHECK-NEXT:  1      2     0.17                        mov	x30, v18.d[0]
+# CHECK-NEXT:  1      2     0.17                        mov	v2.b[0], v0.b[0]
+# CHECK-NEXT:  1      2     0.17                        mov	v2.h[1], v0.h[1]
+# CHECK-NEXT:  1      2     0.17                        mov	v2.s[2], v0.s[2]
+# CHECK-NEXT:  1      2     0.17                        mov	v2.d[1], v0.d[1]
+# CHECK-NEXT:  2      5     1.00                        mov	v0.b[0], w8
----------------
walkerkd wrote:

According to the Software Optimization Guide (SWOG), Table 2-15 "ASIMD logical", this should have a latency of 2 (and I think the throughput value is also incorrect).

https://github.com/llvm/llvm-project/pull/182251


More information about the llvm-commits mailing list