[llvm] 35f3298 - [AArch64] Add extra stp suppression tests.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 16 07:00:34 PST 2024


Author: Florian Hahn
Date: 2024-02-16T14:58:54Z
New Revision: 35f3298ead71bbef73ae0321055470ab761a3eb2

URL: https://github.com/llvm/llvm-project/commit/35f3298ead71bbef73ae0321055470ab761a3eb2
DIFF: https://github.com/llvm/llvm-project/commit/35f3298ead71bbef73ae0321055470ab761a3eb2.diff

LOG: [AArch64] Add extra stp suppression tests.

Add test cases for store pair suppression that still triggers after
https://github.com/llvm/llvm-project/pull/81749

Added: 
    llvm/test/CodeGen/AArch64/storepairsuppress.ll

Modified: 
    llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/storepairsuppress.ll b/llvm/test/CodeGen/AArch64/storepairsuppress.ll
new file mode 100644
index 00000000000000..9892c09581ea2b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/storepairsuppress.ll
@@ -0,0 +1,676 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -mcpu=apple-m1 -mattr=+store-pair-suppress -o - %s | FileCheck --check-prefix=SUPPRESS %s
+; RUN: llc -mtriple=aarch64 -mcpu=apple-m1 -mattr=-store-pair-suppress -o - %s | FileCheck --check-prefix=NOSUPPRESS %s
+
+define void @vector_units_critial(ptr %dst, <8 x i32> %v0) {
+; SUPPRESS-LABEL: vector_units_critial:
+; SUPPRESS:       // %bb.0: // %entry
+; SUPPRESS-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; SUPPRESS-NEXT:    xtn v0.8b, v0.8h
+; SUPPRESS-NEXT:    mul v1.8b, v0.8b, v0.8b
+; SUPPRESS-NEXT:    str d1, [x0, #8]
+; SUPPRESS-NEXT:    str d0, [x0]
+; SUPPRESS-NEXT:    ret
+;
+; NOSUPPRESS-LABEL: vector_units_critial:
+; NOSUPPRESS:       // %bb.0: // %entry
+; NOSUPPRESS-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; NOSUPPRESS-NEXT:    xtn v0.8b, v0.8h
+; NOSUPPRESS-NEXT:    mul v1.8b, v0.8b, v0.8b
+; NOSUPPRESS-NEXT:    stp d0, d1, [x0]
+; NOSUPPRESS-NEXT:    ret
+entry:
+  %add.ptr.1 = getelementptr i8, ptr %dst, i64 8
+  %t = trunc <8 x i32> %v0 to <8 x i8>
+  %mul = mul <8 x i8> %t, %t
+  store <8 x i8> %mul, ptr %add.ptr.1, align 1
+  store <8 x i8> %t, ptr %dst, align 1
+  ret void
+}
+
+%struct.widget = type { float, float }
+
+define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef %arg2, i64 noundef %arg3) {
+; SUPPRESS-LABEL: load_store_units_critical:
+; SUPPRESS:       // %bb.0: // %bb
+; SUPPRESS-NEXT:    ldr x8, [x0, #8]
+; SUPPRESS-NEXT:    ldp s0, s1, [x8]
+; SUPPRESS-NEXT:    add x8, x1, x2, lsl #3
+; SUPPRESS-NEXT:    ldp s2, s3, [x8]
+; SUPPRESS-NEXT:    ldp s4, s5, [x8, #8]
+; SUPPRESS-NEXT:    fmul s6, s5, s1
+; SUPPRESS-NEXT:    fmul s1, s4, s1
+; SUPPRESS-NEXT:    fnmsub s4, s4, s0, s6
+; SUPPRESS-NEXT:    fmadd s0, s5, s0, s1
+; SUPPRESS-NEXT:    fadd s1, s4, s2
+; SUPPRESS-NEXT:    fadd s5, s0, s3
+; SUPPRESS-NEXT:    str s1, [x8]
+; SUPPRESS-NEXT:    str s5, [x8, #4]
+; SUPPRESS-NEXT:    fsub s2, s2, s4
+; SUPPRESS-NEXT:    fsub s0, s3, s0
+; SUPPRESS-NEXT:    str s2, [x8, #8]
+; SUPPRESS-NEXT:    str s0, [x8, #12]
+; SUPPRESS-NEXT:    ldr x9, [x0, #8]
+; SUPPRESS-NEXT:    ldp s3, s4, [x9]
+; SUPPRESS-NEXT:    ldp s6, s7, [x8, #16]
+; SUPPRESS-NEXT:    ldp s16, s17, [x8, #24]
+; SUPPRESS-NEXT:    fmul s18, s17, s4
+; SUPPRESS-NEXT:    fmul s4, s16, s4
+; SUPPRESS-NEXT:    fnmsub s16, s16, s3, s18
+; SUPPRESS-NEXT:    fmadd s3, s17, s3, s4
+; SUPPRESS-NEXT:    fadd s4, s16, s6
+; SUPPRESS-NEXT:    fadd s17, s3, s7
+; SUPPRESS-NEXT:    str s4, [x8, #16]
+; SUPPRESS-NEXT:    str s17, [x8, #20]
+; SUPPRESS-NEXT:    fsub s6, s6, s16
+; SUPPRESS-NEXT:    fsub s3, s7, s3
+; SUPPRESS-NEXT:    str s6, [x8, #24]
+; SUPPRESS-NEXT:    str s3, [x8, #28]
+; SUPPRESS-NEXT:    ldr x9, [x0, #8]
+; SUPPRESS-NEXT:    ldp s7, s16, [x9]
+; SUPPRESS-NEXT:    fmul s18, s16, s17
+; SUPPRESS-NEXT:    fmul s17, s7, s17
+; SUPPRESS-NEXT:    fnmsub s7, s7, s4, s18
+; SUPPRESS-NEXT:    fmadd s4, s16, s4, s17
+; SUPPRESS-NEXT:    fadd s16, s7, s1
+; SUPPRESS-NEXT:    fadd s17, s4, s5
+; SUPPRESS-NEXT:    str s16, [x8]
+; SUPPRESS-NEXT:    str s17, [x8, #4]
+; SUPPRESS-NEXT:    fsub s1, s1, s7
+; SUPPRESS-NEXT:    fsub s4, s5, s4
+; SUPPRESS-NEXT:    str s1, [x8, #16]
+; SUPPRESS-NEXT:    str s4, [x8, #20]
+; SUPPRESS-NEXT:    ldr x10, [x0, #8]
+; SUPPRESS-NEXT:    lsl x9, x3, #4
+; SUPPRESS-NEXT:    add x10, x10, x9
+; SUPPRESS-NEXT:    ldp s1, s4, [x10]
+; SUPPRESS-NEXT:    fmul s5, s4, s3
+; SUPPRESS-NEXT:    fmul s3, s1, s3
+; SUPPRESS-NEXT:    fnmsub s1, s1, s6, s5
+; SUPPRESS-NEXT:    fmadd s3, s4, s6, s3
+; SUPPRESS-NEXT:    fadd s4, s1, s2
+; SUPPRESS-NEXT:    fadd s5, s3, s0
+; SUPPRESS-NEXT:    str s4, [x8, #8]
+; SUPPRESS-NEXT:    str s5, [x8, #12]
+; SUPPRESS-NEXT:    fsub s1, s2, s1
+; SUPPRESS-NEXT:    fsub s0, s0, s3
+; SUPPRESS-NEXT:    str s1, [x8, #24]
+; SUPPRESS-NEXT:    str s0, [x8, #28]
+; SUPPRESS-NEXT:    ldr x10, [x0, #8]
+; SUPPRESS-NEXT:    ldp s0, s1, [x10]
+; SUPPRESS-NEXT:    ldp s2, s3, [x8, #32]
+; SUPPRESS-NEXT:    ldp s4, s5, [x8, #40]
+; SUPPRESS-NEXT:    fmul s6, s5, s1
+; SUPPRESS-NEXT:    fmul s1, s4, s1
+; SUPPRESS-NEXT:    fnmsub s4, s4, s0, s6
+; SUPPRESS-NEXT:    fmadd s0, s5, s0, s1
+; SUPPRESS-NEXT:    fadd s1, s4, s2
+; SUPPRESS-NEXT:    fadd s5, s0, s3
+; SUPPRESS-NEXT:    str s1, [x8, #32]
+; SUPPRESS-NEXT:    str s5, [x8, #36]
+; SUPPRESS-NEXT:    fsub s2, s2, s4
+; SUPPRESS-NEXT:    fsub s3, s3, s0
+; SUPPRESS-NEXT:    str s2, [x8, #40]
+; SUPPRESS-NEXT:    str s3, [x8, #44]
+; SUPPRESS-NEXT:    ldr x10, [x0, #8]
+; SUPPRESS-NEXT:    ldp s0, s4, [x10]
+; SUPPRESS-NEXT:    ldp s6, s7, [x8, #48]
+; SUPPRESS-NEXT:    ldp s16, s17, [x8, #56]
+; SUPPRESS-NEXT:    fmul s18, s17, s4
+; SUPPRESS-NEXT:    fmul s4, s16, s4
+; SUPPRESS-NEXT:    fnmsub s16, s16, s0, s18
+; SUPPRESS-NEXT:    fmadd s0, s17, s0, s4
+; SUPPRESS-NEXT:    fadd s4, s16, s6
+; SUPPRESS-NEXT:    fadd s17, s0, s7
+; SUPPRESS-NEXT:    str s4, [x8, #48]
+; SUPPRESS-NEXT:    str s17, [x8, #52]
+; SUPPRESS-NEXT:    fsub s6, s6, s16
+; SUPPRESS-NEXT:    fsub s0, s7, s0
+; SUPPRESS-NEXT:    str s6, [x8, #56]
+; SUPPRESS-NEXT:    str s0, [x8, #60]
+; SUPPRESS-NEXT:    ldr x10, [x0, #8]
+; SUPPRESS-NEXT:    ldp s7, s16, [x10]
+; SUPPRESS-NEXT:    fmul s18, s16, s17
+; SUPPRESS-NEXT:    fmul s17, s7, s17
+; SUPPRESS-NEXT:    fnmsub s7, s7, s4, s18
+; SUPPRESS-NEXT:    fmadd s4, s16, s4, s17
+; SUPPRESS-NEXT:    fadd s16, s7, s1
+; SUPPRESS-NEXT:    fadd s17, s4, s5
+; SUPPRESS-NEXT:    str s16, [x8, #32]
+; SUPPRESS-NEXT:    str s17, [x8, #36]
+; SUPPRESS-NEXT:    fsub s7, s1, s7
+; SUPPRESS-NEXT:    fsub s4, s5, s4
+; SUPPRESS-NEXT:    str s7, [x8, #48]
+; SUPPRESS-NEXT:    str s4, [x8, #52]
+; SUPPRESS-NEXT:    ldr x10, [x0, #8]
+; SUPPRESS-NEXT:    add x9, x10, x9
+; SUPPRESS-NEXT:    ldp s1, s5, [x9]
+; SUPPRESS-NEXT:    fmul s18, s5, s0
+; SUPPRESS-NEXT:    fmul s0, s1, s0
+; SUPPRESS-NEXT:    fnmsub s1, s1, s6, s18
+; SUPPRESS-NEXT:    fmadd s5, s5, s6, s0
+; SUPPRESS-NEXT:    fadd s6, s1, s2
+; SUPPRESS-NEXT:    fadd s18, s5, s3
+; SUPPRESS-NEXT:    str s6, [x8, #40]
+; SUPPRESS-NEXT:    str s18, [x8, #44]
+; SUPPRESS-NEXT:    fsub s0, s2, s1
+; SUPPRESS-NEXT:    fsub s1, s3, s5
+; SUPPRESS-NEXT:    str s0, [x8, #56]
+; SUPPRESS-NEXT:    str s1, [x8, #60]
+; SUPPRESS-NEXT:    ldr x9, [x0, #8]
+; SUPPRESS-NEXT:    ldp s2, s3, [x9]
+; SUPPRESS-NEXT:    ldp s5, s19, [x8]
+; SUPPRESS-NEXT:    fmul s20, s17, s3
+; SUPPRESS-NEXT:    fmul s3, s16, s3
+; SUPPRESS-NEXT:    fnmsub s16, s16, s2, s20
+; SUPPRESS-NEXT:    fmadd s2, s17, s2, s3
+; SUPPRESS-NEXT:    fadd s3, s16, s5
+; SUPPRESS-NEXT:    fadd s17, s2, s19
+; SUPPRESS-NEXT:    str s3, [x8]
+; SUPPRESS-NEXT:    str s17, [x8, #4]
+; SUPPRESS-NEXT:    fsub s3, s5, s16
+; SUPPRESS-NEXT:    fsub s2, s19, s2
+; SUPPRESS-NEXT:    str s3, [x8, #32]
+; SUPPRESS-NEXT:    str s2, [x8, #36]
+; SUPPRESS-NEXT:    ldr x9, [x0, #8]
+; SUPPRESS-NEXT:    add x9, x9, w3, sxtw #3
+; SUPPRESS-NEXT:    ldp s2, s3, [x9]
+; SUPPRESS-NEXT:    ldp s5, s16, [x8, #8]
+; SUPPRESS-NEXT:    fmul s17, s18, s3
+; SUPPRESS-NEXT:    fmul s3, s6, s3
+; SUPPRESS-NEXT:    fnmsub s6, s6, s2, s17
+; SUPPRESS-NEXT:    fmadd s2, s18, s2, s3
+; SUPPRESS-NEXT:    fadd s3, s6, s5
+; SUPPRESS-NEXT:    fadd s17, s2, s16
+; SUPPRESS-NEXT:    str s3, [x8, #8]
+; SUPPRESS-NEXT:    str s17, [x8, #12]
+; SUPPRESS-NEXT:    fsub s3, s5, s6
+; SUPPRESS-NEXT:    fsub s2, s16, s2
+; SUPPRESS-NEXT:    str s3, [x8, #40]
+; SUPPRESS-NEXT:    str s2, [x8, #44]
+; SUPPRESS-NEXT:    lsl x9, x3, #33
+; SUPPRESS-NEXT:    ldr x10, [x0, #8]
+; SUPPRESS-NEXT:    add x9, x10, x9, asr #29
+; SUPPRESS-NEXT:    ldp s2, s3, [x9]
+; SUPPRESS-NEXT:    ldp s5, s6, [x8, #16]
+; SUPPRESS-NEXT:    fmul s16, s4, s3
+; SUPPRESS-NEXT:    fmul s3, s7, s3
+; SUPPRESS-NEXT:    fnmsub s7, s7, s2, s16
+; SUPPRESS-NEXT:    fmadd s2, s4, s2, s3
+; SUPPRESS-NEXT:    fadd s3, s7, s5
+; SUPPRESS-NEXT:    fadd s4, s2, s6
+; SUPPRESS-NEXT:    str s3, [x8, #16]
+; SUPPRESS-NEXT:    str s4, [x8, #20]
+; SUPPRESS-NEXT:    fsub s3, s5, s7
+; SUPPRESS-NEXT:    fsub s2, s6, s2
+; SUPPRESS-NEXT:    str s3, [x8, #48]
+; SUPPRESS-NEXT:    str s2, [x8, #52]
+; SUPPRESS-NEXT:    add w9, w3, w3, lsl #1
+; SUPPRESS-NEXT:    ldr x10, [x0, #8]
+; SUPPRESS-NEXT:    add x9, x10, w9, sxtw #3
+; SUPPRESS-NEXT:    ldp s2, s3, [x9]
+; SUPPRESS-NEXT:    ldp s4, s5, [x8, #24]
+; SUPPRESS-NEXT:    fmul s6, s1, s3
+; SUPPRESS-NEXT:    fmul s3, s0, s3
+; SUPPRESS-NEXT:    fnmsub s0, s0, s2, s6
+; SUPPRESS-NEXT:    fmadd s1, s1, s2, s3
+; SUPPRESS-NEXT:    fadd s2, s0, s4
+; SUPPRESS-NEXT:    fadd s3, s1, s5
+; SUPPRESS-NEXT:    str s2, [x8, #24]
+; SUPPRESS-NEXT:    str s3, [x8, #28]
+; SUPPRESS-NEXT:    fsub s0, s4, s0
+; SUPPRESS-NEXT:    fsub s1, s5, s1
+; SUPPRESS-NEXT:    str s0, [x8, #56]
+; SUPPRESS-NEXT:    str s1, [x8, #60]
+; SUPPRESS-NEXT:    ret
+;
+; NOSUPPRESS-LABEL: load_store_units_critical:
+; NOSUPPRESS:       // %bb.0: // %bb
+; NOSUPPRESS-NEXT:    ldr x8, [x0, #8]
+; NOSUPPRESS-NEXT:    ldp s0, s1, [x8]
+; NOSUPPRESS-NEXT:    add x8, x1, x2, lsl #3
+; NOSUPPRESS-NEXT:    ldp s2, s3, [x8]
+; NOSUPPRESS-NEXT:    ldp s4, s5, [x8, #8]
+; NOSUPPRESS-NEXT:    fmul s6, s5, s1
+; NOSUPPRESS-NEXT:    fmul s1, s4, s1
+; NOSUPPRESS-NEXT:    fnmsub s4, s4, s0, s6
+; NOSUPPRESS-NEXT:    fmadd s0, s5, s0, s1
+; NOSUPPRESS-NEXT:    fadd s1, s4, s2
+; NOSUPPRESS-NEXT:    fadd s5, s0, s3
+; NOSUPPRESS-NEXT:    stp s1, s5, [x8]
+; NOSUPPRESS-NEXT:    fsub s2, s2, s4
+; NOSUPPRESS-NEXT:    fsub s0, s3, s0
+; NOSUPPRESS-NEXT:    stp s2, s0, [x8, #8]
+; NOSUPPRESS-NEXT:    ldr x9, [x0, #8]
+; NOSUPPRESS-NEXT:    ldp s3, s4, [x9]
+; NOSUPPRESS-NEXT:    ldp s6, s7, [x8, #16]
+; NOSUPPRESS-NEXT:    ldp s16, s17, [x8, #24]
+; NOSUPPRESS-NEXT:    fmul s18, s17, s4
+; NOSUPPRESS-NEXT:    fmul s4, s16, s4
+; NOSUPPRESS-NEXT:    fnmsub s16, s16, s3, s18
+; NOSUPPRESS-NEXT:    fmadd s3, s17, s3, s4
+; NOSUPPRESS-NEXT:    fadd s4, s16, s6
+; NOSUPPRESS-NEXT:    fadd s17, s3, s7
+; NOSUPPRESS-NEXT:    stp s4, s17, [x8, #16]
+; NOSUPPRESS-NEXT:    fsub s6, s6, s16
+; NOSUPPRESS-NEXT:    fsub s3, s7, s3
+; NOSUPPRESS-NEXT:    stp s6, s3, [x8, #24]
+; NOSUPPRESS-NEXT:    ldr x9, [x0, #8]
+; NOSUPPRESS-NEXT:    ldp s7, s16, [x9]
+; NOSUPPRESS-NEXT:    fmul s18, s16, s17
+; NOSUPPRESS-NEXT:    fmul s17, s7, s17
+; NOSUPPRESS-NEXT:    fnmsub s7, s7, s4, s18
+; NOSUPPRESS-NEXT:    fmadd s4, s16, s4, s17
+; NOSUPPRESS-NEXT:    fadd s16, s7, s1
+; NOSUPPRESS-NEXT:    fadd s17, s4, s5
+; NOSUPPRESS-NEXT:    stp s16, s17, [x8]
+; NOSUPPRESS-NEXT:    fsub s1, s1, s7
+; NOSUPPRESS-NEXT:    fsub s4, s5, s4
+; NOSUPPRESS-NEXT:    stp s1, s4, [x8, #16]
+; NOSUPPRESS-NEXT:    ldr x10, [x0, #8]
+; NOSUPPRESS-NEXT:    lsl x9, x3, #4
+; NOSUPPRESS-NEXT:    add x10, x10, x9
+; NOSUPPRESS-NEXT:    ldp s1, s4, [x10]
+; NOSUPPRESS-NEXT:    fmul s5, s4, s3
+; NOSUPPRESS-NEXT:    fmul s3, s1, s3
+; NOSUPPRESS-NEXT:    fnmsub s1, s1, s6, s5
+; NOSUPPRESS-NEXT:    fmadd s3, s4, s6, s3
+; NOSUPPRESS-NEXT:    fadd s4, s1, s2
+; NOSUPPRESS-NEXT:    fadd s5, s3, s0
+; NOSUPPRESS-NEXT:    stp s4, s5, [x8, #8]
+; NOSUPPRESS-NEXT:    fsub s1, s2, s1
+; NOSUPPRESS-NEXT:    fsub s0, s0, s3
+; NOSUPPRESS-NEXT:    stp s1, s0, [x8, #24]
+; NOSUPPRESS-NEXT:    ldr x10, [x0, #8]
+; NOSUPPRESS-NEXT:    ldp s0, s1, [x10]
+; NOSUPPRESS-NEXT:    ldp s2, s3, [x8, #32]
+; NOSUPPRESS-NEXT:    ldp s4, s5, [x8, #40]
+; NOSUPPRESS-NEXT:    fmul s6, s5, s1
+; NOSUPPRESS-NEXT:    fmul s1, s4, s1
+; NOSUPPRESS-NEXT:    fnmsub s4, s4, s0, s6
+; NOSUPPRESS-NEXT:    fmadd s0, s5, s0, s1
+; NOSUPPRESS-NEXT:    fadd s1, s4, s2
+; NOSUPPRESS-NEXT:    fadd s5, s0, s3
+; NOSUPPRESS-NEXT:    stp s1, s5, [x8, #32]
+; NOSUPPRESS-NEXT:    fsub s2, s2, s4
+; NOSUPPRESS-NEXT:    fsub s3, s3, s0
+; NOSUPPRESS-NEXT:    stp s2, s3, [x8, #40]
+; NOSUPPRESS-NEXT:    ldr x10, [x0, #8]
+; NOSUPPRESS-NEXT:    ldp s0, s4, [x10]
+; NOSUPPRESS-NEXT:    ldp s6, s7, [x8, #48]
+; NOSUPPRESS-NEXT:    ldp s16, s17, [x8, #56]
+; NOSUPPRESS-NEXT:    fmul s18, s17, s4
+; NOSUPPRESS-NEXT:    fmul s4, s16, s4
+; NOSUPPRESS-NEXT:    fnmsub s16, s16, s0, s18
+; NOSUPPRESS-NEXT:    fmadd s0, s17, s0, s4
+; NOSUPPRESS-NEXT:    fadd s4, s16, s6
+; NOSUPPRESS-NEXT:    fadd s17, s0, s7
+; NOSUPPRESS-NEXT:    stp s4, s17, [x8, #48]
+; NOSUPPRESS-NEXT:    fsub s6, s6, s16
+; NOSUPPRESS-NEXT:    fsub s0, s7, s0
+; NOSUPPRESS-NEXT:    stp s6, s0, [x8, #56]
+; NOSUPPRESS-NEXT:    ldr x10, [x0, #8]
+; NOSUPPRESS-NEXT:    ldp s7, s16, [x10]
+; NOSUPPRESS-NEXT:    fmul s18, s16, s17
+; NOSUPPRESS-NEXT:    fmul s17, s7, s17
+; NOSUPPRESS-NEXT:    fnmsub s7, s7, s4, s18
+; NOSUPPRESS-NEXT:    fmadd s4, s16, s4, s17
+; NOSUPPRESS-NEXT:    fadd s16, s7, s1
+; NOSUPPRESS-NEXT:    fadd s17, s4, s5
+; NOSUPPRESS-NEXT:    stp s16, s17, [x8, #32]
+; NOSUPPRESS-NEXT:    fsub s7, s1, s7
+; NOSUPPRESS-NEXT:    fsub s4, s5, s4
+; NOSUPPRESS-NEXT:    stp s7, s4, [x8, #48]
+; NOSUPPRESS-NEXT:    ldr x10, [x0, #8]
+; NOSUPPRESS-NEXT:    add x9, x10, x9
+; NOSUPPRESS-NEXT:    ldp s1, s5, [x9]
+; NOSUPPRESS-NEXT:    fmul s18, s5, s0
+; NOSUPPRESS-NEXT:    fmul s0, s1, s0
+; NOSUPPRESS-NEXT:    fnmsub s1, s1, s6, s18
+; NOSUPPRESS-NEXT:    fmadd s5, s5, s6, s0
+; NOSUPPRESS-NEXT:    fadd s6, s1, s2
+; NOSUPPRESS-NEXT:    fadd s18, s5, s3
+; NOSUPPRESS-NEXT:    stp s6, s18, [x8, #40]
+; NOSUPPRESS-NEXT:    fsub s0, s2, s1
+; NOSUPPRESS-NEXT:    fsub s1, s3, s5
+; NOSUPPRESS-NEXT:    stp s0, s1, [x8, #56]
+; NOSUPPRESS-NEXT:    ldr x9, [x0, #8]
+; NOSUPPRESS-NEXT:    ldp s2, s3, [x9]
+; NOSUPPRESS-NEXT:    ldp s5, s19, [x8]
+; NOSUPPRESS-NEXT:    fmul s20, s17, s3
+; NOSUPPRESS-NEXT:    fmul s3, s16, s3
+; NOSUPPRESS-NEXT:    fnmsub s16, s16, s2, s20
+; NOSUPPRESS-NEXT:    fmadd s2, s17, s2, s3
+; NOSUPPRESS-NEXT:    fadd s3, s16, s5
+; NOSUPPRESS-NEXT:    fadd s17, s2, s19
+; NOSUPPRESS-NEXT:    stp s3, s17, [x8]
+; NOSUPPRESS-NEXT:    fsub s3, s5, s16
+; NOSUPPRESS-NEXT:    fsub s2, s19, s2
+; NOSUPPRESS-NEXT:    stp s3, s2, [x8, #32]
+; NOSUPPRESS-NEXT:    ldr x9, [x0, #8]
+; NOSUPPRESS-NEXT:    add x9, x9, w3, sxtw #3
+; NOSUPPRESS-NEXT:    ldp s2, s3, [x9]
+; NOSUPPRESS-NEXT:    ldp s5, s16, [x8, #8]
+; NOSUPPRESS-NEXT:    fmul s17, s18, s3
+; NOSUPPRESS-NEXT:    fmul s3, s6, s3
+; NOSUPPRESS-NEXT:    fnmsub s6, s6, s2, s17
+; NOSUPPRESS-NEXT:    fmadd s2, s18, s2, s3
+; NOSUPPRESS-NEXT:    fadd s3, s6, s5
+; NOSUPPRESS-NEXT:    fadd s17, s2, s16
+; NOSUPPRESS-NEXT:    stp s3, s17, [x8, #8]
+; NOSUPPRESS-NEXT:    fsub s3, s5, s6
+; NOSUPPRESS-NEXT:    fsub s2, s16, s2
+; NOSUPPRESS-NEXT:    stp s3, s2, [x8, #40]
+; NOSUPPRESS-NEXT:    lsl x9, x3, #33
+; NOSUPPRESS-NEXT:    ldr x10, [x0, #8]
+; NOSUPPRESS-NEXT:    add x9, x10, x9, asr #29
+; NOSUPPRESS-NEXT:    ldp s2, s3, [x9]
+; NOSUPPRESS-NEXT:    ldp s5, s6, [x8, #16]
+; NOSUPPRESS-NEXT:    fmul s16, s4, s3
+; NOSUPPRESS-NEXT:    fmul s3, s7, s3
+; NOSUPPRESS-NEXT:    fnmsub s7, s7, s2, s16
+; NOSUPPRESS-NEXT:    fmadd s2, s4, s2, s3
+; NOSUPPRESS-NEXT:    fadd s3, s7, s5
+; NOSUPPRESS-NEXT:    fadd s4, s2, s6
+; NOSUPPRESS-NEXT:    stp s3, s4, [x8, #16]
+; NOSUPPRESS-NEXT:    fsub s3, s5, s7
+; NOSUPPRESS-NEXT:    fsub s2, s6, s2
+; NOSUPPRESS-NEXT:    stp s3, s2, [x8, #48]
+; NOSUPPRESS-NEXT:    add w9, w3, w3, lsl #1
+; NOSUPPRESS-NEXT:    ldr x10, [x0, #8]
+; NOSUPPRESS-NEXT:    add x9, x10, w9, sxtw #3
+; NOSUPPRESS-NEXT:    ldp s2, s3, [x9]
+; NOSUPPRESS-NEXT:    ldp s4, s5, [x8, #24]
+; NOSUPPRESS-NEXT:    fmul s6, s1, s3
+; NOSUPPRESS-NEXT:    fmul s3, s0, s3
+; NOSUPPRESS-NEXT:    fnmsub s0, s0, s2, s6
+; NOSUPPRESS-NEXT:    fmadd s1, s1, s2, s3
+; NOSUPPRESS-NEXT:    fadd s2, s0, s4
+; NOSUPPRESS-NEXT:    fadd s3, s1, s5
+; NOSUPPRESS-NEXT:    stp s2, s3, [x8, #24]
+; NOSUPPRESS-NEXT:    fsub s0, s4, s0
+; NOSUPPRESS-NEXT:    fsub s1, s5, s1
+; NOSUPPRESS-NEXT:    stp s0, s1, [x8, #56]
+; NOSUPPRESS-NEXT:    ret
+bb:
+  %shl = shl i64 %arg3, 1
+  %getelementptr = getelementptr inbounds i8, ptr %arg, i64 8
+  %load = load ptr, ptr %getelementptr, align 8
+  %load4 = load float, ptr %load, align 4
+  %getelementptr5 = getelementptr inbounds i8, ptr %load, i64 4
+  %load6 = load float, ptr %getelementptr5, align 4
+  %getelementptr7 = getelementptr %struct.widget, ptr %arg1, i64 %arg2
+  %load8 = load float, ptr %getelementptr7, align 4
+  %getelementptr9 = getelementptr inbounds i8, ptr %getelementptr7, i64 4
+  %load10 = load float, ptr %getelementptr9, align 4
+  %getelementptr11 = getelementptr i8, ptr %getelementptr7, i64 8
+  %load12 = load float, ptr %getelementptr11, align 4
+  %getelementptr13 = getelementptr i8, ptr %getelementptr7, i64 12
+  %load14 = load float, ptr %getelementptr13, align 4
+  %fmul = fmul fast float %load12, %load4
+  %fmul15 = fmul fast float %load14, %load6
+  %fmul16 = fmul fast float %load14, %load4
+  %fmul17 = fmul fast float %load12, %load6
+  %fsub = fsub fast float %fmul, %fmul15
+  %fadd = fadd fast float %fmul16, %fmul17
+  %fadd18 = fadd fast float %fsub, %load8
+  %fadd19 = fadd fast float %fadd, %load10
+  store float %fadd18, ptr %getelementptr7, align 4
+  store float %fadd19, ptr %getelementptr9, align 4
+  %fsub20 = fsub fast float %load8, %fsub
+  %fsub21 = fsub fast float %load10, %fadd
+  store float %fsub20, ptr %getelementptr11, align 4
+  store float %fsub21, ptr %getelementptr13, align 4
+  %load22 = load ptr, ptr %getelementptr, align 8
+  %load23 = load float, ptr %load22, align 4
+  %getelementptr24 = getelementptr inbounds i8, ptr %load22, i64 4
+  %load25 = load float, ptr %getelementptr24, align 4
+  %getelementptr26 = getelementptr i8, ptr %getelementptr7, i64 16
+  %load27 = load float, ptr %getelementptr26, align 4
+  %getelementptr28 = getelementptr i8, ptr %getelementptr7, i64 20
+  %load29 = load float, ptr %getelementptr28, align 4
+  %getelementptr30 = getelementptr i8, ptr %getelementptr7, i64 24
+  %load31 = load float, ptr %getelementptr30, align 4
+  %getelementptr32 = getelementptr i8, ptr %getelementptr7, i64 28
+  %load33 = load float, ptr %getelementptr32, align 4
+  %fmul34 = fmul fast float %load31, %load23
+  %fmul35 = fmul fast float %load33, %load25
+  %fmul36 = fmul fast float %load33, %load23
+  %fmul37 = fmul fast float %load31, %load25
+  %fsub38 = fsub fast float %fmul34, %fmul35
+  %fadd39 = fadd fast float %fmul36, %fmul37
+  %fadd40 = fadd fast float %fsub38, %load27
+  %fadd41 = fadd fast float %fadd39, %load29
+  store float %fadd40, ptr %getelementptr26, align 4
+  store float %fadd41, ptr %getelementptr28, align 4
+  %fsub42 = fsub fast float %load27, %fsub38
+  %fsub43 = fsub fast float %load29, %fadd39
+  store float %fsub42, ptr %getelementptr30, align 4
+  store float %fsub43, ptr %getelementptr32, align 4
+  %load44 = load ptr, ptr %getelementptr, align 8
+  %load45 = load float, ptr %load44, align 4
+  %getelementptr46 = getelementptr inbounds i8, ptr %load44, i64 4
+  %load47 = load float, ptr %getelementptr46, align 4
+  %fmul48 = fmul fast float %load45, %fadd40
+  %fmul49 = fmul fast float %load47, %fadd41
+  %fmul50 = fmul fast float %load45, %fadd41
+  %fmul51 = fmul fast float %load47, %fadd40
+  %fsub52 = fsub fast float %fmul48, %fmul49
+  %fadd53 = fadd fast float %fmul51, %fmul50
+  %fadd54 = fadd fast float %fsub52, %fadd18
+  %fadd55 = fadd fast float %fadd53, %fadd19
+  store float %fadd54, ptr %getelementptr7, align 4
+  store float %fadd55, ptr %getelementptr9, align 4
+  %fsub56 = fsub fast float %fadd18, %fsub52
+  %fsub57 = fsub fast float %fadd19, %fadd53
+  store float %fsub56, ptr %getelementptr26, align 4
+  store float %fsub57, ptr %getelementptr28, align 4
+  %load58 = load ptr, ptr %getelementptr, align 8
+  %getelementptr59 = getelementptr inbounds %struct.widget, ptr %load58, i64 %shl
+  %load60 = load float, ptr %getelementptr59, align 4
+  %getelementptr61 = getelementptr inbounds i8, ptr %getelementptr59, i64 4
+  %load62 = load float, ptr %getelementptr61, align 4
+  %fmul63 = fmul fast float %load60, %fsub42
+  %fmul64 = fmul fast float %load62, %fsub43
+  %fmul65 = fmul fast float %load60, %fsub43
+  %fmul66 = fmul fast float %load62, %fsub42
+  %fsub67 = fsub fast float %fmul63, %fmul64
+  %fadd68 = fadd fast float %fmul66, %fmul65
+  %fadd69 = fadd fast float %fsub67, %fsub20
+  %fadd70 = fadd fast float %fadd68, %fsub21
+  store float %fadd69, ptr %getelementptr11, align 4
+  store float %fadd70, ptr %getelementptr13, align 4
+  %fsub71 = fsub fast float %fsub20, %fsub67
+  %fsub72 = fsub fast float %fsub21, %fadd68
+  store float %fsub71, ptr %getelementptr30, align 4
+  store float %fsub72, ptr %getelementptr32, align 4
+  %load73 = load ptr, ptr %getelementptr, align 8
+  %load74 = load float, ptr %load73, align 4
+  %getelementptr75 = getelementptr inbounds i8, ptr %load73, i64 4
+  %load76 = load float, ptr %getelementptr75, align 4
+  %getelementptr77 = getelementptr i8, ptr %getelementptr7, i64 32
+  %load78 = load float, ptr %getelementptr77, align 4
+  %getelementptr79 = getelementptr i8, ptr %getelementptr7, i64 36
+  %load80 = load float, ptr %getelementptr79, align 4
+  %getelementptr81 = getelementptr i8, ptr %getelementptr7, i64 40
+  %load82 = load float, ptr %getelementptr81, align 4
+  %getelementptr83 = getelementptr i8, ptr %getelementptr7, i64 44
+  %load84 = load float, ptr %getelementptr83, align 4
+  %fmul85 = fmul fast float %load82, %load74
+  %fmul86 = fmul fast float %load84, %load76
+  %fmul87 = fmul fast float %load84, %load74
+  %fmul88 = fmul fast float %load82, %load76
+  %fsub89 = fsub fast float %fmul85, %fmul86
+  %fadd90 = fadd fast float %fmul87, %fmul88
+  %fadd91 = fadd fast float %fsub89, %load78
+  %fadd92 = fadd fast float %fadd90, %load80
+  store float %fadd91, ptr %getelementptr77, align 4
+  store float %fadd92, ptr %getelementptr79, align 4
+  %fsub93 = fsub fast float %load78, %fsub89
+  %fsub94 = fsub fast float %load80, %fadd90
+  store float %fsub93, ptr %getelementptr81, align 4
+  store float %fsub94, ptr %getelementptr83, align 4
+  %load95 = load ptr, ptr %getelementptr, align 8
+  %load96 = load float, ptr %load95, align 4
+  %getelementptr97 = getelementptr inbounds i8, ptr %load95, i64 4
+  %load98 = load float, ptr %getelementptr97, align 4
+  %getelementptr99 = getelementptr i8, ptr %getelementptr7, i64 48
+  %load100 = load float, ptr %getelementptr99, align 4
+  %getelementptr101 = getelementptr i8, ptr %getelementptr7, i64 52
+  %load102 = load float, ptr %getelementptr101, align 4
+  %getelementptr103 = getelementptr i8, ptr %getelementptr7, i64 56
+  %load104 = load float, ptr %getelementptr103, align 4
+  %getelementptr105 = getelementptr i8, ptr %getelementptr7, i64 60
+  %load106 = load float, ptr %getelementptr105, align 4
+  %fmul107 = fmul fast float %load104, %load96
+  %fmul108 = fmul fast float %load106, %load98
+  %fmul109 = fmul fast float %load106, %load96
+  %fmul110 = fmul fast float %load104, %load98
+  %fsub111 = fsub fast float %fmul107, %fmul108
+  %fadd112 = fadd fast float %fmul109, %fmul110
+  %fadd113 = fadd fast float %fsub111, %load100
+  %fadd114 = fadd fast float %fadd112, %load102
+  store float %fadd113, ptr %getelementptr99, align 4
+  store float %fadd114, ptr %getelementptr101, align 4
+  %fsub115 = fsub fast float %load100, %fsub111
+  %fsub116 = fsub fast float %load102, %fadd112
+  store float %fsub115, ptr %getelementptr103, align 4
+  store float %fsub116, ptr %getelementptr105, align 4
+  %load117 = load ptr, ptr %getelementptr, align 8
+  %load118 = load float, ptr %load117, align 4
+  %getelementptr119 = getelementptr inbounds i8, ptr %load117, i64 4
+  %load120 = load float, ptr %getelementptr119, align 4
+  %fmul121 = fmul fast float %load118, %fadd113
+  %fmul122 = fmul fast float %load120, %fadd114
+  %fmul123 = fmul fast float %load118, %fadd114
+  %fmul124 = fmul fast float %load120, %fadd113
+  %fsub125 = fsub fast float %fmul121, %fmul122
+  %fadd126 = fadd fast float %fmul124, %fmul123
+  %fadd127 = fadd fast float %fsub125, %fadd91
+  %fadd128 = fadd fast float %fadd126, %fadd92
+  store float %fadd127, ptr %getelementptr77, align 4
+  store float %fadd128, ptr %getelementptr79, align 4
+  %fsub129 = fsub fast float %fadd91, %fsub125
+  %fsub130 = fsub fast float %fadd92, %fadd126
+  store float %fsub129, ptr %getelementptr99, align 4
+  store float %fsub130, ptr %getelementptr101, align 4
+  %load131 = load ptr, ptr %getelementptr, align 8
+  %getelementptr132 = getelementptr inbounds %struct.widget, ptr %load131, i64 %shl
+  %load133 = load float, ptr %getelementptr132, align 4
+  %getelementptr134 = getelementptr inbounds i8, ptr %getelementptr132, i64 4
+  %load135 = load float, ptr %getelementptr134, align 4
+  %fmul136 = fmul fast float %load133, %fsub115
+  %fmul137 = fmul fast float %load135, %fsub116
+  %fmul138 = fmul fast float %load133, %fsub116
+  %fmul139 = fmul fast float %load135, %fsub115
+  %fsub140 = fsub fast float %fmul136, %fmul137
+  %fadd141 = fadd fast float %fmul139, %fmul138
+  %fadd142 = fadd fast float %fsub140, %fsub93
+  %fadd143 = fadd fast float %fadd141, %fsub94
+  store float %fadd142, ptr %getelementptr81, align 4
+  store float %fadd143, ptr %getelementptr83, align 4
+  %fsub144 = fsub fast float %fsub93, %fsub140
+  %fsub145 = fsub fast float %fsub94, %fadd141
+  store float %fsub144, ptr %getelementptr103, align 4
+  store float %fsub145, ptr %getelementptr105, align 4
+  %load146 = load ptr, ptr %getelementptr, align 8
+  %load147 = load float, ptr %load146, align 4
+  %getelementptr148 = getelementptr inbounds i8, ptr %load146, i64 4
+  %load149 = load float, ptr %getelementptr148, align 4
+  %load150 = load float, ptr %getelementptr7, align 4
+  %load151 = load float, ptr %getelementptr9, align 4
+  %fmul152 = fmul fast float %fadd127, %load147
+  %fmul153 = fmul fast float %fadd128, %load149
+  %fmul154 = fmul fast float %fadd128, %load147
+  %fmul155 = fmul fast float %fadd127, %load149
+  %fsub156 = fsub fast float %fmul152, %fmul153
+  %fadd157 = fadd fast float %fmul154, %fmul155
+  %fadd158 = fadd fast float %fsub156, %load150
+  %fadd159 = fadd fast float %fadd157, %load151
+  store float %fadd158, ptr %getelementptr7, align 4
+  store float %fadd159, ptr %getelementptr9, align 4
+  %fsub160 = fsub fast float %load150, %fsub156
+  %fsub161 = fsub fast float %load151, %fadd157
+  store float %fsub160, ptr %getelementptr77, align 4
+  store float %fsub161, ptr %getelementptr79, align 4
+  %shl162 = shl i64 %arg3, 32
+  %ashr = ashr exact i64 %shl162, 32
+  %load163 = load ptr, ptr %getelementptr, align 8
+  %getelementptr164 = getelementptr inbounds %struct.widget, ptr %load163, i64 %ashr
+  %load165 = load float, ptr %getelementptr164, align 4
+  %getelementptr166 = getelementptr inbounds i8, ptr %getelementptr164, i64 4
+  %load167 = load float, ptr %getelementptr166, align 4
+  %load168 = load float, ptr %getelementptr11, align 4
+  %getelementptr169 = getelementptr i8, ptr %getelementptr7, i64 12
+  %load170 = load float, ptr %getelementptr169, align 4
+  %getelementptr171 = getelementptr i8, ptr %getelementptr7, i64 40
+  %getelementptr172 = getelementptr i8, ptr %getelementptr7, i64 44
+  %fmul173 = fmul fast float %fadd142, %load165
+  %fmul174 = fmul fast float %fadd143, %load167
+  %fmul175 = fmul fast float %fadd143, %load165
+  %fmul176 = fmul fast float %fadd142, %load167
+  %fsub177 = fsub fast float %fmul173, %fmul174
+  %fadd178 = fadd fast float %fmul175, %fmul176
+  %fadd179 = fadd fast float %fsub177, %load168
+  %fadd180 = fadd fast float %fadd178, %load170
+  store float %fadd179, ptr %getelementptr11, align 4
+  store float %fadd180, ptr %getelementptr169, align 4
+  %fsub181 = fsub fast float %load168, %fsub177
+  %fsub182 = fsub fast float %load170, %fadd178
+  store float %fsub181, ptr %getelementptr171, align 4
+  store float %fsub182, ptr %getelementptr172, align 4
+  %shl183 = shl i64 %arg3, 33
+  %ashr184 = ashr exact i64 %shl183, 32
+  %load185 = load ptr, ptr %getelementptr, align 8
+  %getelementptr186 = getelementptr inbounds %struct.widget, ptr %load185, i64 %ashr184
+  %load187 = load float, ptr %getelementptr186, align 4
+  %getelementptr188 = getelementptr inbounds i8, ptr %getelementptr186, i64 4
+  %load189 = load float, ptr %getelementptr188, align 4
+  %load190 = load float, ptr %getelementptr26, align 4
+  %getelementptr191 = getelementptr i8, ptr %getelementptr7, i64 20
+  %load192 = load float, ptr %getelementptr191, align 4
+  %getelementptr193 = getelementptr i8, ptr %getelementptr7, i64 48
+  %getelementptr194 = getelementptr i8, ptr %getelementptr7, i64 52
+  %fmul195 = fmul fast float %fsub129, %load187
+  %fmul196 = fmul fast float %fsub130, %load189
+  %fmul197 = fmul fast float %fsub130, %load187
+  %fmul198 = fmul fast float %fsub129, %load189
+  %fsub199 = fsub fast float %fmul195, %fmul196
+  %fadd200 = fadd fast float %fmul197, %fmul198
+  %fadd201 = fadd fast float %fsub199, %load190
+  %fadd202 = fadd fast float %fadd200, %load192
+  store float %fadd201, ptr %getelementptr26, align 4
+  store float %fadd202, ptr %getelementptr191, align 4
+  %fsub203 = fsub fast float %load190, %fsub199
+  %fsub204 = fsub fast float %load192, %fadd200
+  store float %fsub203, ptr %getelementptr193, align 4
+  store float %fsub204, ptr %getelementptr194, align 4
+  %add = add i64 %ashr184, %arg3
+  %shl205 = shl i64 %add, 32
+  %ashr206 = ashr exact i64 %shl205, 32
+  %load207 = load ptr, ptr %getelementptr, align 8
+  %getelementptr208 = getelementptr inbounds %struct.widget, ptr %load207, i64 %ashr206
+  %load209 = load float, ptr %getelementptr208, align 4
+  %getelementptr210 = getelementptr inbounds i8, ptr %getelementptr208, i64 4
+  %load211 = load float, ptr %getelementptr210, align 4
+  %load212 = load float, ptr %getelementptr30, align 4
+  %getelementptr213 = getelementptr i8, ptr %getelementptr7, i64 28
+  %load214 = load float, ptr %getelementptr213, align 4
+  %getelementptr215 = getelementptr i8, ptr %getelementptr7, i64 56
+  %getelementptr216 = getelementptr i8, ptr %getelementptr7, i64 60
+  %fmul217 = fmul fast float %fsub144, %load209
+  %fmul218 = fmul fast float %fsub145, %load211
+  %fmul219 = fmul fast float %fsub145, %load209
+  %fmul220 = fmul fast float %fsub144, %load211
+  %fsub221 = fsub fast float %fmul217, %fmul218
+  %fadd222 = fadd fast float %fmul219, %fmul220
+  %fadd223 = fadd fast float %fsub221, %load212
+  %fadd224 = fadd fast float %fadd222, %load214
+  store float %fadd223, ptr %getelementptr30, align 4
+  store float %fadd224, ptr %getelementptr213, align 4
+  %fsub225 = fsub fast float %load212, %fsub221
+  %fsub226 = fsub fast float %load214, %fadd222
+  store float %fsub225, ptr %getelementptr215, align 4
+  store float %fsub226, ptr %getelementptr216, align 4
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll b/llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll
index 6d1986ebb8182b..93fb9c1bc4b9d0 100644
--- a/llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll
+++ b/llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll
@@ -1,72 +1,56 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 -mcpu=cortex-a55 -mattr=+store-pair-suppress -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mcpu=apple-m1 -mattr=+store-pair-suppress -o - %s | FileCheck %s
 
 ; Check that stp are not suppressed at minsize.
 
-%T_IN_BLOCK = type [ 2 x { double, { double, double } } ]
-declare %T_IN_BLOCK @return_in_block()
-@in_block_store = dso_local global %T_IN_BLOCK zeroinitializer, align 8
-
-define void @test_default() uwtable {
+define void @test_default(ptr %dst, <8 x i32> %v0) {
 ; CHECK-LABEL: test_default:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    bl return_in_block
-; CHECK-NEXT:    adrp x8, in_block_store
-; CHECK-NEXT:    add x8, x8, :lo12:in_block_store
-; CHECK-NEXT:    str d0, [x8]
-; CHECK-NEXT:    str d1, [x8, #8]
-; CHECK-NEXT:    str d2, [x8, #16]
-; CHECK-NEXT:    str d3, [x8, #24]
-; CHECK-NEXT:    str d4, [x8, #32]
-; CHECK-NEXT:    str d5, [x8, #40]
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    .cfi_restore w30
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    xtn v0.8b, v0.8h
+; CHECK-NEXT:    mul v1.8b, v0.8b, v0.8b
+; CHECK-NEXT:    str d1, [x0, #8]
+; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
-  %1 = call %T_IN_BLOCK @return_in_block()
-  store %T_IN_BLOCK %1, ptr @in_block_store
+entry:
+  %add.ptr.1 = getelementptr i8, ptr %dst, i64 8
+  %t = trunc <8 x i32> %v0 to <8 x i8>
+  %mul = mul <8 x i8> %t, %t
+  store <8 x i8> %mul, ptr %add.ptr.1, align 1
+  store <8 x i8> %t, ptr %dst, align 1
   ret void
 }
 
-define void @test_minsize() minsize uwtable {
+define void @test_minsize(ptr %dst, <8 x i32> %v0) minsize {
 ; CHECK-LABEL: test_minsize:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    bl return_in_block
-; CHECK-NEXT:    adrp x8, in_block_store
-; CHECK-NEXT:    add x8, x8, :lo12:in_block_store
-; CHECK-NEXT:    stp d0, d1, [x8]
-; CHECK-NEXT:    stp d2, d3, [x8, #16]
-; CHECK-NEXT:    stp d4, d5, [x8, #32]
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    xtn v0.8b, v0.8h
+; CHECK-NEXT:    mul v1.8b, v0.8b, v0.8b
+; CHECK-NEXT:    stp d0, d1, [x0]
 ; CHECK-NEXT:    ret
-  %1 = call %T_IN_BLOCK @return_in_block()
-  store %T_IN_BLOCK %1, ptr @in_block_store
+entry:
+  %add.ptr.1 = getelementptr i8, ptr %dst, i64 8
+  %t = trunc <8 x i32> %v0 to <8 x i8>
+  %mul = mul <8 x i8> %t, %t
+  store <8 x i8> %mul, ptr %add.ptr.1, align 1
+  store <8 x i8> %t, ptr %dst, align 1
   ret void
 }
 
-define void @test_optsize() optsize uwtable {
+define void @test_optsize(ptr %dst, <8 x i32> %v0) optsize {
 ; CHECK-LABEL: test_optsize:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    bl return_in_block
-; CHECK-NEXT:    adrp x8, in_block_store
-; CHECK-NEXT:    add x8, x8, :lo12:in_block_store
-; CHECK-NEXT:    stp d0, d1, [x8]
-; CHECK-NEXT:    stp d2, d3, [x8, #16]
-; CHECK-NEXT:    stp d4, d5, [x8, #32]
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    .cfi_restore w30
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    xtn v0.8b, v0.8h
+; CHECK-NEXT:    mul v1.8b, v0.8b, v0.8b
+; CHECK-NEXT:    stp d0, d1, [x0]
 ; CHECK-NEXT:    ret
-  %1 = call %T_IN_BLOCK @return_in_block()
-  store %T_IN_BLOCK %1, ptr @in_block_store
+entry:
+  %add.ptr.1 = getelementptr i8, ptr %dst, i64 8
+  %t = trunc <8 x i32> %v0 to <8 x i8>
+  %mul = mul <8 x i8> %t, %t
+  store <8 x i8> %mul, ptr %add.ptr.1, align 1
+  store <8 x i8> %t, ptr %dst, align 1
   ret void
 }


        


More information about the llvm-commits mailing list