[llvm] [AArch64][SME] Allow lowering of memory operations to custom SME functions. (PR #79263)

Sander de Smalen via llvm-commits <llvm-commits@lists.llvm.org>
Mon Feb 26 02:55:31 PST 2024


================
@@ -0,0 +1,552 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs -aarch64-enable-sme-mops=false < %s | FileCheck %s -check-prefixes=NO_SME_MOPS
+
+@dst = global [512 x i8] zeroinitializer, align 1
+@src = global [512 x i8] zeroinitializer, align 1
+
+define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: sc_memcpy:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    adrp x0, :got:dst
+; CHECK-NEXT:    adrp x1, :got:src
+; CHECK-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT:    ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT:    bl __arm_sc_memcpy
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO_SME_MOPS-LABEL: sc_memcpy:
+; NO_SME_MOPS:       // %bb.0: // %entry
+; NO_SME_MOPS-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT:    .cfi_offset w19, -8
+; NO_SME_MOPS-NEXT:    .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT:    .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT:    .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT:    .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT:    .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT:    .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT:    .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT:    .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT:    .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT:    mov x2, x0
+; NO_SME_MOPS-NEXT:    bl __arm_sme_state
+; NO_SME_MOPS-NEXT:    adrp x8, :got:dst
+; NO_SME_MOPS-NEXT:    adrp x1, :got:src
+; NO_SME_MOPS-NEXT:    and x19, x0, #0x1
+; NO_SME_MOPS-NEXT:    ldr x8, [x8, :got_lo12:dst]
+; NO_SME_MOPS-NEXT:    ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT:    tbz w19, #0, .LBB0_2
+; NO_SME_MOPS-NEXT:  // %bb.1: // %entry
+; NO_SME_MOPS-NEXT:    smstop sm
+; NO_SME_MOPS-NEXT:  .LBB0_2: // %entry
+; NO_SME_MOPS-NEXT:    mov x0, x8
+; NO_SME_MOPS-NEXT:    bl memcpy
+; NO_SME_MOPS-NEXT:    tbz w19, #0, .LBB0_4
+; NO_SME_MOPS-NEXT:  // %bb.3: // %entry
+; NO_SME_MOPS-NEXT:    smstart sm
+; NO_SME_MOPS-NEXT:  .LBB0_4: // %entry
+; NO_SME_MOPS-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ret
+entry:
+  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+  ret void
+}
+
+define void @sc_memset(i64 noundef %n) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: sc_memset:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    adrp x0, :got:dst
+; CHECK-NEXT:    mov w1, #2 // =0x2
+; CHECK-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT:    bl __arm_sc_memset
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO_SME_MOPS-LABEL: sc_memset:
+; NO_SME_MOPS:       // %bb.0: // %entry
+; NO_SME_MOPS-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT:    .cfi_offset w19, -8
+; NO_SME_MOPS-NEXT:    .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT:    .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT:    .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT:    .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT:    .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT:    .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT:    .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT:    .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT:    .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT:    mov x2, x0
+; NO_SME_MOPS-NEXT:    bl __arm_sme_state
+; NO_SME_MOPS-NEXT:    and x19, x0, #0x1
+; NO_SME_MOPS-NEXT:    adrp x0, :got:dst
+; NO_SME_MOPS-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT:    tbz w19, #0, .LBB1_2
+; NO_SME_MOPS-NEXT:  // %bb.1: // %entry
+; NO_SME_MOPS-NEXT:    smstop sm
+; NO_SME_MOPS-NEXT:  .LBB1_2: // %entry
+; NO_SME_MOPS-NEXT:    mov w1, #2 // =0x2
+; NO_SME_MOPS-NEXT:    bl memset
+; NO_SME_MOPS-NEXT:    tbz w19, #0, .LBB1_4
+; NO_SME_MOPS-NEXT:  // %bb.3: // %entry
+; NO_SME_MOPS-NEXT:    smstart sm
+; NO_SME_MOPS-NEXT:  .LBB1_4: // %entry
+; NO_SME_MOPS-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
+  ret void
+}
+
+define void @sc_memmove(i64 noundef %n) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: sc_memmove:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    adrp x0, :got:dst
+; CHECK-NEXT:    adrp x1, :got:src
+; CHECK-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT:    ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT:    bl __arm_sc_memmove
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO_SME_MOPS-LABEL: sc_memmove:
+; NO_SME_MOPS:       // %bb.0: // %entry
+; NO_SME_MOPS-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT:    .cfi_offset w19, -8
+; NO_SME_MOPS-NEXT:    .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT:    .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT:    .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT:    .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT:    .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT:    .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT:    .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT:    .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT:    .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT:    mov x2, x0
+; NO_SME_MOPS-NEXT:    bl __arm_sme_state
+; NO_SME_MOPS-NEXT:    adrp x8, :got:dst
+; NO_SME_MOPS-NEXT:    adrp x1, :got:src
+; NO_SME_MOPS-NEXT:    and x19, x0, #0x1
+; NO_SME_MOPS-NEXT:    ldr x8, [x8, :got_lo12:dst]
+; NO_SME_MOPS-NEXT:    ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT:    tbz w19, #0, .LBB2_2
+; NO_SME_MOPS-NEXT:  // %bb.1: // %entry
+; NO_SME_MOPS-NEXT:    smstop sm
+; NO_SME_MOPS-NEXT:  .LBB2_2: // %entry
+; NO_SME_MOPS-NEXT:    mov x0, x8
+; NO_SME_MOPS-NEXT:    bl memmove
+; NO_SME_MOPS-NEXT:    tbz w19, #0, .LBB2_4
+; NO_SME_MOPS-NEXT:  // %bb.3: // %entry
+; NO_SME_MOPS-NEXT:    smstart sm
+; NO_SME_MOPS-NEXT:  .LBB2_4: // %entry
+; NO_SME_MOPS-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+  ret void
+}
+
+define void @se_memcpy(i64 noundef %n) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: se_memcpy:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    adrp x0, :got:dst
+; CHECK-NEXT:    adrp x1, :got:src
+; CHECK-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT:    ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT:    bl __arm_sc_memcpy
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO_SME_MOPS-LABEL: se_memcpy:
+; NO_SME_MOPS:       // %bb.0: // %entry
+; NO_SME_MOPS-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT:    .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT:    .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT:    .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT:    .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT:    .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT:    .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT:    .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT:    .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT:    .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT:    .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT:    mov x2, x0
+; NO_SME_MOPS-NEXT:    adrp x0, :got:dst
+; NO_SME_MOPS-NEXT:    adrp x1, :got:src
+; NO_SME_MOPS-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT:    ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT:    smstop sm
+; NO_SME_MOPS-NEXT:    bl memcpy
+; NO_SME_MOPS-NEXT:    smstart sm
+; NO_SME_MOPS-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ret
+entry:
+  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+  ret void
+}
+
+define void @se_memset(i64 noundef %n) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: se_memset:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    adrp x0, :got:dst
+; CHECK-NEXT:    mov w1, #2 // =0x2
+; CHECK-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT:    bl __arm_sc_memset
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO_SME_MOPS-LABEL: se_memset:
+; NO_SME_MOPS:       // %bb.0: // %entry
+; NO_SME_MOPS-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT:    .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT:    .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT:    .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT:    .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT:    .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT:    .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT:    .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT:    .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT:    .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT:    .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT:    mov x2, x0
+; NO_SME_MOPS-NEXT:    adrp x0, :got:dst
+; NO_SME_MOPS-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT:    smstop sm
+; NO_SME_MOPS-NEXT:    mov w1, #2 // =0x2
+; NO_SME_MOPS-NEXT:    bl memset
+; NO_SME_MOPS-NEXT:    smstart sm
+; NO_SME_MOPS-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
+  ret void
+}
+
+define void @se_memmove(i64 noundef %n) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: se_memmove:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    adrp x0, :got:dst
+; CHECK-NEXT:    adrp x1, :got:src
+; CHECK-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT:    ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT:    bl __arm_sc_memmove
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO_SME_MOPS-LABEL: se_memmove:
+; NO_SME_MOPS:       // %bb.0: // %entry
+; NO_SME_MOPS-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT:    .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT:    .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT:    .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT:    .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT:    .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT:    .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT:    .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT:    .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT:    .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT:    .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT:    mov x2, x0
+; NO_SME_MOPS-NEXT:    adrp x0, :got:dst
+; NO_SME_MOPS-NEXT:    adrp x1, :got:src
+; NO_SME_MOPS-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT:    ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT:    smstop sm
+; NO_SME_MOPS-NEXT:    bl memmove
+; NO_SME_MOPS-NEXT:    smstart sm
+; NO_SME_MOPS-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+  ret void
+}
+
+define void @sb_memcpy(i64 noundef %n) "aarch64_pstate_sm_body" {
+; CHECK-LABEL: sb_memcpy:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    .cfi_offset b8, -24
+; CHECK-NEXT:    .cfi_offset b9, -32
+; CHECK-NEXT:    .cfi_offset b10, -40
+; CHECK-NEXT:    .cfi_offset b11, -48
+; CHECK-NEXT:    .cfi_offset b12, -56
+; CHECK-NEXT:    .cfi_offset b13, -64
+; CHECK-NEXT:    .cfi_offset b14, -72
+; CHECK-NEXT:    .cfi_offset b15, -80
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    adrp x0, :got:dst
+; CHECK-NEXT:    adrp x1, :got:src
+; CHECK-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT:    ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT:    bl __arm_sc_memcpy
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO_SME_MOPS-LABEL: sb_memcpy:
+; NO_SME_MOPS:       // %bb.0: // %entry
+; NO_SME_MOPS-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT:    .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT:    .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT:    .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT:    .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT:    .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT:    .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT:    .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT:    .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT:    .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT:    .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT:    mov x2, x0
+; NO_SME_MOPS-NEXT:    smstart sm
+; NO_SME_MOPS-NEXT:    adrp x0, :got:dst
+; NO_SME_MOPS-NEXT:    adrp x1, :got:src
+; NO_SME_MOPS-NEXT:    ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT:    ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT:    smstop sm
+; NO_SME_MOPS-NEXT:    bl memcpy
+; NO_SME_MOPS-NEXT:    smstart sm
+; NO_SME_MOPS-NEXT:    smstop sm
+; NO_SME_MOPS-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT:    ret
+entry:
+  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+  ret void
+}
+
+define void @sb_memset(i64 noundef %n) "aarch64_pstate_sm_body" {
+; CHECK-LABEL: sb_memset:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    .cfi_offset b8, -24
+; CHECK-NEXT:    .cfi_offset b9, -32
+; CHECK-NEXT:    .cfi_offset b10, -40
+; CHECK-NEXT:    .cfi_offset b11, -48
+; CHECK-NEXT:    .cfi_offset b12, -56
+; CHECK-NEXT:    .cfi_offset b13, -64
+; CHECK-NEXT:    .cfi_offset b14, -72
+; CHECK-NEXT:    .cfi_offset b15, -80
----------------
sdesmalen-arm wrote:

nit: can you add `nounwind` to the tests, to reduce the number of CHECK lines?
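
For example, something along these lines (a sketch only, shown for the first test; `utils/update_llc_test_checks.py` would then regenerate much shorter CHECK lines, since `nounwind` lets the backend omit the `.cfi_*` directives):

```llvm
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)

; With nounwind, no unwind information is required, so the autogenerated
; assembly checks no longer include the .cfi_def_cfa_offset/.cfi_offset lines.
define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" nounwind {
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
  ret void
}
```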

https://github.com/llvm/llvm-project/pull/79263

