[llvm] [AArch64] Add command-line flag to disable MOPS for memory intrinsics (PR #172722)
Ties Stuij via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 2 08:32:00 PST 2026
https://github.com/stuij updated https://github.com/llvm/llvm-project/pull/172722
>From dc0463db736fb4dee33d57b80f9bc688ae2be555 Mon Sep 17 00:00:00 2001
From: Ties Stuij <ties.stuij at arm.com>
Date: Wed, 17 Dec 2025 18:25:38 +0000
Subject: [PATCH 1/3] [AArch64] Add command-line flag to disable MOPS for
memory intrinsics
Add -aarch64-use-mops flag (default true) to control whether the AArch64
backend uses Memory Copy and Set instructions (MOPS) introduced in
Armv8.8-A/Armv9.3-A for lowering memory intrinsics (memcpy, memset,
memmove).
When MOPS is disabled, we will either inline the operation or call the library
function based on size thresholds.
---
.../AArch64/AArch64SelectionDAGInfo.cpp | 12 +-
.../CodeGen/AArch64/aarch64-use-mops-flag.ll | 130 ++++++++++++++++++
2 files changed, 139 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/aarch64-use-mops-flag.ll
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 5c3f59ef20ca8..3254c9f2733e5 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -27,6 +27,12 @@ static cl::opt<bool>
"to lower to librt functions"),
cl::init(true));
+static cl::opt<bool>
+ UseMOPS("aarch64-use-mops", cl::Hidden,
+ cl::desc("Enable AArch64 MOPS instructions "
+ "for memcpy/memset/memmove"),
+ cl::init(true));
+
AArch64SelectionDAGInfo::AArch64SelectionDAGInfo()
: SelectionDAGGenTargetInfo(AArch64GenSDNodeInfo) {}
@@ -214,7 +220,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
- if (STI.hasMOPS())
+ if (UseMOPS && STI.hasMOPS())
return EmitMOPS(AArch64::MOPSMemoryCopyPseudo, DAG, DL, Chain, Dst, Src,
Size, Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
@@ -233,7 +239,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
- if (STI.hasMOPS())
+ if (UseMOPS && STI.hasMOPS())
return EmitMOPS(AArch64::MOPSMemorySetPseudo, DAG, dl, Chain, Dst, Src,
Size, Alignment, isVolatile, DstPtrInfo,
MachinePointerInfo{});
@@ -253,7 +259,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
- if (STI.hasMOPS())
+ if (UseMOPS && STI.hasMOPS())
return EmitMOPS(AArch64::MOPSMemoryMovePseudo, DAG, dl, Chain, Dst, Src,
Size, Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-use-mops-flag.ll b/llvm/test/CodeGen/AArch64/aarch64-use-mops-flag.ll
new file mode 100644
index 0000000000000..c7cc8bc970632
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-use-mops-flag.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; Test the -aarch64-use-mops flag that controls whether MOPS
+; instructions are used in memset/memcpy/memmove intrinsics
+; when the target supports them.
+
+; RUN: llc %s -o - -mtriple=aarch64 -mattr=+mops | FileCheck %s --check-prefix=MOPS-ENABLED
+; RUN: llc %s -o - -mtriple=aarch64 -mattr=+mops -aarch64-use-mops=false | FileCheck %s --check-prefix=MOPS-DISABLED
+; RUN: llc %s -o - -mtriple=aarch64 -mattr=+mops -aarch64-use-mops=true | FileCheck %s --check-prefix=MOPS-ENABLED
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
+declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
+
+; Test memset at threshold (512 bytes) - should be inlined when MOPS disabled
+define void @memset_threshold(ptr %dst) {
+; MOPS-ENABLED-LABEL: memset_threshold:
+; MOPS-ENABLED: // %bb.0:
+; MOPS-ENABLED-NEXT: movi v0.2d, #0000000000000000
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #32]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #64]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #96]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #128]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #160]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #192]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #224]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #256]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #288]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #320]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #352]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #384]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #416]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #448]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #480]
+; MOPS-ENABLED-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_threshold:
+; MOPS-DISABLED: // %bb.0:
+; MOPS-DISABLED-NEXT: movi v0.2d, #0000000000000000
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #32]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #64]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #96]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #128]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #160]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #192]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #224]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #256]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #288]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #320]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #352]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #384]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #416]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #448]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #480]
+; MOPS-DISABLED-NEXT: ret
+ call void @llvm.memset.p0.i64(ptr align 16 %dst, i8 0, i64 512, i1 false)
+ ret void
+}
+
+; Test memset just over threshold (513 bytes) - should use MOPS when enabled, library call when disabled
+define void @memset_over_threshold(ptr %dst) {
+; MOPS-ENABLED-LABEL: memset_over_threshold:
+; MOPS-ENABLED: // %bb.0:
+; MOPS-ENABLED-NEXT: mov w8, #513 // =0x201
+; MOPS-ENABLED-NEXT: setp [x0]!, x8!, xzr
+; MOPS-ENABLED-NEXT: setm [x0]!, x8!, xzr
+; MOPS-ENABLED-NEXT: sete [x0]!, x8!, xzr
+; MOPS-ENABLED-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_over_threshold:
+; MOPS-DISABLED: // %bb.0:
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov w1, wzr
+; MOPS-DISABLED-NEXT: mov w2, #513 // =0x201
+; MOPS-DISABLED-NEXT: bl memset
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+ call void @llvm.memset.p0.i64(ptr align 16 %dst, i8 0, i64 513, i1 false)
+ ret void
+}
+
+; Test memcpy (should use MOPS when enabled, library call when disabled)
+define void @memcpy_large(ptr %dst, ptr %src) {
+; MOPS-ENABLED-LABEL: memcpy_large:
+; MOPS-ENABLED: // %bb.0:
+; MOPS-ENABLED-NEXT: mov w8, #2048 // =0x800
+; MOPS-ENABLED-NEXT: cpyfp [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpyfm [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpyfe [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_large:
+; MOPS-DISABLED: // %bb.0:
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov w2, #2048 // =0x800
+; MOPS-DISABLED-NEXT: bl memcpy
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 2048, i1 false)
+ ret void
+}
+
+; Test memmove (should use MOPS when enabled, library call when disabled)
+define void @memmove_large(ptr %dst, ptr %src) {
+; MOPS-ENABLED-LABEL: memmove_large:
+; MOPS-ENABLED: // %bb.0:
+; MOPS-ENABLED-NEXT: mov w8, #800 // =0x320
+; MOPS-ENABLED-NEXT: cpyp [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpym [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpye [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memmove_large:
+; MOPS-DISABLED: // %bb.0:
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov w2, #800 // =0x320
+; MOPS-DISABLED-NEXT: bl memmove
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+ call void @llvm.memmove.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 800, i1 false)
+ ret void
+}
>From 3865936d858d9d5553f1b20c16d079770dfd1243 Mon Sep 17 00:00:00 2001
From: Ties Stuij <ties.stuij at arm.com>
Date: Thu, 18 Dec 2025 10:09:24 +0000
Subject: [PATCH 2/3] reformat UseMOPS
---
llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 3254c9f2733e5..df8ba2c8e922f 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -27,11 +27,10 @@ static cl::opt<bool>
"to lower to librt functions"),
cl::init(true));
-static cl::opt<bool>
- UseMOPS("aarch64-use-mops", cl::Hidden,
- cl::desc("Enable AArch64 MOPS instructions "
- "for memcpy/memset/memmove"),
- cl::init(true));
+static cl::opt<bool> UseMOPS("aarch64-use-mops", cl::Hidden,
+ cl::desc("Enable AArch64 MOPS instructions "
+ "for memcpy/memset/memmove"),
+ cl::init(true));
AArch64SelectionDAGInfo::AArch64SelectionDAGInfo()
: SelectionDAGGenTargetInfo(AArch64GenSDNodeInfo) {}
>From 82110d73d6d98715fec62c98f77eaa5515553cdf Mon Sep 17 00:00:00 2001
From: Ties Stuij <ties.stuij at arm.com>
Date: Fri, 2 Jan 2026 16:20:56 +0000
Subject: [PATCH 3/3] Merge the enabled/disabled checks into general
aarch64-mops.ll file
---
llvm/test/CodeGen/AArch64/aarch64-mops.ll | 771 ++++++++++++++++++
.../CodeGen/AArch64/aarch64-use-mops-flag.ll | 130 ---
2 files changed, 771 insertions(+), 130 deletions(-)
delete mode 100644 llvm/test/CodeGen/AArch64/aarch64-use-mops-flag.ll
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
index 1710fad9f2539..33945a0689140 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
@@ -6,6 +6,8 @@
; RUN: llc %s -o - -mtriple=aarch64 -global-isel=1 -global-isel-abort=1 -mattr=+mops | FileCheck %s --check-prefixes=GISel-MOPS,GISel-MOPS-O3
; RUN: llc %s -o - -mtriple=aarch64 -O2 | FileCheck %s --check-prefix=SDAG-WITHOUT-MOPS-O2
; RUN: llc %s -o - -mtriple=aarch64 -O2 -mattr=+mops | FileCheck %s --check-prefix=SDAG-MOPS-O2
+; RUN: llc %s -o - -mtriple=aarch64 -mattr=+mops -aarch64-use-mops=false | FileCheck %s --check-prefix=MOPS-DISABLED
+; RUN: llc %s -o - -mtriple=aarch64 -mattr=+mops -aarch64-use-mops=true | FileCheck %s --check-prefix=MOPS-ENABLED
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
@@ -31,6 +33,14 @@ define void @memset_0_zeroval(ptr %dst) {
; SDAG-MOPS-O2-LABEL: memset_0_zeroval:
; SDAG-MOPS-O2: // %bb.0: // %entry
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_0_zeroval:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_0_zeroval:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 0, i64 0, i1 false)
ret void
@@ -52,6 +62,14 @@ define void @memset_0_zeroval_volatile(ptr %dst) {
; SDAG-MOPS-O2-LABEL: memset_0_zeroval_volatile:
; SDAG-MOPS-O2: // %bb.0: // %entry
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_0_zeroval_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_0_zeroval_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 0, i64 0, i1 true)
ret void
@@ -81,6 +99,18 @@ define void @memset_10_zeroval(ptr %dst) {
; SDAG-MOPS-O2-NEXT: strh wzr, [x0, #8]
; SDAG-MOPS-O2-NEXT: str xzr, [x0]
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_10_zeroval:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: strh wzr, [x0, #8]
+; MOPS-DISABLED-NEXT: str xzr, [x0]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_10_zeroval:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: strh wzr, [x0, #8]
+; MOPS-ENABLED-NEXT: str xzr, [x0]
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 0, i64 10, i1 false)
ret void
@@ -110,6 +140,18 @@ define void @memset_10_zeroval_volatile(ptr %dst) {
; SDAG-MOPS-O2-NEXT: strh wzr, [x0, #8]
; SDAG-MOPS-O2-NEXT: str xzr, [x0]
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_10_zeroval_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: strh wzr, [x0, #8]
+; MOPS-DISABLED-NEXT: str xzr, [x0]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_10_zeroval_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: strh wzr, [x0, #8]
+; MOPS-ENABLED-NEXT: str xzr, [x0]
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 0, i64 10, i1 true)
ret void
@@ -175,6 +217,25 @@ define void @memset_10000_zeroval(ptr %dst) {
; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, xzr
; SDAG-MOPS-O2-NEXT: sete [x0]!, x8!, xzr
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_10000_zeroval:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov w1, wzr
+; MOPS-DISABLED-NEXT: mov w2, #10000 // =0x2710
+; MOPS-DISABLED-NEXT: bl memset
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_10000_zeroval:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: mov w8, #10000 // =0x2710
+; MOPS-ENABLED-NEXT: setp [x0]!, x8!, xzr
+; MOPS-ENABLED-NEXT: setm [x0]!, x8!, xzr
+; MOPS-ENABLED-NEXT: sete [x0]!, x8!, xzr
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 0, i64 10000, i1 false)
ret void
@@ -240,6 +301,25 @@ define void @memset_10000_zeroval_volatile(ptr %dst) {
; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, xzr
; SDAG-MOPS-O2-NEXT: sete [x0]!, x8!, xzr
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_10000_zeroval_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov w1, wzr
+; MOPS-DISABLED-NEXT: mov w2, #10000 // =0x2710
+; MOPS-DISABLED-NEXT: bl memset
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_10000_zeroval_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: mov w8, #10000 // =0x2710
+; MOPS-ENABLED-NEXT: setp [x0]!, x8!, xzr
+; MOPS-ENABLED-NEXT: setm [x0]!, x8!, xzr
+; MOPS-ENABLED-NEXT: sete [x0]!, x8!, xzr
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 0, i64 10000, i1 true)
ret void
@@ -289,6 +369,24 @@ define void @memset_size_zeroval(ptr %dst, i64 %size) {
; SDAG-MOPS-O2-NEXT: setm [x0]!, x1!, xzr
; SDAG-MOPS-O2-NEXT: sete [x0]!, x1!, xzr
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_size_zeroval:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov x2, x1
+; MOPS-DISABLED-NEXT: mov w1, wzr
+; MOPS-DISABLED-NEXT: bl memset
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_size_zeroval:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: setp [x0]!, x1!, xzr
+; MOPS-ENABLED-NEXT: setm [x0]!, x1!, xzr
+; MOPS-ENABLED-NEXT: sete [x0]!, x1!, xzr
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 0, i64 %size, i1 false)
ret void
@@ -338,6 +436,24 @@ define void @memset_size_zeroval_volatile(ptr %dst, i64 %size) {
; SDAG-MOPS-O2-NEXT: setm [x0]!, x1!, xzr
; SDAG-MOPS-O2-NEXT: sete [x0]!, x1!, xzr
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_size_zeroval_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov x2, x1
+; MOPS-DISABLED-NEXT: mov w1, wzr
+; MOPS-DISABLED-NEXT: bl memset
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_size_zeroval_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: setp [x0]!, x1!, xzr
+; MOPS-ENABLED-NEXT: setm [x0]!, x1!, xzr
+; MOPS-ENABLED-NEXT: sete [x0]!, x1!, xzr
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 0, i64 %size, i1 true)
ret void
@@ -360,6 +476,14 @@ define void @memset_0(ptr %dst, i32 %value) {
; SDAG-MOPS-O2-LABEL: memset_0:
; SDAG-MOPS-O2: // %bb.0: // %entry
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_0:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_0:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 0, i64 0, i1 false)
@@ -382,6 +506,14 @@ define void @memset_0_volatile(ptr %dst, i32 %value) {
; SDAG-MOPS-O2-LABEL: memset_0_volatile:
; SDAG-MOPS-O2: // %bb.0: // %entry
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_0_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_0_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 0, i64 0, i1 true)
@@ -452,6 +584,26 @@ define void @memset_10(ptr %dst, i32 %value) {
; SDAG-MOPS-O2-NEXT: str x8, [x0]
; SDAG-MOPS-O2-NEXT: strh w8, [x0, #8]
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_10:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: // kill: def $w1 killed $w1 def $x1
+; MOPS-DISABLED-NEXT: mov x8, #72340172838076673 // =0x101010101010101
+; MOPS-DISABLED-NEXT: and x9, x1, #0xff
+; MOPS-DISABLED-NEXT: mul x8, x9, x8
+; MOPS-DISABLED-NEXT: str x8, [x0]
+; MOPS-DISABLED-NEXT: strh w8, [x0, #8]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_10:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: // kill: def $w1 killed $w1 def $x1
+; MOPS-ENABLED-NEXT: mov x8, #72340172838076673 // =0x101010101010101
+; MOPS-ENABLED-NEXT: and x9, x1, #0xff
+; MOPS-ENABLED-NEXT: mul x8, x9, x8
+; MOPS-ENABLED-NEXT: str x8, [x0]
+; MOPS-ENABLED-NEXT: strh w8, [x0, #8]
+; MOPS-ENABLED-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 %value_trunc, i64 10, i1 false)
@@ -522,12 +674,185 @@ define void @memset_10_volatile(ptr %dst, i32 %value) {
; SDAG-MOPS-O2-NEXT: str x8, [x0]
; SDAG-MOPS-O2-NEXT: strh w8, [x0, #8]
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_10_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: // kill: def $w1 killed $w1 def $x1
+; MOPS-DISABLED-NEXT: mov x8, #72340172838076673 // =0x101010101010101
+; MOPS-DISABLED-NEXT: and x9, x1, #0xff
+; MOPS-DISABLED-NEXT: mul x8, x9, x8
+; MOPS-DISABLED-NEXT: str x8, [x0]
+; MOPS-DISABLED-NEXT: strh w8, [x0, #8]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_10_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: // kill: def $w1 killed $w1 def $x1
+; MOPS-ENABLED-NEXT: mov x8, #72340172838076673 // =0x101010101010101
+; MOPS-ENABLED-NEXT: and x9, x1, #0xff
+; MOPS-ENABLED-NEXT: mul x8, x9, x8
+; MOPS-ENABLED-NEXT: str x8, [x0]
+; MOPS-ENABLED-NEXT: strh w8, [x0, #8]
+; MOPS-ENABLED-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 %value_trunc, i64 10, i1 true)
ret void
}
+; Test memset at threshold (512 bytes) - should be inlined when MOPS disabled
+define void @memset_threshold(ptr %dst) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_threshold:
+; GISel-WITHOUT-MOPS-O0: // %bb.0:
+; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #512 // =0x200
+; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT: mov w1, wzr
+; GISel-WITHOUT-MOPS-O0-NEXT: bl memset
+; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT: ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_threshold:
+; GISel-WITHOUT-MOPS-O3: // %bb.0:
+; GISel-WITHOUT-MOPS-O3-NEXT: movi v0.2d, #0000000000000000
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #32]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #64]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #96]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #128]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #160]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #192]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #224]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #256]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #288]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #320]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #352]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #384]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #416]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #448]
+; GISel-WITHOUT-MOPS-O3-NEXT: stp q0, q0, [x0, #480]
+; GISel-WITHOUT-MOPS-O3-NEXT: ret
+;
+; GISel-MOPS-O0-LABEL: memset_threshold:
+; GISel-MOPS-O0: // %bb.0:
+; GISel-MOPS-O0-NEXT: mov w8, #512 // =0x200
+; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT: mov x9, xzr
+; GISel-MOPS-O0-NEXT: setp [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT: setm [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT: sete [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT: ret
+;
+; GISel-MOPS-O3-LABEL: memset_threshold:
+; GISel-MOPS-O3: // %bb.0:
+; GISel-MOPS-O3-NEXT: movi v0.2d, #0000000000000000
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #32]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #64]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #96]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #128]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #160]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #192]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #224]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #256]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #288]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #320]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #352]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #384]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #416]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #448]
+; GISel-MOPS-O3-NEXT: stp q0, q0, [x0, #480]
+; GISel-MOPS-O3-NEXT: ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_threshold:
+; SDAG-WITHOUT-MOPS-O2: // %bb.0:
+; SDAG-WITHOUT-MOPS-O2-NEXT: movi v0.2d, #0000000000000000
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #32]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #64]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #96]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #128]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #160]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #192]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #224]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #256]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #288]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #320]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #352]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #384]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #416]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #448]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q0, q0, [x0, #480]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ret
+;
+; SDAG-MOPS-O2-LABEL: memset_threshold:
+; SDAG-MOPS-O2: // %bb.0:
+; SDAG-MOPS-O2-NEXT: movi v0.2d, #0000000000000000
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #32]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #64]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #96]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #128]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #160]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #192]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #224]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #256]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #288]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #320]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #352]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #384]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #416]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #448]
+; SDAG-MOPS-O2-NEXT: stp q0, q0, [x0, #480]
+; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_threshold:
+; MOPS-DISABLED: // %bb.0:
+; MOPS-DISABLED-NEXT: movi v0.2d, #0000000000000000
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #32]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #64]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #96]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #128]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #160]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #192]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #224]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #256]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #288]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #320]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #352]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #384]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #416]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #448]
+; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #480]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_threshold:
+; MOPS-ENABLED: // %bb.0:
+; MOPS-ENABLED-NEXT: movi v0.2d, #0000000000000000
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #32]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #64]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #96]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #128]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #160]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #192]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #224]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #256]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #288]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #320]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #352]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #384]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #416]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #448]
+; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #480]
+; MOPS-ENABLED-NEXT: ret
+ call void @llvm.memset.p0.i64(ptr align 16 %dst, i8 0, i64 512, i1 false)
+ ret void
+}
+
define void @memset_10000(ptr %dst, i32 %value) {
; GISel-WITHOUT-MOPS-O0-LABEL: memset_10000:
; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
@@ -588,6 +913,25 @@ define void @memset_10000(ptr %dst, i32 %value) {
; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, x1
; SDAG-MOPS-O2-NEXT: sete [x0]!, x8!, x1
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_10000:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov w2, #10000 // =0x2710
+; MOPS-DISABLED-NEXT: bl memset
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_10000:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: mov w8, #10000 // =0x2710
+; MOPS-ENABLED-NEXT: // kill: def $w1 killed $w1 def $x1
+; MOPS-ENABLED-NEXT: setp [x0]!, x8!, x1
+; MOPS-ENABLED-NEXT: setm [x0]!, x8!, x1
+; MOPS-ENABLED-NEXT: sete [x0]!, x8!, x1
+; MOPS-ENABLED-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 %value_trunc, i64 10000, i1 false)
@@ -654,6 +998,25 @@ define void @memset_10000_volatile(ptr %dst, i32 %value) {
; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, x1
; SDAG-MOPS-O2-NEXT: sete [x0]!, x8!, x1
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_10000_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov w2, #10000 // =0x2710
+; MOPS-DISABLED-NEXT: bl memset
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_10000_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: mov w8, #10000 // =0x2710
+; MOPS-ENABLED-NEXT: // kill: def $w1 killed $w1 def $x1
+; MOPS-ENABLED-NEXT: setp [x0]!, x8!, x1
+; MOPS-ENABLED-NEXT: setm [x0]!, x8!, x1
+; MOPS-ENABLED-NEXT: sete [x0]!, x8!, x1
+; MOPS-ENABLED-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 %value_trunc, i64 10000, i1 true)
@@ -723,6 +1086,26 @@ define void @memset_size(ptr %dst, i64 %size, i32 %value) {
; SDAG-MOPS-O2-NEXT: setm [x0]!, x1!, x2
; SDAG-MOPS-O2-NEXT: sete [x0]!, x1!, x2
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_size:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov x8, x1
+; MOPS-DISABLED-NEXT: mov w1, w2
+; MOPS-DISABLED-NEXT: mov x2, x8
+; MOPS-DISABLED-NEXT: bl memset
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_size:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: // kill: def $w2 killed $w2 def $x2
+; MOPS-ENABLED-NEXT: setp [x0]!, x1!, x2
+; MOPS-ENABLED-NEXT: setm [x0]!, x1!, x2
+; MOPS-ENABLED-NEXT: sete [x0]!, x1!, x2
+; MOPS-ENABLED-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 %value_trunc, i64 %size, i1 false)
@@ -792,6 +1175,26 @@ define void @memset_size_volatile(ptr %dst, i64 %size, i32 %value) {
; SDAG-MOPS-O2-NEXT: setm [x0]!, x1!, x2
; SDAG-MOPS-O2-NEXT: sete [x0]!, x1!, x2
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memset_size_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov x8, x1
+; MOPS-DISABLED-NEXT: mov w1, w2
+; MOPS-DISABLED-NEXT: mov x2, x8
+; MOPS-DISABLED-NEXT: bl memset
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memset_size_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: // kill: def $w2 killed $w2 def $x2
+; MOPS-ENABLED-NEXT: setp [x0]!, x1!, x2
+; MOPS-ENABLED-NEXT: setm [x0]!, x1!, x2
+; MOPS-ENABLED-NEXT: sete [x0]!, x1!, x2
+; MOPS-ENABLED-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
call void @llvm.memset.p0.i64(ptr align 1 %dst, i8 %value_trunc, i64 %size, i1 true)
@@ -815,6 +1218,14 @@ define void @memcpy_0(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-LABEL: memcpy_0:
; SDAG-MOPS-O2: // %bb.0: // %entry
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_0:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_0:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 0, i1 false)
ret void
@@ -836,6 +1247,14 @@ define void @memcpy_0_volatile(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-LABEL: memcpy_0_volatile:
; SDAG-MOPS-O2: // %bb.0: // %entry
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_0_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_0_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 0, i1 true)
ret void
@@ -873,6 +1292,22 @@ define void @memcpy_10(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: strh w8, [x0, #8]
; SDAG-MOPS-O2-NEXT: str x9, [x0]
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_10:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ldrh w8, [x1, #8]
+; MOPS-DISABLED-NEXT: ldr x9, [x1]
+; MOPS-DISABLED-NEXT: strh w8, [x0, #8]
+; MOPS-DISABLED-NEXT: str x9, [x0]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_10:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ldrh w8, [x1, #8]
+; MOPS-ENABLED-NEXT: ldr x9, [x1]
+; MOPS-ENABLED-NEXT: strh w8, [x0, #8]
+; MOPS-ENABLED-NEXT: str x9, [x0]
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 10, i1 false)
ret void
@@ -910,6 +1345,22 @@ define void @memcpy_10_volatile(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: strh w9, [x0, #8]
; SDAG-MOPS-O2-NEXT: str x8, [x0]
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_10_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ldr x8, [x1]
+; MOPS-DISABLED-NEXT: ldrh w9, [x1, #8]
+; MOPS-DISABLED-NEXT: strh w9, [x0, #8]
+; MOPS-DISABLED-NEXT: str x8, [x0]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_10_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ldr x8, [x1]
+; MOPS-ENABLED-NEXT: ldrh w9, [x1, #8]
+; MOPS-ENABLED-NEXT: strh w9, [x0, #8]
+; MOPS-ENABLED-NEXT: str x8, [x0]
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 10, i1 true)
ret void
@@ -971,6 +1422,24 @@ define void @memcpy_1000(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_1000:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov w2, #1000 // =0x3e8
+; MOPS-DISABLED-NEXT: bl memcpy
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_1000:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: mov w8, #1000 // =0x3e8
+; MOPS-ENABLED-NEXT: cpyfp [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpyfm [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpyfe [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 1000, i1 false)
ret void
@@ -1032,6 +1501,24 @@ define void @memcpy_1000_volatile(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_1000_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov w2, #1000 // =0x3e8
+; MOPS-DISABLED-NEXT: bl memcpy
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_1000_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: mov w8, #1000 // =0x3e8
+; MOPS-ENABLED-NEXT: cpyfp [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpyfm [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpyfe [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 1000, i1 true)
ret void
@@ -1069,6 +1556,22 @@ define void @memcpy_n(ptr %dst, ptr %src, i64 %size, i32 %value) {
; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x2!
; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x2!
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_n:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: bl memcpy
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_n:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: cpyfp [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: cpyfm [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: cpyfe [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 %size, i1 false)
ret void
@@ -1106,6 +1609,22 @@ define void @memcpy_n_volatile(ptr %dst, ptr %src, i64 %size, i32 %value) {
; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x2!
; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x2!
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_n_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: bl memcpy
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_n_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: cpyfp [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: cpyfm [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: cpyfe [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 %size, i1 true)
ret void
@@ -1128,6 +1647,14 @@ define void @memcpy_inline_0(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-LABEL: memcpy_inline_0:
; SDAG-MOPS-O2: // %bb.0: // %entry
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_inline_0:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_inline_0:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 0, i1 false)
ret void
@@ -1149,6 +1676,14 @@ define void @memcpy_inline_0_volatile(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-LABEL: memcpy_inline_0_volatile:
; SDAG-MOPS-O2: // %bb.0: // %entry
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_inline_0_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_inline_0_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 0, i1 true)
ret void
@@ -1186,6 +1721,22 @@ define void @memcpy_inline_10(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: strh w8, [x0, #8]
; SDAG-MOPS-O2-NEXT: str x9, [x0]
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_inline_10:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ldrh w8, [x1, #8]
+; MOPS-DISABLED-NEXT: ldr x9, [x1]
+; MOPS-DISABLED-NEXT: strh w8, [x0, #8]
+; MOPS-DISABLED-NEXT: str x9, [x0]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_inline_10:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ldrh w8, [x1, #8]
+; MOPS-ENABLED-NEXT: ldr x9, [x1]
+; MOPS-ENABLED-NEXT: strh w8, [x0, #8]
+; MOPS-ENABLED-NEXT: str x9, [x0]
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 10, i1 false)
ret void
@@ -1223,6 +1774,22 @@ define void @memcpy_inline_10_volatile(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: strh w9, [x0, #8]
; SDAG-MOPS-O2-NEXT: str x8, [x0]
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_inline_10_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ldr x8, [x1]
+; MOPS-DISABLED-NEXT: ldrh w9, [x1, #8]
+; MOPS-DISABLED-NEXT: strh w9, [x0, #8]
+; MOPS-DISABLED-NEXT: str x8, [x0]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_inline_10_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ldr x8, [x1]
+; MOPS-ENABLED-NEXT: ldrh w9, [x1, #8]
+; MOPS-ENABLED-NEXT: strh w9, [x0, #8]
+; MOPS-ENABLED-NEXT: str x8, [x0]
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 10, i1 true)
ret void
@@ -1440,6 +2007,42 @@ define void @memcpy_inline_300(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_inline_300:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ldp q1, q0, [x1, #16]
+; MOPS-DISABLED-NEXT: add x8, x1, #284
+; MOPS-DISABLED-NEXT: ldr q2, [x1]
+; MOPS-DISABLED-NEXT: stp q1, q0, [x0, #16]
+; MOPS-DISABLED-NEXT: str q2, [x0]
+; MOPS-DISABLED-NEXT: ldp q1, q0, [x1, #80]
+; MOPS-DISABLED-NEXT: ldp q2, q3, [x1, #48]
+; MOPS-DISABLED-NEXT: stp q1, q0, [x0, #80]
+; MOPS-DISABLED-NEXT: stp q2, q3, [x0, #48]
+; MOPS-DISABLED-NEXT: ldp q1, q0, [x1, #144]
+; MOPS-DISABLED-NEXT: ldp q2, q3, [x1, #112]
+; MOPS-DISABLED-NEXT: stp q1, q0, [x0, #144]
+; MOPS-DISABLED-NEXT: stp q2, q3, [x0, #112]
+; MOPS-DISABLED-NEXT: ldp q1, q0, [x1, #208]
+; MOPS-DISABLED-NEXT: ldp q2, q3, [x1, #176]
+; MOPS-DISABLED-NEXT: stp q1, q0, [x0, #208]
+; MOPS-DISABLED-NEXT: stp q2, q3, [x0, #176]
+; MOPS-DISABLED-NEXT: ldp q3, q1, [x1, #256]
+; MOPS-DISABLED-NEXT: ldr q0, [x8]
+; MOPS-DISABLED-NEXT: ldr q2, [x1, #240]
+; MOPS-DISABLED-NEXT: add x8, x0, #284
+; MOPS-DISABLED-NEXT: str q0, [x8]
+; MOPS-DISABLED-NEXT: stp q3, q1, [x0, #256]
+; MOPS-DISABLED-NEXT: str q2, [x0, #240]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_inline_300:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: mov w8, #300 // =0x12c
+; MOPS-ENABLED-NEXT: cpyfp [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpyfm [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpyfe [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 300, i1 false)
ret void
@@ -1585,6 +2188,58 @@ define void @memcpy_inline_300_volatile(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memcpy_inline_300_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ldr q0, [x1]
+; MOPS-DISABLED-NEXT: ldr q1, [x1, #16]
+; MOPS-DISABLED-NEXT: ldr q2, [x1, #32]
+; MOPS-DISABLED-NEXT: ldr q3, [x1, #48]
+; MOPS-DISABLED-NEXT: str q3, [x0, #48]
+; MOPS-DISABLED-NEXT: str q2, [x0, #32]
+; MOPS-DISABLED-NEXT: str q1, [x0, #16]
+; MOPS-DISABLED-NEXT: str q0, [x0]
+; MOPS-DISABLED-NEXT: ldr q0, [x1, #64]
+; MOPS-DISABLED-NEXT: ldr q1, [x1, #80]
+; MOPS-DISABLED-NEXT: ldr q2, [x1, #96]
+; MOPS-DISABLED-NEXT: ldr q3, [x1, #112]
+; MOPS-DISABLED-NEXT: str q3, [x0, #112]
+; MOPS-DISABLED-NEXT: str q2, [x0, #96]
+; MOPS-DISABLED-NEXT: str q1, [x0, #80]
+; MOPS-DISABLED-NEXT: str q0, [x0, #64]
+; MOPS-DISABLED-NEXT: ldr q0, [x1, #128]
+; MOPS-DISABLED-NEXT: ldr q1, [x1, #144]
+; MOPS-DISABLED-NEXT: ldr q2, [x1, #160]
+; MOPS-DISABLED-NEXT: ldr q3, [x1, #176]
+; MOPS-DISABLED-NEXT: str q3, [x0, #176]
+; MOPS-DISABLED-NEXT: str q2, [x0, #160]
+; MOPS-DISABLED-NEXT: str q1, [x0, #144]
+; MOPS-DISABLED-NEXT: str q0, [x0, #128]
+; MOPS-DISABLED-NEXT: ldr q0, [x1, #192]
+; MOPS-DISABLED-NEXT: ldr q1, [x1, #208]
+; MOPS-DISABLED-NEXT: ldr q2, [x1, #224]
+; MOPS-DISABLED-NEXT: ldr q3, [x1, #240]
+; MOPS-DISABLED-NEXT: str q3, [x0, #240]
+; MOPS-DISABLED-NEXT: str q2, [x0, #224]
+; MOPS-DISABLED-NEXT: str q1, [x0, #208]
+; MOPS-DISABLED-NEXT: str q0, [x0, #192]
+; MOPS-DISABLED-NEXT: ldr q0, [x1, #256]
+; MOPS-DISABLED-NEXT: ldr q1, [x1, #272]
+; MOPS-DISABLED-NEXT: ldr x8, [x1, #288]
+; MOPS-DISABLED-NEXT: ldr w9, [x1, #296]
+; MOPS-DISABLED-NEXT: str w9, [x0, #296]
+; MOPS-DISABLED-NEXT: str x8, [x0, #288]
+; MOPS-DISABLED-NEXT: str q1, [x0, #272]
+; MOPS-DISABLED-NEXT: str q0, [x0, #256]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memcpy_inline_300_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: mov w8, #300 // =0x12c
+; MOPS-ENABLED-NEXT: cpyfp [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpyfm [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpyfe [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 300, i1 true)
ret void
@@ -1606,6 +2261,14 @@ define void @memmove_0(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-LABEL: memmove_0:
; SDAG-MOPS-O2: // %bb.0: // %entry
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memmove_0:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memmove_0:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 0, i1 false)
ret void
@@ -1627,6 +2290,14 @@ define void @memmove_0_volatile(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-LABEL: memmove_0_volatile:
; SDAG-MOPS-O2: // %bb.0: // %entry
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memmove_0_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memmove_0_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 0, i1 true)
ret void
@@ -1680,6 +2351,22 @@ define void @memmove_10(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: strh w8, [x0, #8]
; SDAG-MOPS-O2-NEXT: str x9, [x0]
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memmove_10:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ldrh w8, [x1, #8]
+; MOPS-DISABLED-NEXT: ldr x9, [x1]
+; MOPS-DISABLED-NEXT: strh w8, [x0, #8]
+; MOPS-DISABLED-NEXT: str x9, [x0]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memmove_10:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ldrh w8, [x1, #8]
+; MOPS-ENABLED-NEXT: ldr x9, [x1]
+; MOPS-ENABLED-NEXT: strh w8, [x0, #8]
+; MOPS-ENABLED-NEXT: str x9, [x0]
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 10, i1 false)
ret void
@@ -1733,6 +2420,22 @@ define void @memmove_10_volatile(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: strh w9, [x0, #8]
; SDAG-MOPS-O2-NEXT: str x8, [x0]
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memmove_10_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: ldr x8, [x1]
+; MOPS-DISABLED-NEXT: ldrh w9, [x1, #8]
+; MOPS-DISABLED-NEXT: strh w9, [x0, #8]
+; MOPS-DISABLED-NEXT: str x8, [x0]
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memmove_10_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: ldr x8, [x1]
+; MOPS-ENABLED-NEXT: ldrh w9, [x1, #8]
+; MOPS-ENABLED-NEXT: strh w9, [x0, #8]
+; MOPS-ENABLED-NEXT: str x8, [x0]
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 10, i1 true)
ret void
@@ -1794,6 +2497,24 @@ define void @memmove_1000(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: cpym [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: cpye [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memmove_1000:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov w2, #1000 // =0x3e8
+; MOPS-DISABLED-NEXT: bl memmove
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memmove_1000:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: mov w8, #1000 // =0x3e8
+; MOPS-ENABLED-NEXT: cpyp [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpym [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpye [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 1000, i1 false)
ret void
@@ -1855,6 +2576,24 @@ define void @memmove_1000_volatile(ptr %dst, ptr %src, i32 %value) {
; SDAG-MOPS-O2-NEXT: cpym [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: cpye [x0]!, [x1]!, x8!
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memmove_1000_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: mov w2, #1000 // =0x3e8
+; MOPS-DISABLED-NEXT: bl memmove
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memmove_1000_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: mov w8, #1000 // =0x3e8
+; MOPS-ENABLED-NEXT: cpyp [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpym [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: cpye [x0]!, [x1]!, x8!
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 1000, i1 true)
ret void
@@ -1892,6 +2631,22 @@ define void @memmove_n(ptr %dst, ptr %src, i64 %size, i32 %value) {
; SDAG-MOPS-O2-NEXT: cpym [x0]!, [x1]!, x2!
; SDAG-MOPS-O2-NEXT: cpye [x0]!, [x1]!, x2!
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memmove_n:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: bl memmove
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memmove_n:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: cpyp [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: cpym [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: cpye [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 %size, i1 false)
ret void
@@ -1929,6 +2684,22 @@ define void @memmove_n_volatile(ptr %dst, ptr %src, i64 %size, i32 %value) {
; SDAG-MOPS-O2-NEXT: cpym [x0]!, [x1]!, x2!
; SDAG-MOPS-O2-NEXT: cpye [x0]!, [x1]!, x2!
; SDAG-MOPS-O2-NEXT: ret
+;
+; MOPS-DISABLED-LABEL: memmove_n_volatile:
+; MOPS-DISABLED: // %bb.0: // %entry
+; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
+; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
+; MOPS-DISABLED-NEXT: bl memmove
+; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; MOPS-DISABLED-NEXT: ret
+;
+; MOPS-ENABLED-LABEL: memmove_n_volatile:
+; MOPS-ENABLED: // %bb.0: // %entry
+; MOPS-ENABLED-NEXT: cpyp [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: cpym [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: cpye [x0]!, [x1]!, x2!
+; MOPS-ENABLED-NEXT: ret
entry:
call void @llvm.memmove.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 %size, i1 true)
ret void
diff --git a/llvm/test/CodeGen/AArch64/aarch64-use-mops-flag.ll b/llvm/test/CodeGen/AArch64/aarch64-use-mops-flag.ll
deleted file mode 100644
index c7cc8bc970632..0000000000000
--- a/llvm/test/CodeGen/AArch64/aarch64-use-mops-flag.ll
+++ /dev/null
@@ -1,130 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-
-; Test the -aarch64-use-mops flag that controls whether MOPS
-; instructions are used in memset/memcpy/memmove intrinsics
-; when the target supports them.
-
-; RUN: llc %s -o - -mtriple=aarch64 -mattr=+mops | FileCheck %s --check-prefix=MOPS-ENABLED
-; RUN: llc %s -o - -mtriple=aarch64 -mattr=+mops -aarch64-use-mops=false | FileCheck %s --check-prefix=MOPS-DISABLED
-; RUN: llc %s -o - -mtriple=aarch64 -mattr=+mops -aarch64-use-mops=true | FileCheck %s --check-prefix=MOPS-ENABLED
-
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
-declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
-declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
-
-; Test memset at threshold (512 bytes) - should be inlined when MOPS disabled
-define void @memset_threshold(ptr %dst) {
-; MOPS-ENABLED-LABEL: memset_threshold:
-; MOPS-ENABLED: // %bb.0:
-; MOPS-ENABLED-NEXT: movi v0.2d, #0000000000000000
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #32]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #64]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #96]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #128]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #160]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #192]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #224]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #256]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #288]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #320]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #352]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #384]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #416]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #448]
-; MOPS-ENABLED-NEXT: stp q0, q0, [x0, #480]
-; MOPS-ENABLED-NEXT: ret
-;
-; MOPS-DISABLED-LABEL: memset_threshold:
-; MOPS-DISABLED: // %bb.0:
-; MOPS-DISABLED-NEXT: movi v0.2d, #0000000000000000
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #32]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #64]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #96]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #128]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #160]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #192]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #224]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #256]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #288]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #320]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #352]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #384]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #416]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #448]
-; MOPS-DISABLED-NEXT: stp q0, q0, [x0, #480]
-; MOPS-DISABLED-NEXT: ret
- call void @llvm.memset.p0.i64(ptr align 16 %dst, i8 0, i64 512, i1 false)
- ret void
-}
-
-; Test memset just over threshold (513 bytes) - should use MOPS when enabled, library call when disabled
-define void @memset_over_threshold(ptr %dst) {
-; MOPS-ENABLED-LABEL: memset_over_threshold:
-; MOPS-ENABLED: // %bb.0:
-; MOPS-ENABLED-NEXT: mov w8, #513 // =0x201
-; MOPS-ENABLED-NEXT: setp [x0]!, x8!, xzr
-; MOPS-ENABLED-NEXT: setm [x0]!, x8!, xzr
-; MOPS-ENABLED-NEXT: sete [x0]!, x8!, xzr
-; MOPS-ENABLED-NEXT: ret
-;
-; MOPS-DISABLED-LABEL: memset_over_threshold:
-; MOPS-DISABLED: // %bb.0:
-; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
-; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
-; MOPS-DISABLED-NEXT: mov w1, wzr
-; MOPS-DISABLED-NEXT: mov w2, #513 // =0x201
-; MOPS-DISABLED-NEXT: bl memset
-; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; MOPS-DISABLED-NEXT: ret
- call void @llvm.memset.p0.i64(ptr align 16 %dst, i8 0, i64 513, i1 false)
- ret void
-}
-
-; Test memcpy (should use MOPS when enabled, library call when disabled)
-define void @memcpy_large(ptr %dst, ptr %src) {
-; MOPS-ENABLED-LABEL: memcpy_large:
-; MOPS-ENABLED: // %bb.0:
-; MOPS-ENABLED-NEXT: mov w8, #2048 // =0x800
-; MOPS-ENABLED-NEXT: cpyfp [x0]!, [x1]!, x8!
-; MOPS-ENABLED-NEXT: cpyfm [x0]!, [x1]!, x8!
-; MOPS-ENABLED-NEXT: cpyfe [x0]!, [x1]!, x8!
-; MOPS-ENABLED-NEXT: ret
-;
-; MOPS-DISABLED-LABEL: memcpy_large:
-; MOPS-DISABLED: // %bb.0:
-; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
-; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
-; MOPS-DISABLED-NEXT: mov w2, #2048 // =0x800
-; MOPS-DISABLED-NEXT: bl memcpy
-; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; MOPS-DISABLED-NEXT: ret
- call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 2048, i1 false)
- ret void
-}
-
-; Test memmove (should use MOPS when enabled, library call when disabled)
-define void @memmove_large(ptr %dst, ptr %src) {
-; MOPS-ENABLED-LABEL: memmove_large:
-; MOPS-ENABLED: // %bb.0:
-; MOPS-ENABLED-NEXT: mov w8, #800 // =0x320
-; MOPS-ENABLED-NEXT: cpyp [x0]!, [x1]!, x8!
-; MOPS-ENABLED-NEXT: cpym [x0]!, [x1]!, x8!
-; MOPS-ENABLED-NEXT: cpye [x0]!, [x1]!, x8!
-; MOPS-ENABLED-NEXT: ret
-;
-; MOPS-DISABLED-LABEL: memmove_large:
-; MOPS-DISABLED: // %bb.0:
-; MOPS-DISABLED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; MOPS-DISABLED-NEXT: .cfi_def_cfa_offset 16
-; MOPS-DISABLED-NEXT: .cfi_offset w30, -16
-; MOPS-DISABLED-NEXT: mov w2, #800 // =0x320
-; MOPS-DISABLED-NEXT: bl memmove
-; MOPS-DISABLED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; MOPS-DISABLED-NEXT: ret
- call void @llvm.memmove.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 800, i1 false)
- ret void
-}
More information about the llvm-commits
mailing list