[llvm] [AArch64][SME] Allow memory operations lowering to custom SME functions. (PR #79263)
Dinar Temirbulatov via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 6 17:39:03 PST 2024
https://github.com/dtemirbulatov updated https://github.com/llvm/llvm-project/pull/79263
>From 2ef16b03a5611701c215b574a38688d1febf42b7 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Wed, 24 Jan 2024 08:14:07 +0000
Subject: [PATCH 1/8] [AArch64][SME] Enable memory operations lowering to
custom SME functions.
This change allows memcpy, memset and memmove to be lowered to the custom
SME versions provided by LibRT.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 10 ++-
.../AArch64/AArch64SelectionDAGInfo.cpp | 72 +++++++++++++++++++
.../Target/AArch64/AArch64SelectionDAGInfo.h | 4 ++
.../AArch64/Utils/AArch64SMEAttributes.cpp | 3 +
llvm/test/CodeGen/AArch64/sme2-mops.ll | 67 +++++++++++++++++
5 files changed, 154 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sme2-mops.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f572772d3c9808..6385b341bcf637 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7659,8 +7659,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SMEAttrs CalleeAttrs, CallerAttrs(MF.getFunction());
if (CLI.CB)
CalleeAttrs = SMEAttrs(*CLI.CB);
- else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
- CalleeAttrs = SMEAttrs(ES->getSymbol());
+ else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
+ if (StringRef(ES->getSymbol()) == StringRef("__arm_sc_memcpy")) {
+ auto Attrs = AttributeList().addFnAttribute(
+ *DAG.getContext(), "aarch64_pstate_sm_compatible");
+ CalleeAttrs = SMEAttrs(Attrs);
+ } else
+ CalleeAttrs = SMEAttrs(ES->getSymbol());
+ }
auto DescribeCallsite =
[&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 9e43f206efcf78..fff4e2333194e3 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -76,12 +76,74 @@ SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode,
}
}
+SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, RTLIB::Libcall LC) const {
+ const AArch64Subtarget &STI =
+ DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+ const AArch64TargetLowering *TLI = STI.getTargetLowering();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ enum { SME_MEMCPY = 0, SME_MEMMOVE, SME_MEMSET } SMELibcall;
+ switch (LC) {
+ case RTLIB::MEMCPY:
+ SMELibcall = SME_MEMCPY;
+ Entry.Node = Src;
+ Args.push_back(Entry);
+ break;
+ case RTLIB::MEMMOVE:
+ SMELibcall = SME_MEMMOVE;
+ Entry.Node = Src;
+ Args.push_back(Entry);
+ break;
+ case RTLIB::MEMSET:
+ SMELibcall = SME_MEMSET;
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src);
+ else if (Src.getValueType().bitsLT(MVT::i32))
+ Src = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src);
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+ Entry.IsSExt = false;
+ Args.push_back(Entry);
+ break;
+ default:
+ return SDValue();
+ }
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ char const *FunctionNames[3] = {"__arm_sc_memcpy", "__arm_sc_memmove",
+ "__arm_sc_memset"};
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL)
+ .setChain(Chain)
+ .setLibCallee(
+ TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(FunctionNames[SMELibcall],
+ TLI->getPointerTy(DAG.getDataLayout())),
+ std::move(Args))
+ .setDiscardResult();
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+
+ SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
+ if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface())
+ return EmitSpecializedLibcall(DAG, DL, Chain, Dst, Src, Size,
+ RTLIB::MEMCPY);
if (STI.hasMOPS())
return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size,
Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
@@ -95,6 +157,11 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+ SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
+ if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface())
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
+ RTLIB::MEMSET);
+
if (STI.hasMOPS()) {
return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size,
Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{});
@@ -108,6 +175,11 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+
+ SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
+ if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface())
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
+ RTLIB::MEMMOVE);
if (STI.hasMOPS()) {
return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size,
Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 73f93724d6fc73..9c55c21f3c3202 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -47,6 +47,10 @@ class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo {
SDValue Chain, SDValue Op1, SDValue Op2,
MachinePointerInfo DstPtrInfo,
bool ZeroData) const override;
+
+ SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, RTLIB::Libcall LC) const;
};
}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index 3ee54e5df0a13d..5080e4a0b4f9a2 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -51,6 +51,9 @@ SMEAttrs::SMEAttrs(StringRef FuncName) : Bitmask(0) {
if (FuncName == "__arm_tpidr2_restore")
Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::ZA_Shared |
SMEAttrs::SME_ABI_Routine);
+ if (FuncName == "__arm_sc_memcpy" || FuncName == "__arm_sc_memset" ||
+ FuncName == "__arm_sc_memmove")
+ Bitmask |= SMEAttrs::SM_Compatible;
}
SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
diff --git a/llvm/test/CodeGen/AArch64/sme2-mops.ll b/llvm/test/CodeGen/AArch64/sme2-mops.ll
new file mode 100644
index 00000000000000..0ded6e965ecb9c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-mops.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
+
+@dst = global [512 x i8] zeroinitializer, align 1
+@src = global [512 x i8] zeroinitializer, align 1
+
+
+define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: sc_memcpy:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: adrp x1, :got:src
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT: bl __arm_sc_memcpy
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+ ret void
+}
+
+define void @sc_memset(i64 noundef %n) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: sc_memset:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: mov w1, #2 // =0x2
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: // kill: def $w2 killed $w2 killed $x2
+; CHECK-NEXT: bl __arm_sc_memset
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
+ ret void
+}
+
+define void @sc_memmove(i64 noundef %n) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: sc_memmove:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: adrp x1, :got:src
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT: bl __arm_sc_memmove
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
+declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
>From 736bcd7eeb54af03f866329b99f18ee61b9df6b7 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Tue, 30 Jan 2024 09:08:43 +0000
Subject: [PATCH 2/8] Resolved comments
---
.../Target/AArch64/AArch64ISelLowering.cpp | 10 ++------
.../AArch64/AArch64SelectionDAGInfo.cpp | 24 +++++++------------
llvm/test/CodeGen/AArch64/sme2-mops.ll | 1 -
3 files changed, 10 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6385b341bcf637..f572772d3c9808 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7659,14 +7659,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SMEAttrs CalleeAttrs, CallerAttrs(MF.getFunction());
if (CLI.CB)
CalleeAttrs = SMEAttrs(*CLI.CB);
- else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
- if (StringRef(ES->getSymbol()) == StringRef("__arm_sc_memcpy")) {
- auto Attrs = AttributeList().addFnAttribute(
- *DAG.getContext(), "aarch64_pstate_sm_compatible");
- CalleeAttrs = SMEAttrs(Attrs);
- } else
- CalleeAttrs = SMEAttrs(ES->getSymbol());
- }
+ else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
+ CalleeAttrs = SMEAttrs(ES->getSymbol());
auto DescribeCallsite =
[&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index fff4e2333194e3..1c4142e535793c 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -84,28 +84,26 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
const AArch64TargetLowering *TLI = STI.getTargetLowering();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
+ SDValue Symbol;
Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
Entry.Node = Dst;
Args.push_back(Entry);
+ EVT Ty = TLI->getPointerTy(DAG.getDataLayout());
- enum { SME_MEMCPY = 0, SME_MEMMOVE, SME_MEMSET } SMELibcall;
switch (LC) {
case RTLIB::MEMCPY:
- SMELibcall = SME_MEMCPY;
+ Symbol = DAG.getExternalSymbol("__arm_sc_memcpy", Ty);
Entry.Node = Src;
Args.push_back(Entry);
break;
case RTLIB::MEMMOVE:
- SMELibcall = SME_MEMMOVE;
+ Symbol = DAG.getExternalSymbol("__arm_sc_memmove", Ty);
Entry.Node = Src;
Args.push_back(Entry);
break;
case RTLIB::MEMSET:
- SMELibcall = SME_MEMSET;
- if (Src.getValueType().bitsGT(MVT::i32))
- Src = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src);
- else if (Src.getValueType().bitsLT(MVT::i32))
- Src = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src);
+ Symbol = DAG.getExternalSymbol("__arm_sc_memset", Ty);
+ Src = DAG.getZExtOrTrunc(Src, DL, MVT::i32);
Entry.Node = Src;
Entry.Ty = Type::getInt32Ty(*DAG.getContext());
Entry.IsSExt = false;
@@ -116,18 +114,12 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
}
Entry.Node = Size;
Args.push_back(Entry);
- char const *FunctionNames[3] = {"__arm_sc_memcpy", "__arm_sc_memmove",
- "__arm_sc_memset"};
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL)
.setChain(Chain)
- .setLibCallee(
- TLI->getLibcallCallingConv(RTLIB::MEMCPY),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(FunctionNames[SMELibcall],
- TLI->getPointerTy(DAG.getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()), Symbol, std::move(Args))
.setDiscardResult();
std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
diff --git a/llvm/test/CodeGen/AArch64/sme2-mops.ll b/llvm/test/CodeGen/AArch64/sme2-mops.ll
index 0ded6e965ecb9c..0599bc61a52f73 100644
--- a/llvm/test/CodeGen/AArch64/sme2-mops.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-mops.ll
@@ -4,7 +4,6 @@
@dst = global [512 x i8] zeroinitializer, align 1
@src = global [512 x i8] zeroinitializer, align 1
-
define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: sc_memcpy:
; CHECK: // %bb.0: // %entry
>From 2082ebbe8fc57fb47677c5e316da9a1612646857 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Wed, 31 Jan 2024 09:54:42 +0000
Subject: [PATCH 3/8] Resolved comments.
---
llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 1c4142e535793c..a3386bea28350c 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -116,11 +116,9 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(DL)
- .setChain(Chain)
- .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
- Type::getVoidTy(*DAG.getContext()), Symbol, std::move(Args))
- .setDiscardResult();
+ CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
+ Symbol, std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
>From 30486535eeca6aa1c6bf95ecc4c5cf7daebdc5ba Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Mon, 5 Feb 2024 08:36:14 +0000
Subject: [PATCH 4/8] Resolved comments.
---
.../AArch64/AArch64SelectionDAGInfo.cpp | 26 +-
.../AArch64/Utils/AArch64SMEAttributes.cpp | 2 +-
llvm/test/CodeGen/AArch64/sme2-mops.ll | 491 +++++++++++++++++-
3 files changed, 511 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index a3386bea28350c..d2908ae83e9c0a 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -15,6 +15,12 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-selectiondag-info"
+static cl::opt<bool>
+ EnableSMEMops("aarch64-enable-sme-mops", cl::Hidden,
+ cl::desc("Enable AArch64 SME memory operations "
+ "to lower to librt functions"),
+ cl::init(true));
+
SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode,
SelectionDAG &DAG, const SDLoc &DL,
SDValue Chain, SDValue Dst,
@@ -90,6 +96,9 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
Args.push_back(Entry);
EVT Ty = TLI->getPointerTy(DAG.getDataLayout());
+ if (!EnableSMEMops)
+ return SDValue();
+
switch (LC) {
case RTLIB::MEMCPY:
Symbol = DAG.getExternalSymbol("__arm_sc_memcpy", Ty);
@@ -116,9 +125,11 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
- TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
- Symbol, std::move(Args));
+ CLI.setDebugLoc(DL)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()), Symbol, std::move(Args))
+ .setDiscardResult();
std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
@@ -131,7 +142,8 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
- if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface())
+ if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface() ||
+ Attrs.hasStreamingInterface())
return EmitSpecializedLibcall(DAG, DL, Chain, Dst, Src, Size,
RTLIB::MEMCPY);
if (STI.hasMOPS())
@@ -148,7 +160,8 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
- if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface())
+ if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface() ||
+ Attrs.hasStreamingInterface())
return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
RTLIB::MEMSET);
@@ -167,7 +180,8 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
- if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface())
+ if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface() ||
+ Attrs.hasStreamingInterface())
return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
RTLIB::MEMMOVE);
if (STI.hasMOPS()) {
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index 5080e4a0b4f9a2..8c01bb3c1f4829 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -52,7 +52,7 @@ SMEAttrs::SMEAttrs(StringRef FuncName) : Bitmask(0) {
Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::ZA_Shared |
SMEAttrs::SME_ABI_Routine);
if (FuncName == "__arm_sc_memcpy" || FuncName == "__arm_sc_memset" ||
- FuncName == "__arm_sc_memmove")
+ FuncName == "__arm_sc_memmove" || FuncName == "__arm_sc_memchr")
Bitmask |= SMEAttrs::SM_Compatible;
}
diff --git a/llvm/test/CodeGen/AArch64/sme2-mops.ll b/llvm/test/CodeGen/AArch64/sme2-mops.ll
index 0599bc61a52f73..6c3017d076079b 100644
--- a/llvm/test/CodeGen/AArch64/sme2-mops.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-mops.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs -aarch64-enable-sme-mops=false < %s | FileCheck %s -check-prefixes=NO_SME_MOPS
@dst = global [512 x i8] zeroinitializer, align 1
@src = global [512 x i8] zeroinitializer, align 1
@@ -18,6 +19,48 @@ define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" {
; CHECK-NEXT: bl __arm_sc_memcpy
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: sc_memcpy:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT: .cfi_offset w19, -8
+; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: bl __arm_sme_state
+; NO_SME_MOPS-NEXT: adrp x8, :got:dst
+; NO_SME_MOPS-NEXT: adrp x1, :got:src
+; NO_SME_MOPS-NEXT: and x19, x0, #0x1
+; NO_SME_MOPS-NEXT: ldr x8, [x8, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB0_2
+; NO_SME_MOPS-NEXT: // %bb.1: // %entry
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: .LBB0_2: // %entry
+; NO_SME_MOPS-NEXT: mov x0, x8
+; NO_SME_MOPS-NEXT: bl memcpy
+; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB0_4
+; NO_SME_MOPS-NEXT: // %bb.3: // %entry
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: .LBB0_4: // %entry
+; NO_SME_MOPS-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
ret void
@@ -37,6 +80,46 @@ define void @sc_memset(i64 noundef %n) "aarch64_pstate_sm_compatible" {
; CHECK-NEXT: bl __arm_sc_memset
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: sc_memset:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT: .cfi_offset w19, -8
+; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: bl __arm_sme_state
+; NO_SME_MOPS-NEXT: and x19, x0, #0x1
+; NO_SME_MOPS-NEXT: adrp x0, :got:dst
+; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB1_2
+; NO_SME_MOPS-NEXT: // %bb.1: // %entry
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: .LBB1_2: // %entry
+; NO_SME_MOPS-NEXT: mov w1, #2 // =0x2
+; NO_SME_MOPS-NEXT: bl memset
+; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB1_4
+; NO_SME_MOPS-NEXT: // %bb.3: // %entry
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: .LBB1_4: // %entry
+; NO_SME_MOPS-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
entry:
tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
ret void
@@ -56,6 +139,412 @@ define void @sc_memmove(i64 noundef %n) "aarch64_pstate_sm_compatible" {
; CHECK-NEXT: bl __arm_sc_memmove
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: sc_memmove:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT: .cfi_offset w19, -8
+; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: bl __arm_sme_state
+; NO_SME_MOPS-NEXT: adrp x8, :got:dst
+; NO_SME_MOPS-NEXT: adrp x1, :got:src
+; NO_SME_MOPS-NEXT: and x19, x0, #0x1
+; NO_SME_MOPS-NEXT: ldr x8, [x8, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB2_2
+; NO_SME_MOPS-NEXT: // %bb.1: // %entry
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: .LBB2_2: // %entry
+; NO_SME_MOPS-NEXT: mov x0, x8
+; NO_SME_MOPS-NEXT: bl memmove
+; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB2_4
+; NO_SME_MOPS-NEXT: // %bb.3: // %entry
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: .LBB2_4: // %entry
+; NO_SME_MOPS-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
+entry:
+ tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+ ret void
+}
+
+define void @se_memcpy(i64 noundef %n) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: se_memcpy:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: adrp x1, :got:src
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT: bl __arm_sc_memcpy
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: se_memcpy:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: adrp x0, :got:dst
+; NO_SME_MOPS-NEXT: adrp x1, :got:src
+; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: bl memcpy
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
+entry:
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+ ret void
+}
+
+define void @se_memset(i64 noundef %n) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: se_memset:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: mov w1, #2 // =0x2
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: // kill: def $w2 killed $w2 killed $x2
+; CHECK-NEXT: bl __arm_sc_memset
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: se_memset:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: adrp x0, :got:dst
+; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: mov w1, #2 // =0x2
+; NO_SME_MOPS-NEXT: bl memset
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
+entry:
+ tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
+ ret void
+}
+
+define void @se_memmove(i64 noundef %n) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: se_memmove:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: adrp x1, :got:src
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT: bl __arm_sc_memmove
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: se_memmove:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: adrp x0, :got:dst
+; NO_SME_MOPS-NEXT: adrp x1, :got:src
+; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: bl memmove
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
+entry:
+ tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+ ret void
+}
+
+define void @sb_memcpy(i64 noundef %n) "aarch64_pstate_sm_body" {
+; CHECK-LABEL: sb_memcpy:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: adrp x1, :got:src
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT: bl __arm_sc_memcpy
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: sb_memcpy:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: adrp x0, :got:dst
+; NO_SME_MOPS-NEXT: adrp x1, :got:src
+; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: bl memcpy
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
+entry:
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+ ret void
+}
+
+define void @sb_memset(i64 noundef %n) "aarch64_pstate_sm_body" {
+; CHECK-LABEL: sb_memset:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: mov w1, #2 // =0x2
+; CHECK-NEXT: // kill: def $w2 killed $w2 killed $x2
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: bl __arm_sc_memset
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: sb_memset:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: adrp x0, :got:dst
+; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: mov w1, #2 // =0x2
+; NO_SME_MOPS-NEXT: bl memset
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
+entry:
+ tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
+ ret void
+}
+
+define void @sb_memmove(i64 noundef %n) "aarch64_pstate_sm_body" {
+; CHECK-LABEL: sb_memmove:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: adrp x1, :got:src
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT: bl __arm_sc_memmove
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: sb_memmove:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
+; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
+; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
+; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
+; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
+; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
+; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
+; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
+; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
+; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: adrp x0, :got:dst
+; NO_SME_MOPS-NEXT: adrp x1, :got:src
+; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: bl memmove
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
entry:
tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
ret void
>From 6659b58e27d4267332bbb94b25e52df6c489c254 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Mon, 5 Feb 2024 16:41:33 +0000
Subject: [PATCH 5/8] Removed accidentally restored setDiscardResult() for
lowering call, fix issue with incorrect type initialization for Size
argument.
---
llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 10 ++++------
llvm/test/CodeGen/AArch64/sme2-mops.ll | 3 ---
2 files changed, 4 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index d2908ae83e9c0a..a04d83f69b5cf4 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -115,21 +115,19 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
Src = DAG.getZExtOrTrunc(Src, DL, MVT::i32);
Entry.Node = Src;
Entry.Ty = Type::getInt32Ty(*DAG.getContext());
- Entry.IsSExt = false;
Args.push_back(Entry);
break;
default:
return SDValue();
}
Entry.Node = Size;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(DL)
- .setChain(Chain)
- .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
- Type::getVoidTy(*DAG.getContext()), Symbol, std::move(Args))
- .setDiscardResult();
+ CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
+ TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()), Symbol, std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
diff --git a/llvm/test/CodeGen/AArch64/sme2-mops.ll b/llvm/test/CodeGen/AArch64/sme2-mops.ll
index 6c3017d076079b..dda509993f4807 100644
--- a/llvm/test/CodeGen/AArch64/sme2-mops.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-mops.ll
@@ -76,7 +76,6 @@ define void @sc_memset(i64 noundef %n) "aarch64_pstate_sm_compatible" {
; CHECK-NEXT: adrp x0, :got:dst
; CHECK-NEXT: mov w1, #2 // =0x2
; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
-; CHECK-NEXT: // kill: def $w2 killed $w2 killed $x2
; CHECK-NEXT: bl __arm_sc_memset
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -247,7 +246,6 @@ define void @se_memset(i64 noundef %n) "aarch64_pstate_sm_enabled" {
; CHECK-NEXT: adrp x0, :got:dst
; CHECK-NEXT: mov w1, #2 // =0x2
; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
-; CHECK-NEXT: // kill: def $w2 killed $w2 killed $x2
; CHECK-NEXT: bl __arm_sc_memset
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -431,7 +429,6 @@ define void @sb_memset(i64 noundef %n) "aarch64_pstate_sm_body" {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: adrp x0, :got:dst
; CHECK-NEXT: mov w1, #2 // =0x2
-; CHECK-NEXT: // kill: def $w2 killed $w2 killed $x2
; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
; CHECK-NEXT: bl __arm_sc_memset
; CHECK-NEXT: smstop sm
>From 6081b6831074ac79ec7bd540c51cc00926fd2d30 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Mon, 5 Feb 2024 21:56:11 +0000
Subject: [PATCH 6/8] Restore change, accidentally removed before.
---
llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index a04d83f69b5cf4..a3658b376f5f83 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -126,8 +126,8 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
- TLI->getLibcallCallingConv(RTLIB::MEMCPY),
- Type::getVoidTy(*DAG.getContext()), Symbol, std::move(Args));
+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
+ Symbol, std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
>From 137a7e76a57c517b268090da9cfbdc0687eab1b1 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Thu, 29 Feb 2024 16:53:47 +0000
Subject: [PATCH 7/8] Resolved comments.
---
.../AArch64/AArch64SelectionDAGInfo.cpp | 73 +--
llvm/test/CodeGen/AArch64/sme2-mops.ll | 552 ------------------
.../streaming-compatible-memory-ops.ll | 289 +++++++++
3 files changed, 329 insertions(+), 585 deletions(-)
delete mode 100644 llvm/test/CodeGen/AArch64/sme2-mops.ll
create mode 100644 llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index a3658b376f5f83..a8cefc3a72ed5e 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -16,10 +16,10 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-selectiondag-info"
static cl::opt<bool>
- EnableSMEMops("aarch64-enable-sme-mops", cl::Hidden,
- cl::desc("Enable AArch64 SME memory operations "
- "to lower to librt functions"),
- cl::init(true));
+ LowerToSMERoutines("aarch64-lower-to-sme-routines", cl::Hidden,
+ cl::desc("Enable AArch64 SME memory operations "
+ "to lower to librt functions"),
+ cl::init(true));
SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode,
SelectionDAG &DAG, const SDLoc &DL,
@@ -89,40 +89,50 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
const AArch64TargetLowering *TLI = STI.getTargetLowering();
TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
+ TargetLowering::ArgListEntry DstEntry;
SDValue Symbol;
- Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
- Entry.Node = Dst;
- Args.push_back(Entry);
+ DstEntry.Ty = PointerType::getUnqual(*DAG.getContext());
+ DstEntry.Node = Dst;
+ Args.push_back(DstEntry);
EVT Ty = TLI->getPointerTy(DAG.getDataLayout());
- if (!EnableSMEMops)
+ if (!LowerToSMERoutines)
return SDValue();
switch (LC) {
- case RTLIB::MEMCPY:
+ case RTLIB::MEMCPY: {
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = PointerType::getUnqual(*DAG.getContext());
Symbol = DAG.getExternalSymbol("__arm_sc_memcpy", Ty);
Entry.Node = Src;
Args.push_back(Entry);
break;
- case RTLIB::MEMMOVE:
+ }
+ case RTLIB::MEMMOVE: {
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = PointerType::getUnqual(*DAG.getContext());
Symbol = DAG.getExternalSymbol("__arm_sc_memmove", Ty);
Entry.Node = Src;
Args.push_back(Entry);
break;
- case RTLIB::MEMSET:
+ }
+ case RTLIB::MEMSET: {
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = PointerType::getUnqual(*DAG.getContext());
Symbol = DAG.getExternalSymbol("__arm_sc_memset", Ty);
Src = DAG.getZExtOrTrunc(Src, DL, MVT::i32);
Entry.Node = Src;
Entry.Ty = Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
break;
+ }
default:
return SDValue();
}
- Entry.Node = Size;
- Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
- Args.push_back(Entry);
+ TargetLowering::ArgListEntry SizeEntry;
+ SizeEntry.Node = Size;
+ SizeEntry.Ty = PointerType::getUnqual(*DAG.getContext());
+ Args.push_back(SizeEntry);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
@@ -139,14 +149,14 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
- SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
- if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface() ||
- Attrs.hasStreamingInterface())
- return EmitSpecializedLibcall(DAG, DL, Chain, Dst, Src, Size,
- RTLIB::MEMCPY);
if (STI.hasMOPS())
return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size,
Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
+
+ SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
+ if (!Attrs.hasNonStreamingInterfaceAndBody())
+ return EmitSpecializedLibcall(DAG, DL, Chain, Dst, Src, Size,
+ RTLIB::MEMCPY);
return SDValue();
}
@@ -157,16 +167,14 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+ if (STI.hasMOPS())
+ return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size,
+ Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{});
+
SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
- if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface() ||
- Attrs.hasStreamingInterface())
+ if (!Attrs.hasNonStreamingInterfaceAndBody())
return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
RTLIB::MEMSET);
-
- if (STI.hasMOPS()) {
- return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size,
- Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{});
- }
return SDValue();
}
@@ -177,15 +185,14 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+ if (STI.hasMOPS())
+ return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size,
+ Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
+
SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
- if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface() ||
- Attrs.hasStreamingInterface())
+ if (!Attrs.hasNonStreamingInterfaceAndBody())
return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
RTLIB::MEMMOVE);
- if (STI.hasMOPS()) {
- return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size,
- Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
- }
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/sme2-mops.ll b/llvm/test/CodeGen/AArch64/sme2-mops.ll
deleted file mode 100644
index dda509993f4807..00000000000000
--- a/llvm/test/CodeGen/AArch64/sme2-mops.ll
+++ /dev/null
@@ -1,552 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs -aarch64-enable-sme-mops=false < %s | FileCheck %s -check-prefixes=NO_SME_MOPS
-
-@dst = global [512 x i8] zeroinitializer, align 1
-@src = global [512 x i8] zeroinitializer, align 1
-
-define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" {
-; CHECK-LABEL: sc_memcpy:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: adrp x0, :got:dst
-; CHECK-NEXT: adrp x1, :got:src
-; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
-; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
-; CHECK-NEXT: bl __arm_sc_memcpy
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; NO_SME_MOPS-LABEL: sc_memcpy:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
-; NO_SME_MOPS-NEXT: .cfi_offset w19, -8
-; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
-; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
-; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
-; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
-; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
-; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
-; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
-; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
-; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: bl __arm_sme_state
-; NO_SME_MOPS-NEXT: adrp x8, :got:dst
-; NO_SME_MOPS-NEXT: adrp x1, :got:src
-; NO_SME_MOPS-NEXT: and x19, x0, #0x1
-; NO_SME_MOPS-NEXT: ldr x8, [x8, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
-; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB0_2
-; NO_SME_MOPS-NEXT: // %bb.1: // %entry
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: .LBB0_2: // %entry
-; NO_SME_MOPS-NEXT: mov x0, x8
-; NO_SME_MOPS-NEXT: bl memcpy
-; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB0_4
-; NO_SME_MOPS-NEXT: // %bb.3: // %entry
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: .LBB0_4: // %entry
-; NO_SME_MOPS-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
-entry:
- tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
- ret void
-}
-
-define void @sc_memset(i64 noundef %n) "aarch64_pstate_sm_compatible" {
-; CHECK-LABEL: sc_memset:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: adrp x0, :got:dst
-; CHECK-NEXT: mov w1, #2 // =0x2
-; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
-; CHECK-NEXT: bl __arm_sc_memset
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; NO_SME_MOPS-LABEL: sc_memset:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
-; NO_SME_MOPS-NEXT: .cfi_offset w19, -8
-; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
-; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
-; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
-; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
-; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
-; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
-; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
-; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
-; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: bl __arm_sme_state
-; NO_SME_MOPS-NEXT: and x19, x0, #0x1
-; NO_SME_MOPS-NEXT: adrp x0, :got:dst
-; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB1_2
-; NO_SME_MOPS-NEXT: // %bb.1: // %entry
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: .LBB1_2: // %entry
-; NO_SME_MOPS-NEXT: mov w1, #2 // =0x2
-; NO_SME_MOPS-NEXT: bl memset
-; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB1_4
-; NO_SME_MOPS-NEXT: // %bb.3: // %entry
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: .LBB1_4: // %entry
-; NO_SME_MOPS-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
-entry:
- tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
- ret void
-}
-
-define void @sc_memmove(i64 noundef %n) "aarch64_pstate_sm_compatible" {
-; CHECK-LABEL: sc_memmove:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: adrp x0, :got:dst
-; CHECK-NEXT: adrp x1, :got:src
-; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
-; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
-; CHECK-NEXT: bl __arm_sc_memmove
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; NO_SME_MOPS-LABEL: sc_memmove:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
-; NO_SME_MOPS-NEXT: .cfi_offset w19, -8
-; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
-; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
-; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
-; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
-; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
-; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
-; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
-; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
-; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: bl __arm_sme_state
-; NO_SME_MOPS-NEXT: adrp x8, :got:dst
-; NO_SME_MOPS-NEXT: adrp x1, :got:src
-; NO_SME_MOPS-NEXT: and x19, x0, #0x1
-; NO_SME_MOPS-NEXT: ldr x8, [x8, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
-; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB2_2
-; NO_SME_MOPS-NEXT: // %bb.1: // %entry
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: .LBB2_2: // %entry
-; NO_SME_MOPS-NEXT: mov x0, x8
-; NO_SME_MOPS-NEXT: bl memmove
-; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB2_4
-; NO_SME_MOPS-NEXT: // %bb.3: // %entry
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: .LBB2_4: // %entry
-; NO_SME_MOPS-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
-entry:
- tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
- ret void
-}
-
-define void @se_memcpy(i64 noundef %n) "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: se_memcpy:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: adrp x0, :got:dst
-; CHECK-NEXT: adrp x1, :got:src
-; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
-; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
-; CHECK-NEXT: bl __arm_sc_memcpy
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; NO_SME_MOPS-LABEL: se_memcpy:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
-; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
-; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
-; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
-; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
-; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
-; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
-; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
-; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
-; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: adrp x0, :got:dst
-; NO_SME_MOPS-NEXT: adrp x1, :got:src
-; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: bl memcpy
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
-entry:
- tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
- ret void
-}
-
-define void @se_memset(i64 noundef %n) "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: se_memset:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: adrp x0, :got:dst
-; CHECK-NEXT: mov w1, #2 // =0x2
-; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
-; CHECK-NEXT: bl __arm_sc_memset
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; NO_SME_MOPS-LABEL: se_memset:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
-; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
-; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
-; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
-; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
-; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
-; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
-; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
-; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
-; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: adrp x0, :got:dst
-; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: mov w1, #2 // =0x2
-; NO_SME_MOPS-NEXT: bl memset
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
-entry:
- tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
- ret void
-}
-
-define void @se_memmove(i64 noundef %n) "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: se_memmove:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: adrp x0, :got:dst
-; CHECK-NEXT: adrp x1, :got:src
-; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
-; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
-; CHECK-NEXT: bl __arm_sc_memmove
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; NO_SME_MOPS-LABEL: se_memmove:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
-; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
-; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
-; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
-; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
-; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
-; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
-; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
-; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
-; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: adrp x0, :got:dst
-; NO_SME_MOPS-NEXT: adrp x1, :got:src
-; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: bl memmove
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
-entry:
- tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
- ret void
-}
-
-define void @sb_memcpy(i64 noundef %n) "aarch64_pstate_sm_body" {
-; CHECK-LABEL: sb_memcpy:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_offset b8, -24
-; CHECK-NEXT: .cfi_offset b9, -32
-; CHECK-NEXT: .cfi_offset b10, -40
-; CHECK-NEXT: .cfi_offset b11, -48
-; CHECK-NEXT: .cfi_offset b12, -56
-; CHECK-NEXT: .cfi_offset b13, -64
-; CHECK-NEXT: .cfi_offset b14, -72
-; CHECK-NEXT: .cfi_offset b15, -80
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: smstart sm
-; CHECK-NEXT: adrp x0, :got:dst
-; CHECK-NEXT: adrp x1, :got:src
-; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
-; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
-; CHECK-NEXT: bl __arm_sc_memcpy
-; CHECK-NEXT: smstop sm
-; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; NO_SME_MOPS-LABEL: sb_memcpy:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
-; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
-; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
-; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
-; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
-; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
-; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
-; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
-; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
-; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: adrp x0, :got:dst
-; NO_SME_MOPS-NEXT: adrp x1, :got:src
-; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: bl memcpy
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
-entry:
- tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
- ret void
-}
-
-define void @sb_memset(i64 noundef %n) "aarch64_pstate_sm_body" {
-; CHECK-LABEL: sb_memset:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_offset b8, -24
-; CHECK-NEXT: .cfi_offset b9, -32
-; CHECK-NEXT: .cfi_offset b10, -40
-; CHECK-NEXT: .cfi_offset b11, -48
-; CHECK-NEXT: .cfi_offset b12, -56
-; CHECK-NEXT: .cfi_offset b13, -64
-; CHECK-NEXT: .cfi_offset b14, -72
-; CHECK-NEXT: .cfi_offset b15, -80
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: smstart sm
-; CHECK-NEXT: adrp x0, :got:dst
-; CHECK-NEXT: mov w1, #2 // =0x2
-; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
-; CHECK-NEXT: bl __arm_sc_memset
-; CHECK-NEXT: smstop sm
-; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; NO_SME_MOPS-LABEL: sb_memset:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
-; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
-; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
-; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
-; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
-; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
-; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
-; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
-; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
-; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: adrp x0, :got:dst
-; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: mov w1, #2 // =0x2
-; NO_SME_MOPS-NEXT: bl memset
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
-entry:
- tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
- ret void
-}
-
-define void @sb_memmove(i64 noundef %n) "aarch64_pstate_sm_body" {
-; CHECK-LABEL: sb_memmove:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_offset b8, -24
-; CHECK-NEXT: .cfi_offset b9, -32
-; CHECK-NEXT: .cfi_offset b10, -40
-; CHECK-NEXT: .cfi_offset b11, -48
-; CHECK-NEXT: .cfi_offset b12, -56
-; CHECK-NEXT: .cfi_offset b13, -64
-; CHECK-NEXT: .cfi_offset b14, -72
-; CHECK-NEXT: .cfi_offset b15, -80
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: smstart sm
-; CHECK-NEXT: adrp x0, :got:dst
-; CHECK-NEXT: adrp x1, :got:src
-; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
-; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
-; CHECK-NEXT: bl __arm_sc_memmove
-; CHECK-NEXT: smstop sm
-; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; NO_SME_MOPS-LABEL: sb_memmove:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; NO_SME_MOPS-NEXT: .cfi_def_cfa_offset 80
-; NO_SME_MOPS-NEXT: .cfi_offset w30, -16
-; NO_SME_MOPS-NEXT: .cfi_offset b8, -24
-; NO_SME_MOPS-NEXT: .cfi_offset b9, -32
-; NO_SME_MOPS-NEXT: .cfi_offset b10, -40
-; NO_SME_MOPS-NEXT: .cfi_offset b11, -48
-; NO_SME_MOPS-NEXT: .cfi_offset b12, -56
-; NO_SME_MOPS-NEXT: .cfi_offset b13, -64
-; NO_SME_MOPS-NEXT: .cfi_offset b14, -72
-; NO_SME_MOPS-NEXT: .cfi_offset b15, -80
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: adrp x0, :got:dst
-; NO_SME_MOPS-NEXT: adrp x1, :got:src
-; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: bl memmove
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
-entry:
- tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
- ret void
-}
-
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
-declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
-declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
diff --git a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
new file mode 100644
index 00000000000000..f2258bafd6134d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
@@ -0,0 +1,289 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=NO_SME_MOPS
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK_MOPS
+
+@dst = global [512 x i8] zeroinitializer, align 1
+@src = global [512 x i8] zeroinitializer, align 1
+
+define void @se_memcpy(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind {
+; CHECK-LABEL: se_memcpy:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: adrp x1, :got:src
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT: bl __arm_sc_memcpy
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: se_memcpy:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: adrp x0, :got:dst
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: adrp x1, :got:src
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: bl memcpy
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
+;
+; CHECK_MOPS-LABEL: se_memcpy:
+; CHECK_MOPS: // %bb.0: // %entry
+; CHECK_MOPS-NEXT: adrp x8, :got:src
+; CHECK_MOPS-NEXT: adrp x9, :got:dst
+; CHECK_MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
+; CHECK_MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
+; CHECK_MOPS-NEXT: cpyfp [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: cpyfm [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: cpyfe [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: ret
+entry:
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+ ret void
+}
+
+define void @se_memset(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind {
+; CHECK-LABEL: se_memset:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: mov w1, #2 // =0x2
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: bl __arm_sc_memset
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: se_memset:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: adrp x0, :got:dst
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: mov w1, #2 // =0x2
+; NO_SME_MOPS-NEXT: bl memset
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
+;
+; CHECK_MOPS-LABEL: se_memset:
+; CHECK_MOPS: // %bb.0: // %entry
+; CHECK_MOPS-NEXT: adrp x8, :got:dst
+; CHECK_MOPS-NEXT: mov w9, #2 // =0x2
+; CHECK_MOPS-NEXT: ldr x8, [x8, :got_lo12:dst]
+; CHECK_MOPS-NEXT: setp [x8]!, x0!, x9
+; CHECK_MOPS-NEXT: setm [x8]!, x0!, x9
+; CHECK_MOPS-NEXT: sete [x8]!, x0!, x9
+; CHECK_MOPS-NEXT: ret
+entry:
+ tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
+ ret void
+}
+
+define void @se_memmove(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind {
+; CHECK-LABEL: se_memmove:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: adrp x1, :got:src
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT: bl __arm_sc_memmove
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: se_memmove:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: adrp x0, :got:dst
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: adrp x1, :got:src
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: bl memmove
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
+;
+; CHECK_MOPS-LABEL: se_memmove:
+; CHECK_MOPS: // %bb.0: // %entry
+; CHECK_MOPS-NEXT: adrp x8, :got:src
+; CHECK_MOPS-NEXT: adrp x9, :got:dst
+; CHECK_MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
+; CHECK_MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
+; CHECK_MOPS-NEXT: cpyp [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: cpym [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: cpye [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: ret
+entry:
+ tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+ ret void
+}
+
+define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" nounwind {
+; CHECK-LABEL: sc_memcpy:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: adrp x1, :got:src
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT: bl __arm_sc_memcpy
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: sc_memcpy:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: bl __arm_sme_state
+; NO_SME_MOPS-NEXT: adrp x8, :got:dst
+; NO_SME_MOPS-NEXT: adrp x1, :got:src
+; NO_SME_MOPS-NEXT: and x19, x0, #0x1
+; NO_SME_MOPS-NEXT: ldr x8, [x8, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB3_2
+; NO_SME_MOPS-NEXT: // %bb.1: // %entry
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: .LBB3_2: // %entry
+; NO_SME_MOPS-NEXT: mov x0, x8
+; NO_SME_MOPS-NEXT: bl memcpy
+; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB3_4
+; NO_SME_MOPS-NEXT: // %bb.3: // %entry
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: .LBB3_4: // %entry
+; NO_SME_MOPS-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
+;
+; CHECK_MOPS-LABEL: sc_memcpy:
+; CHECK_MOPS: // %bb.0: // %entry
+; CHECK_MOPS-NEXT: adrp x8, :got:src
+; CHECK_MOPS-NEXT: adrp x9, :got:dst
+; CHECK_MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
+; CHECK_MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
+; CHECK_MOPS-NEXT: cpyfp [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: cpyfm [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: cpyfe [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: ret
+entry:
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+ ret void
+}
+
+define void @sb_memcpy(i64 noundef %n) "aarch64_pstate_sm_body" nounwind {
+; CHECK-LABEL: sb_memcpy:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: adrp x0, :got:dst
+; CHECK-NEXT: adrp x1, :got:src
+; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NEXT: bl __arm_sc_memcpy
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO_SME_MOPS-LABEL: sb_memcpy:
+; NO_SME_MOPS: // %bb.0: // %entry
+; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: mov x2, x0
+; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: adrp x0, :got:dst
+; NO_SME_MOPS-NEXT: adrp x1, :got:src
+; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
+; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: bl memcpy
+; NO_SME_MOPS-NEXT: smstart sm
+; NO_SME_MOPS-NEXT: smstop sm
+; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; NO_SME_MOPS-NEXT: ret
+;
+; CHECK_MOPS-LABEL: sb_memcpy:
+; CHECK_MOPS: // %bb.0: // %entry
+; CHECK_MOPS-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
+; CHECK_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK_MOPS-NEXT: smstart sm
+; CHECK_MOPS-NEXT: adrp x8, :got:src
+; CHECK_MOPS-NEXT: adrp x9, :got:dst
+; CHECK_MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
+; CHECK_MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
+; CHECK_MOPS-NEXT: cpyfp [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: cpyfm [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: cpyfe [x9]!, [x8]!, x0!
+; CHECK_MOPS-NEXT: smstop sm
+; CHECK_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK_MOPS-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload
+; CHECK_MOPS-NEXT: ret
+entry:
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
+declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
>From 5de67c85902d7f7979e6fbabe868ce79fe625d77 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Mon, 4 Mar 2024 02:30:09 +0000
Subject: [PATCH 8/8] Resolved comments.
---
.../AArch64/AArch64SelectionDAGInfo.cpp | 12 +-
.../streaming-compatible-memory-ops.ll | 352 +++++++++---------
2 files changed, 184 insertions(+), 180 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index a8cefc3a72ed5e..7cf3b8f3a2be4a 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -95,6 +95,7 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
DstEntry.Node = Dst;
Args.push_back(DstEntry);
EVT Ty = TLI->getPointerTy(DAG.getDataLayout());
+ PointerType *RetTy;
if (!LowerToSMERoutines)
return SDValue();
@@ -106,6 +107,7 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
Symbol = DAG.getExternalSymbol("__arm_sc_memcpy", Ty);
Entry.Node = Src;
Args.push_back(Entry);
+ RetTy = PointerType::getUnqual(*DAG.getContext());
break;
}
case RTLIB::MEMMOVE: {
@@ -114,16 +116,17 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
Symbol = DAG.getExternalSymbol("__arm_sc_memmove", Ty);
Entry.Node = Src;
Args.push_back(Entry);
+ RetTy = PointerType::getUnqual(*DAG.getContext());
break;
}
case RTLIB::MEMSET: {
TargetLowering::ArgListEntry Entry;
- Entry.Ty = PointerType::getUnqual(*DAG.getContext());
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
Symbol = DAG.getExternalSymbol("__arm_sc_memset", Ty);
Src = DAG.getZExtOrTrunc(Src, DL, MVT::i32);
Entry.Node = Src;
- Entry.Ty = Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
+ RetTy = PointerType::getUnqual(*DAG.getContext());
break;
}
default:
@@ -133,11 +136,12 @@ SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
SizeEntry.Node = Size;
SizeEntry.Ty = PointerType::getUnqual(*DAG.getContext());
Args.push_back(SizeEntry);
+ assert(Symbol->getOpcode() == ISD::ExternalSymbol &&
+ "Function name is not set");
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
- TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
- Symbol, std::move(Args));
+ TLI->getLibcallCallingConv(LC), RetTy, Symbol, std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
diff --git a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
index f2258bafd6134d..c39894c27d9d4d 100644
--- a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
+++ b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=NO_SME_MOPS
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK_MOPS
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=CHECK-NO-SME-ROUTINES
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-MOPS
@dst = global [512 x i8] zeroinitializer, align 1
@src = global [512 x i8] zeroinitializer, align 1
@@ -19,38 +19,38 @@ define void @se_memcpy(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind {
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
-; NO_SME_MOPS-LABEL: se_memcpy:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: adrp x0, :got:dst
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: adrp x1, :got:src
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: bl memcpy
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
+; CHECK-NO-SME-ROUTINES-LABEL: se_memcpy:
+; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0
+; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst
+; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src
+; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
+; CHECK-NO-SME-ROUTINES-NEXT: bl memcpy
+; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ret
;
-; CHECK_MOPS-LABEL: se_memcpy:
-; CHECK_MOPS: // %bb.0: // %entry
-; CHECK_MOPS-NEXT: adrp x8, :got:src
-; CHECK_MOPS-NEXT: adrp x9, :got:dst
-; CHECK_MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
-; CHECK_MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
-; CHECK_MOPS-NEXT: cpyfp [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: cpyfm [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: cpyfe [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: ret
+; CHECK-MOPS-LABEL: se_memcpy:
+; CHECK-MOPS: // %bb.0: // %entry
+; CHECK-MOPS-NEXT: adrp x8, :got:src
+; CHECK-MOPS-NEXT: adrp x9, :got:dst
+; CHECK-MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
+; CHECK-MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
+; CHECK-MOPS-NEXT: cpyfp [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: cpyfm [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: cpyfe [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
ret void
@@ -68,36 +68,36 @@ define void @se_memset(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind {
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
-; NO_SME_MOPS-LABEL: se_memset:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: adrp x0, :got:dst
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: mov w1, #2 // =0x2
-; NO_SME_MOPS-NEXT: bl memset
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
+; CHECK-NO-SME-ROUTINES-LABEL: se_memset:
+; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0
+; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst
+; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
+; CHECK-NO-SME-ROUTINES-NEXT: mov w1, #2 // =0x2
+; CHECK-NO-SME-ROUTINES-NEXT: bl memset
+; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ret
;
-; CHECK_MOPS-LABEL: se_memset:
-; CHECK_MOPS: // %bb.0: // %entry
-; CHECK_MOPS-NEXT: adrp x8, :got:dst
-; CHECK_MOPS-NEXT: mov w9, #2 // =0x2
-; CHECK_MOPS-NEXT: ldr x8, [x8, :got_lo12:dst]
-; CHECK_MOPS-NEXT: setp [x8]!, x0!, x9
-; CHECK_MOPS-NEXT: setm [x8]!, x0!, x9
-; CHECK_MOPS-NEXT: sete [x8]!, x0!, x9
-; CHECK_MOPS-NEXT: ret
+; CHECK-MOPS-LABEL: se_memset:
+; CHECK-MOPS: // %bb.0: // %entry
+; CHECK-MOPS-NEXT: adrp x8, :got:dst
+; CHECK-MOPS-NEXT: mov w9, #2 // =0x2
+; CHECK-MOPS-NEXT: ldr x8, [x8, :got_lo12:dst]
+; CHECK-MOPS-NEXT: setp [x8]!, x0!, x9
+; CHECK-MOPS-NEXT: setm [x8]!, x0!, x9
+; CHECK-MOPS-NEXT: sete [x8]!, x0!, x9
+; CHECK-MOPS-NEXT: ret
entry:
tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
ret void
@@ -116,38 +116,38 @@ define void @se_memmove(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind {
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
-; NO_SME_MOPS-LABEL: se_memmove:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: adrp x0, :got:dst
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: adrp x1, :got:src
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: bl memmove
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
+; CHECK-NO-SME-ROUTINES-LABEL: se_memmove:
+; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0
+; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst
+; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src
+; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
+; CHECK-NO-SME-ROUTINES-NEXT: bl memmove
+; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ret
;
-; CHECK_MOPS-LABEL: se_memmove:
-; CHECK_MOPS: // %bb.0: // %entry
-; CHECK_MOPS-NEXT: adrp x8, :got:src
-; CHECK_MOPS-NEXT: adrp x9, :got:dst
-; CHECK_MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
-; CHECK_MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
-; CHECK_MOPS-NEXT: cpyp [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: cpym [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: cpye [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: ret
+; CHECK-MOPS-LABEL: se_memmove:
+; CHECK-MOPS: // %bb.0: // %entry
+; CHECK-MOPS-NEXT: adrp x8, :got:src
+; CHECK-MOPS-NEXT: adrp x9, :got:dst
+; CHECK-MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
+; CHECK-MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
+; CHECK-MOPS-NEXT: cpyp [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: cpym [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: cpye [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: ret
entry:
tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
ret void
@@ -166,47 +166,47 @@ define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" nounwind {
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
-; NO_SME_MOPS-LABEL: sc_memcpy:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: bl __arm_sme_state
-; NO_SME_MOPS-NEXT: adrp x8, :got:dst
-; NO_SME_MOPS-NEXT: adrp x1, :got:src
-; NO_SME_MOPS-NEXT: and x19, x0, #0x1
-; NO_SME_MOPS-NEXT: ldr x8, [x8, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
-; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB3_2
-; NO_SME_MOPS-NEXT: // %bb.1: // %entry
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: .LBB3_2: // %entry
-; NO_SME_MOPS-NEXT: mov x0, x8
-; NO_SME_MOPS-NEXT: bl memcpy
-; NO_SME_MOPS-NEXT: tbz w19, #0, .LBB3_4
-; NO_SME_MOPS-NEXT: // %bb.3: // %entry
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: .LBB3_4: // %entry
-; NO_SME_MOPS-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
+; CHECK-NO-SME-ROUTINES-LABEL: sc_memcpy:
+; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0
+; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: bl __arm_sme_state
+; CHECK-NO-SME-ROUTINES-NEXT: adrp x8, :got:dst
+; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src
+; CHECK-NO-SME-ROUTINES-NEXT: and x19, x0, #0x1
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x8, [x8, :got_lo12:dst]
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB3_2
+; CHECK-NO-SME-ROUTINES-NEXT: // %bb.1: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
+; CHECK-NO-SME-ROUTINES-NEXT: .LBB3_2: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: mov x0, x8
+; CHECK-NO-SME-ROUTINES-NEXT: bl memcpy
+; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB3_4
+; CHECK-NO-SME-ROUTINES-NEXT: // %bb.3: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
+; CHECK-NO-SME-ROUTINES-NEXT: .LBB3_4: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ret
;
-; CHECK_MOPS-LABEL: sc_memcpy:
-; CHECK_MOPS: // %bb.0: // %entry
-; CHECK_MOPS-NEXT: adrp x8, :got:src
-; CHECK_MOPS-NEXT: adrp x9, :got:dst
-; CHECK_MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
-; CHECK_MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
-; CHECK_MOPS-NEXT: cpyfp [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: cpyfm [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: cpyfe [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: ret
+; CHECK-MOPS-LABEL: sc_memcpy:
+; CHECK-MOPS: // %bb.0: // %entry
+; CHECK-MOPS-NEXT: adrp x8, :got:src
+; CHECK-MOPS-NEXT: adrp x9, :got:dst
+; CHECK-MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
+; CHECK-MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
+; CHECK-MOPS-NEXT: cpyfp [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: cpyfm [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: cpyfe [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
ret void
@@ -235,50 +235,50 @@ define void @sb_memcpy(i64 noundef %n) "aarch64_pstate_sm_body" nounwind {
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
-; NO_SME_MOPS-LABEL: sb_memcpy:
-; NO_SME_MOPS: // %bb.0: // %entry
-; NO_SME_MOPS-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: mov x2, x0
-; NO_SME_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; NO_SME_MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: adrp x0, :got:dst
-; NO_SME_MOPS-NEXT: adrp x1, :got:src
-; NO_SME_MOPS-NEXT: ldr x0, [x0, :got_lo12:dst]
-; NO_SME_MOPS-NEXT: ldr x1, [x1, :got_lo12:src]
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: bl memcpy
-; NO_SME_MOPS-NEXT: smstart sm
-; NO_SME_MOPS-NEXT: smstop sm
-; NO_SME_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
-; NO_SME_MOPS-NEXT: ret
+; CHECK-NO-SME-ROUTINES-LABEL: sb_memcpy:
+; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0
+; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
+; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst
+; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst]
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src]
+; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
+; CHECK-NO-SME-ROUTINES-NEXT: bl memcpy
+; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
+; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ret
;
-; CHECK_MOPS-LABEL: sb_memcpy:
-; CHECK_MOPS: // %bb.0: // %entry
-; CHECK_MOPS-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
-; CHECK_MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK_MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK_MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK_MOPS-NEXT: smstart sm
-; CHECK_MOPS-NEXT: adrp x8, :got:src
-; CHECK_MOPS-NEXT: adrp x9, :got:dst
-; CHECK_MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
-; CHECK_MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
-; CHECK_MOPS-NEXT: cpyfp [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: cpyfm [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: cpyfe [x9]!, [x8]!, x0!
-; CHECK_MOPS-NEXT: smstop sm
-; CHECK_MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK_MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK_MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK_MOPS-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload
-; CHECK_MOPS-NEXT: ret
+; CHECK-MOPS-LABEL: sb_memcpy:
+; CHECK-MOPS: // %bb.0: // %entry
+; CHECK-MOPS-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-MOPS-NEXT: smstart sm
+; CHECK-MOPS-NEXT: adrp x8, :got:src
+; CHECK-MOPS-NEXT: adrp x9, :got:dst
+; CHECK-MOPS-NEXT: ldr x8, [x8, :got_lo12:src]
+; CHECK-MOPS-NEXT: ldr x9, [x9, :got_lo12:dst]
+; CHECK-MOPS-NEXT: cpyfp [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: cpyfm [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: cpyfe [x9]!, [x8]!, x0!
+; CHECK-MOPS-NEXT: smstop sm
+; CHECK-MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-MOPS-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload
+; CHECK-MOPS-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
ret void
More information about the llvm-commits mailing list