[llvm] 5aa08bf - [AArch64][SelectionDAG] CodeGen for Armv8.8/9.3 MOPS

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 31 12:56:35 PST 2022


Author: tyb0807
Date: 2022-01-31T20:56:27Z
New Revision: 5aa08bf70854fc1fb2d595ce12813fe5f40494dc

URL: https://github.com/llvm/llvm-project/commit/5aa08bf70854fc1fb2d595ce12813fe5f40494dc
DIFF: https://github.com/llvm/llvm-project/commit/5aa08bf70854fc1fb2d595ce12813fe5f40494dc.diff

LOG: [AArch64][SelectionDAG] CodeGen for Armv8.8/9.3 MOPS

New target SDNodes are added: AArch64ISD::MOPS_MEMSET, etc.
Each intrinsic is translated to one of these in SelectionDAGBuilder
via the EmitTargetCodeForMemcpy/Memmove/Memset hooks, which call EmitMOPS().

A custom lowering routine for INTRINSIC_W_CHAIN is added to handle
llvm.aarch64.mops.memset.tag. This takes a separate path from the common
intrinsics but ultimately ends up in the same EmitMOPS().

This is part 4/4 of a series of patches split from
https://reviews.llvm.org/D117405 to facilitate reviewing.

Patch by Tomas Matheson, Lucas Prates and Son Tuan Vu.

Differential Revision: https://reviews.llvm.org/D117764

Added: 
    llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
    llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
    llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
    llvm/test/CodeGen/AArch64/aarch64-mops.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 944145dd7ad6..e943505f8df1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29,6 +29,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ObjCARCUtil.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/Analysis.h"
@@ -938,19 +939,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   // In case of strict alignment, avoid an excessive number of byte wide stores.
   MaxStoresPerMemsetOptSize = 8;
-  MaxStoresPerMemset = Subtarget->requiresStrictAlign()
-                       ? MaxStoresPerMemsetOptSize : 32;
+  MaxStoresPerMemset =
+      Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
 
   MaxGluedStoresPerMemcpy = 4;
   MaxStoresPerMemcpyOptSize = 4;
-  MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
-                       ? MaxStoresPerMemcpyOptSize : 16;
+  MaxStoresPerMemcpy =
+      Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
 
-  MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
+  MaxStoresPerMemmoveOptSize = 4;
+  MaxStoresPerMemmove = 4;
 
   MaxLoadsPerMemcmpOptSize = 4;
-  MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
-                      ? MaxLoadsPerMemcmpOptSize : 8;
+  MaxLoadsPerMemcmp =
+      Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
 
   setStackPointerRegisterToSaveRestore(AArch64::SP);
 
@@ -1426,6 +1428,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
   }
 
+  if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
+    // Only required for llvm.aarch64.mops.memset.tag
+    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+  }
+
   PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
 }
 
@@ -2267,6 +2274,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::UADDLP)
     MAKE_CASE(AArch64ISD::CALL_RVMARKER)
     MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
+    MAKE_CASE(AArch64ISD::MOPS_MEMSET)
+    MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
+    MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
+    MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
   }
 #undef MAKE_CASE
   return nullptr;
@@ -4059,6 +4070,39 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
   return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
 }
 
+SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  unsigned IntNo = Op.getConstantOperandVal(1);
+  switch (IntNo) {
+  default:
+    return SDValue(); // Don't custom lower most intrinsics.
+  case Intrinsic::aarch64_mops_memset_tag: {
+    auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
+    SDLoc DL(Op);
+    SDValue Chain = Node->getChain();
+    SDValue Dst = Op.getOperand(2);
+    SDValue Val = Op.getOperand(3);
+    Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
+    SDValue Size = Op.getOperand(4);
+    auto Alignment = Node->getMemOperand()->getAlign();
+    bool IsVol = Node->isVolatile();
+    auto DstPtrInfo = Node->getPointerInfo();
+
+    const auto &SDI =
+        static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo());
+    SDValue MS =
+        SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val,
+                     Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{});
+
+    // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the
+    // intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise
+    // LowerOperationWrapper will complain that the number of results has
+    // changed.
+    return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
+  }
+  }
+}
+
 SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -5126,6 +5170,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::MULHU:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
                                /*OverrideNEON=*/true);
+  case ISD::INTRINSIC_W_CHAIN:
+    return LowerINTRINSIC_W_CHAIN(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN:
     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::ATOMIC_STORE:
@@ -11879,6 +11925,20 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
     return true;
   }
+  case Intrinsic::aarch64_mops_memset_tag: {
+    Value *Dst = I.getArgOperand(0);
+    Value *Val = I.getArgOperand(1);
+    PointerType *PtrTy = cast<PointerType>(Dst->getType());
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(Val->getType());
+    Info.ptrVal = Dst;
+    Info.offset = 0;
+    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.flags = MachineMemOperand::MOStore;
+    // The size of the memory being operated on is unknown at this point
+    Info.size = MemoryLocation::UnknownSize;
+    return true;
+  }
   default:
     break;
   }

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 585eae7de7b7..2138c0ffe70a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -452,6 +452,12 @@ enum NodeType : unsigned {
   LDP,
   STP,
   STNP,
+
+  // Memory Operations
+  MOPS_MEMSET,
+  MOPS_MEMSET_TAGGING,
+  MOPS_MEMCOPY,
+  MOPS_MEMMOVE,
 };
 
 } // end namespace AArch64ISD
@@ -889,6 +895,7 @@ class AArch64TargetLowering : public TargetLowering {
 
   SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
 
   bool isEligibleForTailCallOptimization(

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index dc2d4dee1129..daceb3feffa3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8362,6 +8362,14 @@ let Predicates = [HasMOPS, HasMTE] in {
   }
 }
 
+// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
+// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
+def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
+def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
+def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
+def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
+def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;
+
 let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
   let mayLoad = 1 in {
     def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),

diff  --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index d2d84b2a3f6d..893269c1a7ef 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -15,15 +15,95 @@ using namespace llvm;
 
 #define DEBUG_TYPE "aarch64-selectiondag-info"
 
+SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode,
+                                          SelectionDAG &DAG, const SDLoc &DL,
+                                          SDValue Chain, SDValue Dst,
+                                          SDValue SrcOrValue, SDValue Size,
+                                          Align Alignment, bool isVolatile,
+                                          MachinePointerInfo DstPtrInfo,
+                                          MachinePointerInfo SrcPtrInfo) const {
+
+  // Get the constant size of the copy/set.
+  uint64_t ConstSize = 0;
+  if (auto *C = dyn_cast<ConstantSDNode>(Size))
+    ConstSize = C->getZExtValue();
+
+  const bool IsSet = SDOpcode == AArch64ISD::MOPS_MEMSET ||
+                     SDOpcode == AArch64ISD::MOPS_MEMSET_TAGGING;
+
+  const auto MachineOpcode = [&]() {
+    switch (SDOpcode) {
+    case AArch64ISD::MOPS_MEMSET:
+      return AArch64::MOPSMemorySetPseudo;
+    case AArch64ISD::MOPS_MEMSET_TAGGING:
+      return AArch64::MOPSMemorySetTaggingPseudo;
+    case AArch64ISD::MOPS_MEMCOPY:
+      return AArch64::MOPSMemoryCopyPseudo;
+    case AArch64ISD::MOPS_MEMMOVE:
+      return AArch64::MOPSMemoryMovePseudo;
+    default:
+      llvm_unreachable("Unhandled MOPS ISD Opcode");
+    }
+  }();
+
+  MachineMemOperand::Flags Flags = MachineMemOperand::MOStore;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  if (!IsSet)
+    Flags |= MachineMemOperand::MOLoad;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  auto *DstOp =
+      MF.getMachineMemOperand(DstPtrInfo, Flags, ConstSize, Alignment);
+  auto *SrcOp =
+      MF.getMachineMemOperand(SrcPtrInfo, Flags, ConstSize, Alignment);
+
+  if (IsSet) {
+    // Extend value to i64 if required
+    if (SrcOrValue.getValueType() != MVT::i64)
+      SrcOrValue = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, SrcOrValue);
+    SDValue Ops[] = {Dst, Size, SrcOrValue, Chain};
+    const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::Other};
+    MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops);
+    DAG.setNodeMemRefs(Node, {DstOp});
+    return SDValue(Node, 2);
+  } else {
+    SDValue Ops[] = {Dst, SrcOrValue, Size, Chain};
+    const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::i64, MVT::Other};
+    MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops);
+    DAG.setNodeMemRefs(Node, {DstOp, SrcOp});
+    return SDValue(Node, 3);
+  }
+}
+
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
+    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+  const AArch64Subtarget &STI =
+      DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+  if (STI.hasMOPS())
+    return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size,
+                    Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
+  return SDValue();
+}
+
 SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
     SDValue Size, Align Alignment, bool isVolatile,
     MachinePointerInfo DstPtrInfo) const {
+  const AArch64Subtarget &STI =
+      DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+
+  if (STI.hasMOPS()) {
+    return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size,
+                    Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{});
+  }
+
   // Check to see if there is a specialized entry-point for memory zeroing.
   ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
   ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
-  const AArch64Subtarget &STI =
-      DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
   const char *bzeroName =
       (V && V->isZero())
           ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
@@ -55,6 +135,19 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
   return SDValue();
 }
 
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
+    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+    SDValue Size, Align Alignment, bool isVolatile,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+  const AArch64Subtarget &STI =
+      DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+  if (STI.hasMOPS()) {
+    return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size,
+                    Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
+  }
+  return SDValue();
+}
+
 static const int kSetTagLoopThreshold = 176;
 
 static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,

diff  --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 7d53bd456975..47fe3bf7dcf5 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -19,11 +19,30 @@ namespace llvm {
 
 class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo {
 public:
+  SDValue EmitMOPS(AArch64ISD::NodeType SDOpcode, SelectionDAG &DAG,
+                   const SDLoc &DL, SDValue Chain, SDValue Dst,
+                   SDValue SrcOrValue, SDValue Size, Align Alignment,
+                   bool isVolatile, MachinePointerInfo DstPtrInfo,
+                   MachinePointerInfo SrcPtrInfo) const;
+
+  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Dst, SDValue Src,
+                                  SDValue Size, Align Alignment,
+                                  bool isVolatile, bool AlwaysInline,
+                                  MachinePointerInfo DstPtrInfo,
+                                  MachinePointerInfo SrcPtrInfo) const override;
   SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
                                   SDValue Chain, SDValue Dst, SDValue Src,
                                   SDValue Size, Align Alignment,
                                   bool isVolatile,
                                   MachinePointerInfo DstPtrInfo) const override;
+  SDValue
+  EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
+                           SDValue Dst, SDValue Src, SDValue Size,
+                           Align Alignment, bool isVolatile,
+                           MachinePointerInfo DstPtrInfo,
+                           MachinePointerInfo SrcPtrInfo) const override;
+
   SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
                                   SDValue Chain, SDValue Op1, SDValue Op2,
                                   MachinePointerInfo DstPtrInfo,

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll
new file mode 100644
index 000000000000..5c6de3a728d9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops       | FileCheck %s --check-prefix=CHECK-MOPS
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+
+declare void @fn(i8*, i8*)
+
+define void @consecutive() {
+; CHECK-MOPS-LABEL: consecutive:
+; CHECK-MOPS:       // %bb.0: // %entry
+; CHECK-MOPS-NEXT:    stp	x29, x30, [sp, #-16]!           // 16-byte Folded Spill
+; CHECK-MOPS-NEXT:    sub	sp, sp, #2016
+; CHECK-MOPS-NEXT:    .cfi_def_cfa_offset 2032
+; CHECK-MOPS-NEXT:    .cfi_offset w30, -8
+; CHECK-MOPS-NEXT:    .cfi_offset w29, -16
+; CHECK-MOPS-NEXT:    mov	w8, #1000
+; CHECK-MOPS-NEXT:    add	x9, sp, #8
+; CHECK-MOPS-NEXT:    adrp	x10, .LCPI0_0
+; CHECK-MOPS-NEXT:    adrp	x11, .LCPI0_1
+; CHECK-MOPS-NEXT:    mov	w12, #6424
+; CHECK-MOPS-NEXT:    mov	w13, #7452
+; CHECK-MOPS-NEXT:    setp	[x9]!, x8!, xzr
+; CHECK-MOPS-NEXT:    setm	[x9]!, x8!, xzr
+; CHECK-MOPS-NEXT:    sete	[x9]!, x8!, xzr
+; CHECK-MOPS-NEXT:    movk	w12, #6938, lsl #16
+; CHECK-MOPS-NEXT:    ldr	q0, [x10, :lo12:.LCPI0_0]
+; CHECK-MOPS-NEXT:    mov	w8, #30
+; CHECK-MOPS-NEXT:    ldr	d1, [x11, :lo12:.LCPI0_1]
+; CHECK-MOPS-NEXT:    add	x0, sp, #1008
+; CHECK-MOPS-NEXT:    add	x1, sp, #8
+; CHECK-MOPS-NEXT:    str	w12, [sp, #1032]
+; CHECK-MOPS-NEXT:    strh	w13, [sp, #1036]
+; CHECK-MOPS-NEXT:    str	q0, [sp, #1008]
+; CHECK-MOPS-NEXT:    str	d1, [sp, #1024]
+; CHECK-MOPS-NEXT:    strb	w8, [sp, #1038]
+; CHECK-MOPS-NEXT:    bl	fn
+; CHECK-MOPS-NEXT:    add	sp, sp, #2016
+; CHECK-MOPS-NEXT:    ldp	x29, x30, [sp], #16             // 16-byte Folded Reload
+; CHECK-MOPS-NEXT:    ret
+entry:
+  %buf_from = alloca [1000 x i8], align 16
+  %buf_to = alloca [1000 x i8], align 1
+  %0 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 0
+  %1 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_to, i64 0, i64 0
+  call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(1000) %1, i8 0, i64 1000, i1 false)
+  %2 = bitcast [1000 x i8]* %buf_from to <16 x i8>*
+  store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8>* %2, align 16
+  %arrayidx.16 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 16
+  %3 = bitcast i8* %arrayidx.16 to <8 x i8>*
+  store <8 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, <8 x i8>* %3, align 16
+  %arrayidx.24 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 24
+  store i8 24, i8* %arrayidx.24, align 8
+  %arrayidx.25 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 25
+  store i8 25, i8* %arrayidx.25, align 1
+  %arrayidx.26 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 26
+  store i8 26, i8* %arrayidx.26, align 2
+  %arrayidx.27 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 27
+  store i8 27, i8* %arrayidx.27, align 1
+  %arrayidx.28 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 28
+  store i8 28, i8* %arrayidx.28, align 4
+  %arrayidx.29 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 29
+  store i8 29, i8* %arrayidx.29, align 1
+  %arrayidx.30 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 30
+  store i8 30, i8* %arrayidx.30, align 2
+  call void @fn(i8* nonnull %0, i8* nonnull %1)
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
index d57c4ceb2765..ece402473f80 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
@@ -2,8 +2,8 @@
 
 ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte  | FileCheck %s --check-prefix=GISel-O0
 ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi     -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte  | FileCheck %s --check-prefix=GISel
+; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops,+mte  | FileCheck %s --check-prefix=SDAG
 
-; Function Attrs: mustprogress nofree nosync nounwind willreturn writeonly
 declare i8* @llvm.aarch64.mops.memset.tag(i8*, i8, i64)
 
 define i8* @memset_tagged_0_zeroval(i8* %dst, i64 %size) {
@@ -15,6 +15,14 @@ define i8* @memset_tagged_0_zeroval(i8* %dst, i64 %size) {
 ; GISel-O0-NEXT:    setge [x0]!, x8!, x8
 ; GISel-O0-NEXT:    ret
 ;
+; SDAG-LABEL: memset_tagged_0_zeroval:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    mov x8, xzr
+; SDAG-NEXT:    setgp [x0]!, x8!, xzr
+; SDAG-NEXT:    setgm [x0]!, x8!, xzr
+; SDAG-NEXT:    setge [x0]!, x8!, xzr
+; SDAG-NEXT:    ret
+;
 ; GISel-LABEL: memset_tagged_0_zeroval:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    mov x8, xzr
@@ -38,6 +46,14 @@ define i8* @memset_tagged_1_zeroval(i8* %dst, i64 %size) {
 ; GISel-O0-NEXT:    setge [x0]!, x8!, x9
 ; GISel-O0-NEXT:    ret
 ;
+; SDAG-LABEL: memset_tagged_1_zeroval:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    mov w8, #1
+; SDAG-NEXT:    setgp [x0]!, x8!, xzr
+; SDAG-NEXT:    setgm [x0]!, x8!, xzr
+; SDAG-NEXT:    setge [x0]!, x8!, xzr
+; SDAG-NEXT:    ret
+;
 ; GISel-LABEL: memset_tagged_1_zeroval:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    mov w8, #1
@@ -61,6 +77,14 @@ define i8* @memset_tagged_10_zeroval(i8* %dst, i64 %size) {
 ; GISel-O0-NEXT:    setge [x0]!, x8!, x9
 ; GISel-O0-NEXT:    ret
 ;
+; SDAG-LABEL: memset_tagged_10_zeroval:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    mov w8, #10
+; SDAG-NEXT:    setgp [x0]!, x8!, xzr
+; SDAG-NEXT:    setgm [x0]!, x8!, xzr
+; SDAG-NEXT:    setge [x0]!, x8!, xzr
+; SDAG-NEXT:    ret
+;
 ; GISel-LABEL: memset_tagged_10_zeroval:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    mov w8, #10
@@ -84,6 +108,14 @@ define i8* @memset_tagged_10000_zeroval(i8* %dst, i64 %size) {
 ; GISel-O0-NEXT:    setge [x0]!, x8!, x9
 ; GISel-O0-NEXT:    ret
 ;
+; SDAG-LABEL: memset_tagged_10000_zeroval:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    mov w8, #10000
+; SDAG-NEXT:    setgp [x0]!, x8!, xzr
+; SDAG-NEXT:    setgm [x0]!, x8!, xzr
+; SDAG-NEXT:    setge [x0]!, x8!, xzr
+; SDAG-NEXT:    ret
+;
 ; GISel-LABEL: memset_tagged_10000_zeroval:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    mov w8, #10000
@@ -105,6 +137,13 @@ define i8* @memset_tagged_size_zeroval(i8* %dst, i64 %size) {
 ; GISel-O0-NEXT:    setge [x0]!, x1!, x8
 ; GISel-O0-NEXT:    ret
 ;
+; SDAG-LABEL: memset_tagged_size_zeroval:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    setgp [x0]!, x1!, xzr
+; SDAG-NEXT:    setgm [x0]!, x1!, xzr
+; SDAG-NEXT:    setge [x0]!, x1!, xzr
+; SDAG-NEXT:    ret
+;
 ; GISel-LABEL: memset_tagged_size_zeroval:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    setgp [x0]!, x1!, xzr
@@ -127,6 +166,15 @@ define i8* @memset_tagged_0(i8* %dst, i64 %size, i32 %value) {
 ; GISel-O0-NEXT:    setge [x0]!, x8!, x9
 ; GISel-O0-NEXT:    ret
 ;
+; SDAG-LABEL: memset_tagged_0:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    mov x8, xzr
+; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
+; SDAG-NEXT:    setgp [x0]!, x8!, x2
+; SDAG-NEXT:    setgm [x0]!, x8!, x2
+; SDAG-NEXT:    setge [x0]!, x8!, x2
+; SDAG-NEXT:    ret
+;
 ; GISel-LABEL: memset_tagged_0:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    mov x8, xzr
@@ -153,6 +201,15 @@ define i8* @memset_tagged_1(i8* %dst, i64 %size, i32 %value) {
 ; GISel-O0-NEXT:    setge [x0]!, x8!, x9
 ; GISel-O0-NEXT:    ret
 ;
+; SDAG-LABEL: memset_tagged_1:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    mov w8, #1
+; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
+; SDAG-NEXT:    setgp [x0]!, x8!, x2
+; SDAG-NEXT:    setgm [x0]!, x8!, x2
+; SDAG-NEXT:    setge [x0]!, x8!, x2
+; SDAG-NEXT:    ret
+;
 ; GISel-LABEL: memset_tagged_1:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    mov w8, #1
@@ -179,6 +236,15 @@ define i8* @memset_tagged_10(i8* %dst, i64 %size, i32 %value) {
 ; GISel-O0-NEXT:    setge [x0]!, x8!, x9
 ; GISel-O0-NEXT:    ret
 ;
+; SDAG-LABEL: memset_tagged_10:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    mov w8, #10
+; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
+; SDAG-NEXT:    setgp [x0]!, x8!, x2
+; SDAG-NEXT:    setgm [x0]!, x8!, x2
+; SDAG-NEXT:    setge [x0]!, x8!, x2
+; SDAG-NEXT:    ret
+;
 ; GISel-LABEL: memset_tagged_10:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    mov w8, #10
@@ -205,6 +271,15 @@ define i8* @memset_tagged_10000(i8* %dst, i64 %size, i32 %value) {
 ; GISel-O0-NEXT:    setge [x0]!, x8!, x9
 ; GISel-O0-NEXT:    ret
 ;
+; SDAG-LABEL: memset_tagged_10000:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    mov w8, #10000
+; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
+; SDAG-NEXT:    setgp [x0]!, x8!, x2
+; SDAG-NEXT:    setgm [x0]!, x8!, x2
+; SDAG-NEXT:    setge [x0]!, x8!, x2
+; SDAG-NEXT:    ret
+;
 ; GISel-LABEL: memset_tagged_10000:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    mov w8, #10000
@@ -229,6 +304,14 @@ define i8* @memset_tagged_size(i8* %dst, i64 %size, i32 %value) {
 ; GISel-O0-NEXT:    setge [x0]!, x1!, x8
 ; GISel-O0-NEXT:    ret
 ;
+; SDAG-LABEL: memset_tagged_size:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
+; SDAG-NEXT:    setgp [x0]!, x1!, x2
+; SDAG-NEXT:    setgm [x0]!, x1!, x2
+; SDAG-NEXT:    setge [x0]!, x1!, x2
+; SDAG-NEXT:    ret
+;
 ; GISel-LABEL: memset_tagged_size:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
index 4bd8383d38e2..847cfd26d173 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
@@ -4,6 +4,8 @@
 ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi     -global-isel=1 -global-isel-abort=1                    | FileCheck %s --check-prefixes=GISel-WITHOUT-MOPS,GISel-WITHOUT-MOPS-O3
 ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops       | FileCheck %s --check-prefixes=GISel-MOPS,GISel-MOPS-O0
 ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi     -global-isel=1 -global-isel-abort=1 -mattr=+mops       | FileCheck %s --check-prefixes=GISel-MOPS,GISel-MOPS-O3
+; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2                    | FileCheck %s --check-prefix=SDAG-WITHOUT-MOPS-O2
+; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops       | FileCheck %s --check-prefix=SDAG-MOPS-O2
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
 
@@ -21,6 +23,14 @@ define void @memset_0_zeroval(i8* %dst) {
 ; GISel-MOPS-LABEL: memset_0_zeroval:
 ; GISel-MOPS:       // %bb.0: // %entry
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_0_zeroval:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_0_zeroval:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 false)
   ret void
@@ -34,6 +44,14 @@ define void @memset_0_zeroval_volatile(i8* %dst) {
 ; GISel-MOPS-LABEL: memset_0_zeroval_volatile:
 ; GISel-MOPS:       // %bb.0: // %entry
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_0_zeroval_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_0_zeroval_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 true)
   ret void
@@ -51,6 +69,18 @@ define void @memset_10_zeroval(i8* %dst) {
 ; GISel-MOPS-NEXT:    str xzr, [x0]
 ; GISel-MOPS-NEXT:    strh wzr, [x0, #8]
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    strh wzr, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str xzr, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_10_zeroval:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    strh wzr, [x0, #8]
+; SDAG-MOPS-O2-NEXT:    str xzr, [x0]
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10, i1 false)
   ret void
@@ -97,6 +127,18 @@ define void @memset_10_zeroval_volatile(i8* %dst) {
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, xzr
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, xzr
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    strh wzr, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str xzr, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_10_zeroval_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    strh wzr, [x0, #8]
+; SDAG-MOPS-O2-NEXT:    str xzr, [x0]
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10, i1 true)
   ret void
@@ -143,6 +185,25 @@ define void @memset_10000_zeroval(i8* %dst) {
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, xzr
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, xzr
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10000_zeroval:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w1, wzr
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w2, #10000
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memset
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_10000_zeroval:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    mov w8, #10000
+; SDAG-MOPS-O2-NEXT:    setp [x0]!, x8!, xzr
+; SDAG-MOPS-O2-NEXT:    setm [x0]!, x8!, xzr
+; SDAG-MOPS-O2-NEXT:    sete [x0]!, x8!, xzr
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10000, i1 false)
   ret void
@@ -189,6 +250,25 @@ define void @memset_10000_zeroval_volatile(i8* %dst) {
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, xzr
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, xzr
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10000_zeroval_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w1, wzr
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w2, #10000
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memset
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_10000_zeroval_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    mov w8, #10000
+; SDAG-MOPS-O2-NEXT:    setp [x0]!, x8!, xzr
+; SDAG-MOPS-O2-NEXT:    setm [x0]!, x8!, xzr
+; SDAG-MOPS-O2-NEXT:    sete [x0]!, x8!, xzr
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10000, i1 true)
   ret void
@@ -220,6 +300,24 @@ define void @memset_size_zeroval(i8* %dst, i64 %size) {
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, xzr
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, xzr
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_size_zeroval:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x2, x1
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w1, wzr
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memset
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_size_zeroval:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    setp [x0]!, x1!, xzr
+; SDAG-MOPS-O2-NEXT:    setm [x0]!, x1!, xzr
+; SDAG-MOPS-O2-NEXT:    sete [x0]!, x1!, xzr
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %size, i1 false)
   ret void
@@ -251,11 +349,30 @@ define void @memset_size_zeroval_volatile(i8* %dst, i64 %size) {
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, xzr
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, xzr
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_size_zeroval_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x2, x1
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w1, wzr
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memset
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_size_zeroval_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    setp [x0]!, x1!, xzr
+; SDAG-MOPS-O2-NEXT:    setm [x0]!, x1!, xzr
+; SDAG-MOPS-O2-NEXT:    sete [x0]!, x1!, xzr
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %size, i1 true)
   ret void
 }
 
+
 define void @memset_0(i8* %dst, i32 %value) {
 ; GISel-WITHOUT-MOPS-LABEL: memset_0:
 ; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
@@ -264,6 +381,14 @@ define void @memset_0(i8* %dst, i32 %value) {
 ; GISel-MOPS-LABEL: memset_0:
 ; GISel-MOPS:       // %bb.0: // %entry
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_0:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_0:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   %value_trunc = trunc i32 %value to i8
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 false)
@@ -278,6 +403,14 @@ define void @memset_0_volatile(i8* %dst, i32 %value) {
 ; GISel-MOPS-LABEL: memset_0_volatile:
 ; GISel-MOPS:       // %bb.0: // %entry
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_0_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_0_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   %value_trunc = trunc i32 %value to i8
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 true)
@@ -328,6 +461,26 @@ define void @memset_10(i8* %dst, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    str x8, [x0]
 ; GISel-MOPS-O3-NEXT:    strh w8, [x0, #8]
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x8, #72340172838076673
+; SDAG-WITHOUT-MOPS-O2-NEXT:    and x9, x1, #0xff
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mul x8, x9, x8
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x8, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    strh w8, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_10:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
+; SDAG-MOPS-O2-NEXT:    mov x8, #72340172838076673
+; SDAG-MOPS-O2-NEXT:    and x9, x1, #0xff
+; SDAG-MOPS-O2-NEXT:    mul x8, x9, x8
+; SDAG-MOPS-O2-NEXT:    str x8, [x0]
+; SDAG-MOPS-O2-NEXT:    strh w8, [x0, #8]
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   %value_trunc = trunc i32 %value to i8
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10, i1 false)
@@ -375,6 +528,26 @@ define void @memset_10_volatile(i8* %dst, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x8, #72340172838076673
+; SDAG-WITHOUT-MOPS-O2-NEXT:    and x9, x1, #0xff
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mul x8, x9, x8
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x8, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    strh w8, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_10_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
+; SDAG-MOPS-O2-NEXT:    mov x8, #72340172838076673
+; SDAG-MOPS-O2-NEXT:    and x9, x1, #0xff
+; SDAG-MOPS-O2-NEXT:    mul x8, x9, x8
+; SDAG-MOPS-O2-NEXT:    str x8, [x0]
+; SDAG-MOPS-O2-NEXT:    strh w8, [x0, #8]
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   %value_trunc = trunc i32 %value to i8
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10, i1 true)
@@ -422,6 +595,25 @@ define void @memset_10000(i8* %dst, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10000:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w2, #10000
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memset
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_10000:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    mov w8, #10000
+; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
+; SDAG-MOPS-O2-NEXT:    setp [x0]!, x8!, x1
+; SDAG-MOPS-O2-NEXT:    setm [x0]!, x8!, x1
+; SDAG-MOPS-O2-NEXT:    sete [x0]!, x8!, x1
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   %value_trunc = trunc i32 %value to i8
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10000, i1 false)
@@ -469,6 +661,25 @@ define void @memset_10000_volatile(i8* %dst, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10000_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w2, #10000
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memset
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_10000_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    mov w8, #10000
+; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
+; SDAG-MOPS-O2-NEXT:    setp [x0]!, x8!, x1
+; SDAG-MOPS-O2-NEXT:    setm [x0]!, x8!, x1
+; SDAG-MOPS-O2-NEXT:    sete [x0]!, x8!, x1
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   %value_trunc = trunc i32 %value to i8
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10000, i1 true)
@@ -518,6 +729,26 @@ define void @memset_size(i8* %dst, i64 %size, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_size:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x8, x1
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w1, w2
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x2, x8
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memset
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_size:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    // kill: def $w2 killed $w2 def $x2
+; SDAG-MOPS-O2-NEXT:    setp [x0]!, x1!, x2
+; SDAG-MOPS-O2-NEXT:    setm [x0]!, x1!, x2
+; SDAG-MOPS-O2-NEXT:    sete [x0]!, x1!, x2
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   %value_trunc = trunc i32 %value to i8
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 %size, i1 false)
@@ -567,12 +798,33 @@ define void @memset_size_volatile(i8* %dst, i64 %size, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memset_size_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x8, x1
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w1, w2
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x2, x8
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memset
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memset_size_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    // kill: def $w2 killed $w2 def $x2
+; SDAG-MOPS-O2-NEXT:    setp [x0]!, x1!, x2
+; SDAG-MOPS-O2-NEXT:    setm [x0]!, x1!, x2
+; SDAG-MOPS-O2-NEXT:    sete [x0]!, x1!, x2
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   %value_trunc = trunc i32 %value to i8
   call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 %size, i1 true)
   ret void
 }
 
+
 define void @memcpy_0(i8* %dst, i8* %src, i32 %value) {
 ; GISel-WITHOUT-MOPS-LABEL: memcpy_0:
 ; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
@@ -581,6 +833,14 @@ define void @memcpy_0(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-LABEL: memcpy_0:
 ; GISel-MOPS:       // %bb.0: // %entry
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_0:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_0:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false)
   ret void
@@ -594,6 +854,14 @@ define void @memcpy_0_volatile(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-LABEL: memcpy_0_volatile:
 ; GISel-MOPS:       // %bb.0: // %entry
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_0_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_0_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true)
   ret void
@@ -615,6 +883,22 @@ define void @memcpy_10(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-NEXT:    ldrh w8, [x1, #8]
 ; GISel-MOPS-NEXT:    strh w8, [x0, #8]
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_10:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldrh w8, [x1, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x9, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    strh w8, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x9, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_10:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ldrh w8, [x1, #8]
+; SDAG-MOPS-O2-NEXT:    ldr x9, [x1]
+; SDAG-MOPS-O2-NEXT:    strh w8, [x0, #8]
+; SDAG-MOPS-O2-NEXT:    str x9, [x0]
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false)
   ret void
@@ -658,6 +942,22 @@ define void @memcpy_10_volatile(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    cpyfm [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    cpyfe [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_10_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x8, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldrh w9, [x1, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    strh w9, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x8, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_10_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ldr x8, [x1]
+; SDAG-MOPS-O2-NEXT:    ldrh w9, [x1, #8]
+; SDAG-MOPS-O2-NEXT:    strh w9, [x0, #8]
+; SDAG-MOPS-O2-NEXT:    str x8, [x0]
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true)
   ret void
@@ -701,6 +1001,24 @@ define void @memcpy_1000(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    cpyfm [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    cpyfe [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_1000:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w2, #1000
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memcpy
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_1000:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    mov w8, #1000
+; SDAG-MOPS-O2-NEXT:    cpyfp [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpyfm [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpyfe [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 false)
   ret void
@@ -744,6 +1062,24 @@ define void @memcpy_1000_volatile(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    cpyfm [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    cpyfe [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_1000_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w2, #1000
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memcpy
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_1000_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    mov w8, #1000
+; SDAG-MOPS-O2-NEXT:    cpyfp [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpyfm [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpyfe [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 true)
   ret void
@@ -765,6 +1101,22 @@ define void @memcpy_n(i8* %dst, i8* %src, i64 %size, i32 %value) {
 ; GISel-MOPS-NEXT:    cpyfm [x0]!, [x1]!, x2!
 ; GISel-MOPS-NEXT:    cpyfe [x0]!, [x1]!, x2!
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_n:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memcpy
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_n:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    cpyfp [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    cpyfm [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    cpyfe [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 false)
   ret void
@@ -786,11 +1138,28 @@ define void @memcpy_n_volatile(i8* %dst, i8* %src, i64 %size, i32 %value) {
 ; GISel-MOPS-NEXT:    cpyfm [x0]!, [x1]!, x2!
 ; GISel-MOPS-NEXT:    cpyfe [x0]!, [x1]!, x2!
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_n_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memcpy
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_n_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    cpyfp [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    cpyfm [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    cpyfe [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 true)
   ret void
 }
 
+
 define void @memcpy_inline_0(i8* %dst, i8* %src, i32 %value) {
 ; GISel-WITHOUT-MOPS-LABEL: memcpy_inline_0:
 ; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
@@ -799,6 +1168,14 @@ define void @memcpy_inline_0(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-LABEL: memcpy_inline_0:
 ; GISel-MOPS:       // %bb.0: // %entry
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_0:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_inline_0:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false)
   ret void
@@ -812,6 +1189,14 @@ define void @memcpy_inline_0_volatile(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-LABEL: memcpy_inline_0_volatile:
 ; GISel-MOPS:       // %bb.0: // %entry
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_0_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_inline_0_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true)
   ret void
@@ -833,6 +1218,22 @@ define void @memcpy_inline_10(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-NEXT:    ldrh w8, [x1, #8]
 ; GISel-MOPS-NEXT:    strh w8, [x0, #8]
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_10:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldrh w8, [x1, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x9, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    strh w8, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x9, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_inline_10:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ldrh w8, [x1, #8]
+; SDAG-MOPS-O2-NEXT:    ldr x9, [x1]
+; SDAG-MOPS-O2-NEXT:    strh w8, [x0, #8]
+; SDAG-MOPS-O2-NEXT:    str x9, [x0]
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false)
   ret void
@@ -854,11 +1255,389 @@ define void @memcpy_inline_10_volatile(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-NEXT:    ldrh w8, [x1, #8]
 ; GISel-MOPS-NEXT:    strh w8, [x0, #8]
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_10_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x8, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldrh w9, [x1, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    strh w9, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x8, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_inline_10_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ldr x8, [x1]
+; SDAG-MOPS-O2-NEXT:    ldrh w9, [x1, #8]
+; SDAG-MOPS-O2-NEXT:    strh w9, [x0, #8]
+; SDAG-MOPS-O2-NEXT:    str x8, [x0]
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true)
   ret void
 }
 
+define void @memcpy_inline_300(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_inline_300:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #16]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #16]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #32]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #32]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #48]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #48]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #64]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #64]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #80]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #80]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #96]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #96]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #112]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #112]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #128]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #128]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #144]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #144]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #160]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #160]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #176]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #176]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #192]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #192]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #208]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #208]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #224]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #224]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #240]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #240]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #256]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #256]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x1, #272]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x0, #272]
+; GISel-WITHOUT-MOPS-O0-NEXT:    add x8, x1, #284
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr q0, [x8]
+; GISel-WITHOUT-MOPS-O0-NEXT:    add x8, x0, #284
+; GISel-WITHOUT-MOPS-O0-NEXT:    str q0, [x8]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_inline_300:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1]
+; GISel-WITHOUT-MOPS-O3-NEXT:    add x8, x1, #284
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #16]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #16]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #32]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #32]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #48]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #48]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #64]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #64]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #80]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #80]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #96]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #96]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #112]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #112]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #128]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #128]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #144]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #144]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #160]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #160]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #176]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #176]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #192]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #192]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #208]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #208]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #224]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #224]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #240]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #240]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #256]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #256]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x1, #272]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x0, #272]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr q0, [x8]
+; GISel-WITHOUT-MOPS-O3-NEXT:    add x8, x0, #284
+; GISel-WITHOUT-MOPS-O3-NEXT:    str q0, [x8]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memcpy_inline_300:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1]
+; GISel-MOPS-O0-NEXT:    str q0, [x0]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #16]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #16]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #32]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #32]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #48]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #48]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #64]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #64]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #80]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #80]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #96]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #96]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #112]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #112]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #128]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #128]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #144]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #144]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #160]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #160]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #176]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #176]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #192]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #192]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #208]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #208]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #224]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #224]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #240]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #240]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #256]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #256]
+; GISel-MOPS-O0-NEXT:    ldr q0, [x1, #272]
+; GISel-MOPS-O0-NEXT:    str q0, [x0, #272]
+; GISel-MOPS-O0-NEXT:    add x8, x1, #284
+; GISel-MOPS-O0-NEXT:    ldr q0, [x8]
+; GISel-MOPS-O0-NEXT:    add x8, x0, #284
+; GISel-MOPS-O0-NEXT:    str q0, [x8]
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memcpy_inline_300:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1]
+; GISel-MOPS-O3-NEXT:    add x8, x1, #284
+; GISel-MOPS-O3-NEXT:    str q0, [x0]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #16]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #16]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #32]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #32]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #48]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #48]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #64]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #64]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #80]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #80]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #96]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #96]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #112]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #112]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #128]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #128]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #144]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #144]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #160]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #160]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #176]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #176]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #192]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #192]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #208]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #208]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #224]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #224]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #240]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #240]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #256]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #256]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x1, #272]
+; GISel-MOPS-O3-NEXT:    str q0, [x0, #272]
+; GISel-MOPS-O3-NEXT:    ldr q0, [x8]
+; GISel-MOPS-O3-NEXT:    add x8, x0, #284
+; GISel-MOPS-O3-NEXT:    str q0, [x8]
+; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_300:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldp q1, q0, [x1, #16]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    add x8, x1, #284
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q2, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    stp q1, q0, [x0, #16]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q2, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldp q1, q0, [x1, #80]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldp q3, q2, [x1, #48]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    stp q1, q0, [x0, #80]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    stp q3, q2, [x0, #48]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldp q1, q0, [x1, #144]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldp q3, q2, [x1, #112]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    stp q1, q0, [x0, #144]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    stp q3, q2, [x0, #112]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldp q1, q0, [x1, #208]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldp q3, q2, [x1, #176]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    stp q1, q0, [x0, #208]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    stp q3, q2, [x0, #176]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldp q2, q1, [x1, #256]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q0, [x8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    add x8, x0, #284
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q3, [x1, #240]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q0, [x8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    stp q2, q1, [x0, #256]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q3, [x0, #240]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_inline_300:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    mov w8, #300
+; SDAG-MOPS-O2-NEXT:    cpyfp [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpyfm [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpyfe [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    ret
+entry:
+  call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 300, i1 false)
+  ret void
+}
+
+define void @memcpy_inline_300_volatile(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memcpy_inline_300_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #16]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #16]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #32]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #32]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #48]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #48]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #64]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #64]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #80]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #80]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #96]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #96]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #112]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #112]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #128]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #128]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #144]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #144]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #160]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #160]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #176]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #176]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #192]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #192]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #208]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #208]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #224]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #224]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #240]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #240]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #256]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #256]
+; GISel-WITHOUT-MOPS-NEXT:    ldr q0, [x1, #272]
+; GISel-WITHOUT-MOPS-NEXT:    str q0, [x0, #272]
+; GISel-WITHOUT-MOPS-NEXT:    ldr x8, [x1, #288]
+; GISel-WITHOUT-MOPS-NEXT:    str x8, [x0, #288]
+; GISel-WITHOUT-MOPS-NEXT:    ldr w8, [x1, #296]
+; GISel-WITHOUT-MOPS-NEXT:    str w8, [x0, #296]
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memcpy_inline_300_volatile:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ldr q0, [x1]
+; GISel-MOPS-NEXT:    str q0, [x0]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #16]
+; GISel-MOPS-NEXT:    str q0, [x0, #16]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #32]
+; GISel-MOPS-NEXT:    str q0, [x0, #32]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #48]
+; GISel-MOPS-NEXT:    str q0, [x0, #48]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #64]
+; GISel-MOPS-NEXT:    str q0, [x0, #64]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #80]
+; GISel-MOPS-NEXT:    str q0, [x0, #80]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #96]
+; GISel-MOPS-NEXT:    str q0, [x0, #96]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #112]
+; GISel-MOPS-NEXT:    str q0, [x0, #112]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #128]
+; GISel-MOPS-NEXT:    str q0, [x0, #128]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #144]
+; GISel-MOPS-NEXT:    str q0, [x0, #144]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #160]
+; GISel-MOPS-NEXT:    str q0, [x0, #160]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #176]
+; GISel-MOPS-NEXT:    str q0, [x0, #176]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #192]
+; GISel-MOPS-NEXT:    str q0, [x0, #192]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #208]
+; GISel-MOPS-NEXT:    str q0, [x0, #208]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #224]
+; GISel-MOPS-NEXT:    str q0, [x0, #224]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #240]
+; GISel-MOPS-NEXT:    str q0, [x0, #240]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #256]
+; GISel-MOPS-NEXT:    str q0, [x0, #256]
+; GISel-MOPS-NEXT:    ldr q0, [x1, #272]
+; GISel-MOPS-NEXT:    str q0, [x0, #272]
+; GISel-MOPS-NEXT:    ldr x8, [x1, #288]
+; GISel-MOPS-NEXT:    str x8, [x0, #288]
+; GISel-MOPS-NEXT:    ldr w8, [x1, #296]
+; GISel-MOPS-NEXT:    str w8, [x0, #296]
+; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_300_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q0, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q1, [x1, #16]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q2, [x1, #32]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q3, [x1, #48]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q3, [x0, #48]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q2, [x0, #32]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q1, [x0, #16]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q0, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q0, [x1, #64]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q1, [x1, #80]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q2, [x1, #96]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q3, [x1, #112]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q3, [x0, #112]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q2, [x0, #96]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q1, [x0, #80]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q0, [x0, #64]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q0, [x1, #128]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q1, [x1, #144]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q2, [x1, #160]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q3, [x1, #176]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q3, [x0, #176]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q2, [x0, #160]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q1, [x0, #144]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q0, [x0, #128]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q0, [x1, #192]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q1, [x1, #208]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q2, [x1, #224]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q3, [x1, #240]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q3, [x0, #240]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q2, [x0, #224]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q1, [x0, #208]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q0, [x0, #192]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q0, [x1, #256]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr q1, [x1, #272]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x8, [x1, #288]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr w9, [x1, #296]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str w9, [x0, #296]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x8, [x0, #288]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q1, [x0, #272]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str q0, [x0, #256]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memcpy_inline_300_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    mov w8, #300
+; SDAG-MOPS-O2-NEXT:    cpyfp [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpyfm [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpyfe [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    ret
+entry:
+  call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 300, i1 true)
+  ret void
+}
+
 define void @memmove_0(i8* %dst, i8* %src, i32 %value) {
 ; GISel-WITHOUT-MOPS-LABEL: memmove_0:
 ; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
@@ -867,6 +1646,14 @@ define void @memmove_0(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-LABEL: memmove_0:
 ; GISel-MOPS:       // %bb.0: // %entry
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_0:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memmove_0:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false)
   ret void
@@ -880,6 +1667,14 @@ define void @memmove_0_volatile(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-LABEL: memmove_0_volatile:
 ; GISel-MOPS:       // %bb.0: // %entry
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_0_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memmove_0_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true)
   ret void
@@ -917,6 +1712,22 @@ define void @memmove_10(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    str x8, [x0]
 ; GISel-MOPS-O3-NEXT:    strh w9, [x0, #8]
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_10:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldrh w8, [x1, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x9, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    strh w8, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x9, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memmove_10:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ldrh w8, [x1, #8]
+; SDAG-MOPS-O2-NEXT:    ldr x9, [x1]
+; SDAG-MOPS-O2-NEXT:    strh w8, [x0, #8]
+; SDAG-MOPS-O2-NEXT:    str x9, [x0]
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false)
   ret void
@@ -960,6 +1771,22 @@ define void @memmove_10_volatile(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    cpym [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    cpye [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_10_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x8, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldrh w9, [x1, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    strh w9, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x8, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memmove_10_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    ldr x8, [x1]
+; SDAG-MOPS-O2-NEXT:    ldrh w9, [x1, #8]
+; SDAG-MOPS-O2-NEXT:    strh w9, [x0, #8]
+; SDAG-MOPS-O2-NEXT:    str x8, [x0]
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true)
   ret void
@@ -1003,6 +1830,24 @@ define void @memmove_1000(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    cpym [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    cpye [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_1000:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w2, #1000
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memmove
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memmove_1000:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    mov w8, #1000
+; SDAG-MOPS-O2-NEXT:    cpyp [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpym [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpye [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 false)
   ret void
@@ -1046,6 +1891,24 @@ define void @memmove_1000_volatile(i8* %dst, i8* %src, i32 %value) {
 ; GISel-MOPS-O3-NEXT:    cpym [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    cpye [x0]!, [x1]!, x8!
 ; GISel-MOPS-O3-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_1000_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    mov w2, #1000
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memmove
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memmove_1000_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    mov w8, #1000
+; SDAG-MOPS-O2-NEXT:    cpyp [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpym [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    cpye [x0]!, [x1]!, x8!
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 true)
   ret void
@@ -1067,6 +1930,22 @@ define void @memmove_n(i8* %dst, i8* %src, i64 %size, i32 %value) {
 ; GISel-MOPS-NEXT:    cpym [x0]!, [x1]!, x2!
 ; GISel-MOPS-NEXT:    cpye [x0]!, [x1]!, x2!
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_n:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memmove
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memmove_n:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    cpyp [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    cpym [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    cpye [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 false)
   ret void
@@ -1088,6 +1967,22 @@ define void @memmove_n_volatile(i8* %dst, i8* %src, i64 %size, i32 %value) {
 ; GISel-MOPS-NEXT:    cpym [x0]!, [x1]!, x2!
 ; GISel-MOPS-NEXT:    cpye [x0]!, [x1]!, x2!
 ; GISel-MOPS-NEXT:    ret
+;
+; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_n_volatile:
+; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    .cfi_offset w30, -16
+; SDAG-WITHOUT-MOPS-O2-NEXT:    bl memmove
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-WITHOUT-MOPS-O2-NEXT:    ret
+;
+; SDAG-MOPS-O2-LABEL: memmove_n_volatile:
+; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    cpyp [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    cpym [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    cpye [x0]!, [x1]!, x2!
+; SDAG-MOPS-O2-NEXT:    ret
 entry:
   call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 true)
   ret void


        


More information about the llvm-commits mailing list