[llvm] 78fd413 - [AArch64][GlobalISel] CodeGen for Armv8.8/9.3 MOPS

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 31 12:54:51 PST 2022


Author: tyb0807
Date: 2022-01-31T20:54:41Z
New Revision: 78fd413cf736953ac623cabf3d5f84c8219e31f8

URL: https://github.com/llvm/llvm-project/commit/78fd413cf736953ac623cabf3d5f84c8219e31f8
DIFF: https://github.com/llvm/llvm-project/commit/78fd413cf736953ac623cabf3d5f84c8219e31f8.diff

LOG: [AArch64][GlobalISel] CodeGen for Armv8.8/9.3 MOPS

This implements codegen for the Armv8.8/9.3 Memory Operations extension
(MOPS). Any memcpy/memset/memmove intrinsics will always be emitted as
a series of three consecutive instructions P, M and E which perform the
operation. The SelectionDAG implementation is split into a separate
patch.
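
For illustration, this is the triple produced for a variable-sized
memset with +mops, taken from the memset_size test added below
(dst in x0, size in x1, value in w2/x2):

  ; call void @llvm.memset.p0i8.i64(i8* %dst, i8 %value_trunc, i64 %size, i1 false)
  setp [x0]!, x1!, x2
  setm [x0]!, x1!, x2
  sete [x0]!, x1!, x2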

AArch64LegalizerInfo will now consider the following generic opcodes
if +mops is available, instead of legalising by expanding them to
libcalls: G_BZERO, G_MEMCPY_INLINE, G_MEMCPY, G_MEMMOVE, G_MEMSET.
The s8 value of memset is legalised to s64 to match the pseudos.
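
As a rough sketch (virtual register names are illustrative, not taken
from the patch, and memory operands are omitted), the custom
legalisation of G_MEMSET only widens the value operand:

  ; before legalisation
  G_MEMSET %dst(p0), %val(s8), %size(s64), 0
  ; after legalisation with +mops
  %val64:_(s64) = G_ANYEXT %val(s8)
  G_MEMSET %dst(p0), %val64(s64), %size(s64), 0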

AArch64O0PreLegalizerCombinerInfo will still be able to combine
G_MEMCPY_INLINE even if +mops is present, as it is unclear whether it
is better to generate fixed-length copies or MOPS instructions for the
inline code of small or zero-sized memory operations; we choose to be
conservative for now.

AArch64InstructionSelector will select the above as new pseudo
instructions: AArch64::MOPSMemory{Copy/Move/Set/SetTagging}. These are
each expanded to a series of three instructions (e.g. SETP/SETM/SETE)
which must be emitted together during code emission to avoid scheduler
reordering.
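
For reference, the pseudo-to-triple mapping implemented by LowerMOPS
in this patch is:

  MOPSMemoryCopyPseudo       -> CPYFP, CPYFM, CPYFE
  MOPSMemoryMovePseudo       -> CPYP,  CPYM,  CPYE
  MOPSMemorySetPseudo        -> SETP,  SETM,  SETE
  MOPSMemorySetTaggingPseudo -> SETGP, SETGM, MOPSSETGE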

This is part 3/4 of a series of patches split from
https://reviews.llvm.org/D117405 to facilitate reviewing.

Patch by Tomas Matheson and Son Tuan Vu

Differential Revision: https://reviews.llvm.org/D117763

Added: 
    llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
    llvm/test/CodeGen/AArch64/aarch64-mops.ll

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
    llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
    llvm/unittests/Target/AArch64/InstSizes.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index a0fcb320a380..17cb53dd2d5b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -432,16 +432,6 @@ class LegalizeRuleSet {
     return TypeIdx;
   }
 
-  unsigned immIdx(unsigned ImmIdx) {
-    assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
-                      MCOI::OPERAND_FIRST_GENERIC_IMM) &&
-           "Imm Index is out of bounds");
-#ifndef NDEBUG
-    ImmIdxsCovered.set(ImmIdx);
-#endif
-    return ImmIdx;
-  }
-
   void markAllIdxsAsCovered() {
 #ifndef NDEBUG
     TypeIdxsCovered.set();
@@ -568,6 +558,16 @@ class LegalizeRuleSet {
   }
   unsigned getAlias() const { return AliasOf; }
 
+  unsigned immIdx(unsigned ImmIdx) {
+    assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
+                      MCOI::OPERAND_FIRST_GENERIC_IMM) &&
+           "Imm Index is out of bounds");
+#ifndef NDEBUG
+    ImmIdxsCovered.set(ImmIdx);
+#endif
+    return ImmIdx;
+  }
+
   /// The instruction is legal if predicate is true.
   LegalizeRuleSet &legalIf(LegalityPredicate Predicate) {
     // We have no choice but conservatively assume that the free-form
@@ -824,11 +824,22 @@ class LegalizeRuleSet {
   LegalizeRuleSet &customForCartesianProduct(std::initializer_list<LLT> Types) {
     return actionForCartesianProduct(LegalizeAction::Custom, Types);
   }
+  /// The instruction is custom when type indexes 0 and 1 are both in their
+  /// respective lists.
   LegalizeRuleSet &
   customForCartesianProduct(std::initializer_list<LLT> Types0,
                             std::initializer_list<LLT> Types1) {
     return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1);
   }
+  /// The instruction is custom when type indexes 0, 1, and 2 are all in
+  /// their respective lists.
+  LegalizeRuleSet &
+  customForCartesianProduct(std::initializer_list<LLT> Types0,
+                            std::initializer_list<LLT> Types1,
+                            std::initializer_list<LLT> Types2) {
+    return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1,
+                                     Types2);
+  }
 
   /// Unconditionally custom lower.
   LegalizeRuleSet &custom() {

diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 85a9c04a3fef..b54a0eaba7d1 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -95,6 +95,8 @@ class AArch64AsmPrinter : public AsmPrinter {
 
   void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);
 
+  void LowerMOPS(MCStreamer &OutStreamer, const MachineInstr &MI);
+
   void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
                      const MachineInstr &MI);
   void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -936,6 +938,43 @@ void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
                                   .addImm(Size == 4 ? 0 : 2));
 }
 
+void AArch64AsmPrinter::LowerMOPS(llvm::MCStreamer &OutStreamer,
+                                  const llvm::MachineInstr &MI) {
+  unsigned Opcode = MI.getOpcode();
+  assert(STI->hasMOPS());
+  assert(STI->hasMTE() || Opcode != AArch64::MOPSMemorySetTaggingPseudo);
+
+  const auto Ops = [Opcode]() -> std::array<unsigned, 3> {
+    if (Opcode == AArch64::MOPSMemoryCopyPseudo)
+      return {AArch64::CPYFP, AArch64::CPYFM, AArch64::CPYFE};
+    if (Opcode == AArch64::MOPSMemoryMovePseudo)
+      return {AArch64::CPYP, AArch64::CPYM, AArch64::CPYE};
+    if (Opcode == AArch64::MOPSMemorySetPseudo)
+      return {AArch64::SETP, AArch64::SETM, AArch64::SETE};
+    if (Opcode == AArch64::MOPSMemorySetTaggingPseudo)
+      return {AArch64::SETGP, AArch64::SETGM, AArch64::MOPSSETGE};
+    llvm_unreachable("Unhandled memory operation pseudo");
+  }();
+  const bool IsSet = Opcode == AArch64::MOPSMemorySetPseudo ||
+                     Opcode == AArch64::MOPSMemorySetTaggingPseudo;
+
+  for (auto Op : Ops) {
+    int i = 0;
+    auto MCIB = MCInstBuilder(Op);
+    // Destination registers
+    MCIB.addReg(MI.getOperand(i++).getReg());
+    MCIB.addReg(MI.getOperand(i++).getReg());
+    if (!IsSet)
+      MCIB.addReg(MI.getOperand(i++).getReg());
+    // Input registers
+    MCIB.addReg(MI.getOperand(i++).getReg());
+    MCIB.addReg(MI.getOperand(i++).getReg());
+    MCIB.addReg(MI.getOperand(i++).getReg());
+
+    EmitToStreamer(OutStreamer, MCIB);
+  }
+}
+
 void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
                                       const MachineInstr &MI) {
   unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
@@ -1363,6 +1402,13 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
     emitFMov0(*MI);
     return;
 
+  case AArch64::MOPSMemoryCopyPseudo:
+  case AArch64::MOPSMemoryMovePseudo:
+  case AArch64::MOPSMemorySetPseudo:
+  case AArch64::MOPSMemorySetTaggingPseudo:
+    LowerMOPS(*OutStreamer, *MI);
+    return;
+
   case TargetOpcode::STACKMAP:
     return LowerSTACKMAP(*OutStreamer, SM, *MI);
 

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 93c17133c845..c3ccc541c0b8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -130,6 +130,10 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   case AArch64::JumpTableDest32:
   case AArch64::JumpTableDest16:
   case AArch64::JumpTableDest8:
+  case AArch64::MOPSMemoryCopyPseudo:
+  case AArch64::MOPSMemoryMovePseudo:
+  case AArch64::MOPSMemorySetPseudo:
+  case AArch64::MOPSMemorySetTaggingPseudo:
     NumBytes = 12;
     break;
   case AArch64::SPACE:

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 91baf4cd11e0..dc2d4dee1129 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8362,6 +8362,27 @@ let Predicates = [HasMOPS, HasMTE] in {
   }
 }
 
+let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
+  let mayLoad = 1 in {
+    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
+                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
+                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
+    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
+                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
+                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
+  }
+  let mayLoad = 0 in {
+    def MOPSMemorySetPseudo  : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
+                                      (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
+                                      [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
+  }
+}
+let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
+  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
+                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
+                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
+}
+
 let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in
 def StoreSwiftAsyncContext
       : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 1f546ad50d57..703e356f016d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -192,6 +192,7 @@ class AArch64InstructionSelector : public InstructionSelector {
   bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
+  bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
 
   unsigned emitConstantPoolEntry(const Constant *CPVal,
@@ -3424,6 +3425,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
   case TargetOpcode::G_VECREDUCE_FADD:
   case TargetOpcode::G_VECREDUCE_ADD:
     return selectReduction(I, MRI);
+  case TargetOpcode::G_MEMCPY:
+  case TargetOpcode::G_MEMCPY_INLINE:
+  case TargetOpcode::G_MEMMOVE:
+  case TargetOpcode::G_MEMSET:
+    assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
+    return selectMOPS(I, MRI);
   }
 
   return false;
@@ -3481,6 +3488,64 @@ bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
   return false;
 }
 
+bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
+                                            MachineRegisterInfo &MRI) {
+  unsigned Mopcode;
+  switch (GI.getOpcode()) {
+  case TargetOpcode::G_MEMCPY:
+  case TargetOpcode::G_MEMCPY_INLINE:
+    Mopcode = AArch64::MOPSMemoryCopyPseudo;
+    break;
+  case TargetOpcode::G_MEMMOVE:
+    Mopcode = AArch64::MOPSMemoryMovePseudo;
+    break;
+  case TargetOpcode::G_MEMSET:
+    // For tagged memset see llvm.aarch64.mops.memset.tag
+    Mopcode = AArch64::MOPSMemorySetPseudo;
+    break;
+  }
+
+  auto &DstPtr = GI.getOperand(0);
+  auto &SrcOrVal = GI.getOperand(1);
+  auto &Size = GI.getOperand(2);
+
+  // Create copies of the registers that can be clobbered.
+  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
+  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
+  const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
+
+  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
+  const auto &SrcValRegClass =
+      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
+
+  // Constrain to specific registers
+  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
+  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
+  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
+
+  MIB.buildCopy(DstPtrCopy, DstPtr);
+  MIB.buildCopy(SrcValCopy, SrcOrVal);
+  MIB.buildCopy(SizeCopy, Size);
+
+  // New instruction uses the copied registers because it must update them.
+  // The defs are not used since they don't exist in G_MEM*. They are still
+  // tied.
+  // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
+  Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
+  Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+  if (IsSet) {
+    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
+                   {DstPtrCopy, SizeCopy, SrcValCopy});
+  } else {
+    Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
+    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
+                   {DstPtrCopy, SrcValCopy, SizeCopy});
+  }
+
+  GI.eraseFromParent();
+  return true;
+}
+
 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                             MachineRegisterInfo &MRI) {
   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
@@ -5375,6 +5440,36 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
     constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
     break;
   }
+  case Intrinsic::aarch64_mops_memset_tag: {
+    // Transform
+    //    %dst:gpr(p0) = \
+    //      G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
+    //      \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
+    // where %dst is updated, into
+    //    %Rd:GPR64common, %Rn:GPR64) = \
+    //      MOPSMemorySetTaggingPseudo \
+    //      %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
+    // where Rd and Rn are tied.
+    // It is expected that %val has been extended to s64 in legalization.
+    // Note that the order of the size/value operands are swapped.
+
+    Register DstDef = I.getOperand(0).getReg();
+    // I.getOperand(1) is the intrinsic function
+    Register DstUse = I.getOperand(2).getReg();
+    Register ValUse = I.getOperand(3).getReg();
+    Register SizeUse = I.getOperand(4).getReg();
+
+    // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
+    // Therefore an additional virtual register is required for the updated size
+    // operand. This value is not accessible via the semantics of the intrinsic.
+    Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
+
+    auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
+                                 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
+    Memset.cloneMemRefs(I);
+    constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
+    break;
+  }
   }
 
   I.eraseFromParent();

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e8894e7933d6..e9df7e001d38 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -699,8 +699,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
 
-  getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
-      .libcall();
+  if (ST.hasMOPS()) {
+    // G_BZERO is not supported. Currently it is only emitted by
+    // PreLegalizerCombiner for G_MEMSET with zero constant.
+    getActionDefinitionsBuilder(G_BZERO).unsupported();
+
+    getActionDefinitionsBuilder(G_MEMSET)
+        .legalForCartesianProduct({p0}, {s64}, {s64})
+        .customForCartesianProduct({p0}, {s8}, {s64})
+        .immIdx(0); // Inform verifier imm idx 0 is handled.
+
+    getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
+        .legalForCartesianProduct({p0}, {p0}, {s64})
+        .immIdx(0); // Inform verifier imm idx 0 is handled.
+
+    // G_MEMCPY_INLINE does not have a tailcall immediate
+    getActionDefinitionsBuilder(G_MEMCPY_INLINE)
+        .legalForCartesianProduct({p0}, {p0}, {s64});
+
+  } else {
+    getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
+        .libcall();
+  }
 
   // FIXME: Legal types are only legal with NEON.
   getActionDefinitionsBuilder(G_ABS)
@@ -832,6 +852,11 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
     return legalizeAtomicCmpxchg128(MI, MRI, Helper);
   case TargetOpcode::G_CTTZ:
     return legalizeCTTZ(MI, Helper);
+  case TargetOpcode::G_BZERO:
+  case TargetOpcode::G_MEMCPY:
+  case TargetOpcode::G_MEMMOVE:
+  case TargetOpcode::G_MEMSET:
+    return legalizeMemOps(MI, Helper);
   }
 
   llvm_unreachable("expected switch to return");
@@ -989,6 +1014,15 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
     MI.eraseFromParent();
     return true;
   }
+  case Intrinsic::aarch64_mops_memset_tag: {
+    assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+    // Zext the value to 64 bit
+    MachineIRBuilder MIB(MI);
+    auto &Value = MI.getOperand(3);
+    Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
+    Value.setReg(ZExtValueReg);
+    return true;
+  }
   }
 
   return true;
@@ -1359,3 +1393,20 @@ bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
   MI.eraseFromParent();
   return true;
 }
+
+bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
+                                          LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+
+  // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
+  if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
+    // Zext the value operand to 64 bit
+    auto &Value = MI.getOperand(1);
+    Register ZExtValueReg =
+        MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
+    Value.setReg(ZExtValueReg);
+    return true;
+  }
+
+  return false;
+}

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index e2c46f4b4c1f..973f96ff4775 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -56,6 +56,7 @@ class AArch64LegalizerInfo : public LegalizerInfo {
   bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 LegalizerHelper &Helper) const;
   bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
+  bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
   const AArch64Subtarget *ST;
 };
 } // End llvm namespace.

diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
new file mode 100644
index 000000000000..d57c4ceb2765
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
@@ -0,0 +1,243 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte  | FileCheck %s --check-prefix=GISel-O0
+; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi     -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte  | FileCheck %s --check-prefix=GISel
+
+; Function Attrs: mustprogress nofree nosync nounwind willreturn writeonly
+declare i8* @llvm.aarch64.mops.memset.tag(i8*, i8, i64)
+
+define i8* @memset_tagged_0_zeroval(i8* %dst, i64 %size) {
+; GISel-O0-LABEL: memset_tagged_0_zeroval:
+; GISel-O0:       // %bb.0: // %entry
+; GISel-O0-NEXT:    mov x8, xzr
+; GISel-O0-NEXT:    setgp [x0]!, x8!, x8
+; GISel-O0-NEXT:    setgm [x0]!, x8!, x8
+; GISel-O0-NEXT:    setge [x0]!, x8!, x8
+; GISel-O0-NEXT:    ret
+;
+; GISel-LABEL: memset_tagged_0_zeroval:
+; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    mov x8, xzr
+; GISel-NEXT:    setgp [x0]!, x8!, xzr
+; GISel-NEXT:    setgm [x0]!, x8!, xzr
+; GISel-NEXT:    setge [x0]!, x8!, xzr
+; GISel-NEXT:    ret
+entry:
+  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 0)
+  ret i8* %r
+}
+
+define i8* @memset_tagged_1_zeroval(i8* %dst, i64 %size) {
+; GISel-O0-LABEL: memset_tagged_1_zeroval:
+; GISel-O0:       // %bb.0: // %entry
+; GISel-O0-NEXT:    mov x9, xzr
+; GISel-O0-NEXT:    mov w8, #1
+; GISel-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
+; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
+; GISel-O0-NEXT:    setge [x0]!, x8!, x9
+; GISel-O0-NEXT:    ret
+;
+; GISel-LABEL: memset_tagged_1_zeroval:
+; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    mov w8, #1
+; GISel-NEXT:    setgp [x0]!, x8!, xzr
+; GISel-NEXT:    setgm [x0]!, x8!, xzr
+; GISel-NEXT:    setge [x0]!, x8!, xzr
+; GISel-NEXT:    ret
+entry:
+  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 1)
+  ret i8* %r
+}
+
+define i8* @memset_tagged_10_zeroval(i8* %dst, i64 %size) {
+; GISel-O0-LABEL: memset_tagged_10_zeroval:
+; GISel-O0:       // %bb.0: // %entry
+; GISel-O0-NEXT:    mov x9, xzr
+; GISel-O0-NEXT:    mov w8, #10
+; GISel-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
+; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
+; GISel-O0-NEXT:    setge [x0]!, x8!, x9
+; GISel-O0-NEXT:    ret
+;
+; GISel-LABEL: memset_tagged_10_zeroval:
+; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    mov w8, #10
+; GISel-NEXT:    setgp [x0]!, x8!, xzr
+; GISel-NEXT:    setgm [x0]!, x8!, xzr
+; GISel-NEXT:    setge [x0]!, x8!, xzr
+; GISel-NEXT:    ret
+entry:
+  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 10)
+  ret i8* %r
+}
+
+define i8* @memset_tagged_10000_zeroval(i8* %dst, i64 %size) {
+; GISel-O0-LABEL: memset_tagged_10000_zeroval:
+; GISel-O0:       // %bb.0: // %entry
+; GISel-O0-NEXT:    mov x9, xzr
+; GISel-O0-NEXT:    mov w8, #10000
+; GISel-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
+; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
+; GISel-O0-NEXT:    setge [x0]!, x8!, x9
+; GISel-O0-NEXT:    ret
+;
+; GISel-LABEL: memset_tagged_10000_zeroval:
+; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    mov w8, #10000
+; GISel-NEXT:    setgp [x0]!, x8!, xzr
+; GISel-NEXT:    setgm [x0]!, x8!, xzr
+; GISel-NEXT:    setge [x0]!, x8!, xzr
+; GISel-NEXT:    ret
+entry:
+  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 10000)
+  ret i8* %r
+}
+
+define i8* @memset_tagged_size_zeroval(i8* %dst, i64 %size) {
+; GISel-O0-LABEL: memset_tagged_size_zeroval:
+; GISel-O0:       // %bb.0: // %entry
+; GISel-O0-NEXT:    mov x8, xzr
+; GISel-O0-NEXT:    setgp [x0]!, x1!, x8
+; GISel-O0-NEXT:    setgm [x0]!, x1!, x8
+; GISel-O0-NEXT:    setge [x0]!, x1!, x8
+; GISel-O0-NEXT:    ret
+;
+; GISel-LABEL: memset_tagged_size_zeroval:
+; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    setgp [x0]!, x1!, xzr
+; GISel-NEXT:    setgm [x0]!, x1!, xzr
+; GISel-NEXT:    setge [x0]!, x1!, xzr
+; GISel-NEXT:    ret
+entry:
+  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 %size)
+  ret i8* %r
+}
+
+define i8* @memset_tagged_0(i8* %dst, i64 %size, i32 %value) {
+; GISel-O0-LABEL: memset_tagged_0:
+; GISel-O0:       // %bb.0: // %entry
+; GISel-O0-NEXT:    // implicit-def: $x9
+; GISel-O0-NEXT:    mov w9, w2
+; GISel-O0-NEXT:    mov x8, xzr
+; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
+; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
+; GISel-O0-NEXT:    setge [x0]!, x8!, x9
+; GISel-O0-NEXT:    ret
+;
+; GISel-LABEL: memset_tagged_0:
+; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    mov x8, xzr
+; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
+; GISel-NEXT:    setgp [x0]!, x8!, x2
+; GISel-NEXT:    setgm [x0]!, x8!, x2
+; GISel-NEXT:    setge [x0]!, x8!, x2
+; GISel-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 0)
+  ret i8* %r
+}
+
+define i8* @memset_tagged_1(i8* %dst, i64 %size, i32 %value) {
+; GISel-O0-LABEL: memset_tagged_1:
+; GISel-O0:       // %bb.0: // %entry
+; GISel-O0-NEXT:    // implicit-def: $x9
+; GISel-O0-NEXT:    mov w9, w2
+; GISel-O0-NEXT:    mov w8, #1
+; GISel-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
+; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
+; GISel-O0-NEXT:    setge [x0]!, x8!, x9
+; GISel-O0-NEXT:    ret
+;
+; GISel-LABEL: memset_tagged_1:
+; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    mov w8, #1
+; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
+; GISel-NEXT:    setgp [x0]!, x8!, x2
+; GISel-NEXT:    setgm [x0]!, x8!, x2
+; GISel-NEXT:    setge [x0]!, x8!, x2
+; GISel-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 1)
+  ret i8* %r
+}
+
+define i8* @memset_tagged_10(i8* %dst, i64 %size, i32 %value) {
+; GISel-O0-LABEL: memset_tagged_10:
+; GISel-O0:       // %bb.0: // %entry
+; GISel-O0-NEXT:    // implicit-def: $x9
+; GISel-O0-NEXT:    mov w9, w2
+; GISel-O0-NEXT:    mov w8, #10
+; GISel-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
+; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
+; GISel-O0-NEXT:    setge [x0]!, x8!, x9
+; GISel-O0-NEXT:    ret
+;
+; GISel-LABEL: memset_tagged_10:
+; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    mov w8, #10
+; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
+; GISel-NEXT:    setgp [x0]!, x8!, x2
+; GISel-NEXT:    setgm [x0]!, x8!, x2
+; GISel-NEXT:    setge [x0]!, x8!, x2
+; GISel-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 10)
+  ret i8* %r
+}
+
+define i8* @memset_tagged_10000(i8* %dst, i64 %size, i32 %value) {
+; GISel-O0-LABEL: memset_tagged_10000:
+; GISel-O0:       // %bb.0: // %entry
+; GISel-O0-NEXT:    // implicit-def: $x9
+; GISel-O0-NEXT:    mov w9, w2
+; GISel-O0-NEXT:    mov w8, #10000
+; GISel-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
+; GISel-O0-NEXT:    setgm [x0]!, x8!, x9
+; GISel-O0-NEXT:    setge [x0]!, x8!, x9
+; GISel-O0-NEXT:    ret
+;
+; GISel-LABEL: memset_tagged_10000:
+; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    mov w8, #10000
+; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
+; GISel-NEXT:    setgp [x0]!, x8!, x2
+; GISel-NEXT:    setgm [x0]!, x8!, x2
+; GISel-NEXT:    setge [x0]!, x8!, x2
+; GISel-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 10000)
+  ret i8* %r
+}
+
+define i8* @memset_tagged_size(i8* %dst, i64 %size, i32 %value) {
+; GISel-O0-LABEL: memset_tagged_size:
+; GISel-O0:       // %bb.0: // %entry
+; GISel-O0-NEXT:    // implicit-def: $x8
+; GISel-O0-NEXT:    mov w8, w2
+; GISel-O0-NEXT:    setgp [x0]!, x1!, x8
+; GISel-O0-NEXT:    setgm [x0]!, x1!, x8
+; GISel-O0-NEXT:    setge [x0]!, x1!, x8
+; GISel-O0-NEXT:    ret
+;
+; GISel-LABEL: memset_tagged_size:
+; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
+; GISel-NEXT:    setgp [x0]!, x1!, x2
+; GISel-NEXT:    setgm [x0]!, x1!, x2
+; GISel-NEXT:    setge [x0]!, x1!, x2
+; GISel-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 %size)
+  ret i8* %r
+}

diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
new file mode 100644
index 000000000000..4bd8383d38e2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
@@ -0,0 +1,1094 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1                    | FileCheck %s --check-prefixes=GISel-WITHOUT-MOPS,GISel-WITHOUT-MOPS-O0
+; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi     -global-isel=1 -global-isel-abort=1                    | FileCheck %s --check-prefixes=GISel-WITHOUT-MOPS,GISel-WITHOUT-MOPS-O3
+; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops       | FileCheck %s --check-prefixes=GISel-MOPS,GISel-MOPS-O0
+; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi     -global-isel=1 -global-isel-abort=1 -mattr=+mops       | FileCheck %s --check-prefixes=GISel-MOPS,GISel-MOPS-O3
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg)
+
+declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg)
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg)
+
+define void @memset_0_zeroval(i8* %dst) {
+; GISel-WITHOUT-MOPS-LABEL: memset_0_zeroval:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memset_0_zeroval:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 false)
+  ret void
+}
+
+define void @memset_0_zeroval_volatile(i8* %dst) {
+; GISel-WITHOUT-MOPS-LABEL: memset_0_zeroval_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memset_0_zeroval_volatile:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 true)
+  ret void
+}
+
+define void @memset_10_zeroval(i8* %dst) {
+; GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    str xzr, [x0]
+; GISel-WITHOUT-MOPS-NEXT:    strh wzr, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memset_10_zeroval:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    str xzr, [x0]
+; GISel-MOPS-NEXT:    strh wzr, [x0, #8]
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10, i1 false)
+  ret void
+}
+
+define void @memset_10_zeroval_volatile(i8* %dst) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_zeroval_volatile:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w1, wzr
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_zeroval_volatile:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w1, wzr
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memset_10_zeroval_volatile:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    mov x9, xzr
+; GISel-MOPS-O0-NEXT:    mov w8, #10
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    setp [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    setm [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    sete [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memset_10_zeroval_volatile:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #10
+; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, xzr
+; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, xzr
+; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, xzr
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10, i1 true)
+  ret void
+}
+
+define void @memset_10000_zeroval(i8* %dst) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_10000_zeroval:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w1, wzr
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10000
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_10000_zeroval:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w1, wzr
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10000
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memset_10000_zeroval:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    mov x9, xzr
+; GISel-MOPS-O0-NEXT:    mov w8, #10000
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    setp [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    setm [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    sete [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memset_10000_zeroval:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #10000
+; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, xzr
+; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, xzr
+; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, xzr
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10000, i1 false)
+  ret void
+}
+
+define void @memset_10000_zeroval_volatile(i8* %dst) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_10000_zeroval_volatile:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w1, wzr
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10000
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_10000_zeroval_volatile:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w1, wzr
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10000
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memset_10000_zeroval_volatile:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    mov x9, xzr
+; GISel-MOPS-O0-NEXT:    mov w8, #10000
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    setp [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    setm [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    sete [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memset_10000_zeroval_volatile:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #10000
+; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, xzr
+; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, xzr
+; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, xzr
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10000, i1 true)
+  ret void
+}
+
+define void @memset_size_zeroval(i8* %dst, i64 %size) {
+; GISel-WITHOUT-MOPS-LABEL: memset_size_zeroval:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-NEXT:    mov x2, x1
+; GISel-WITHOUT-MOPS-NEXT:    mov w1, wzr
+; GISel-WITHOUT-MOPS-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memset_size_zeroval:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    mov x8, xzr
+; GISel-MOPS-O0-NEXT:    setp [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    setm [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    sete [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memset_size_zeroval:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    setp [x0]!, x1!, xzr
+; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, xzr
+; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, xzr
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %size, i1 false)
+  ret void
+}
+
+define void @memset_size_zeroval_volatile(i8* %dst, i64 %size) {
+; GISel-WITHOUT-MOPS-LABEL: memset_size_zeroval_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-NEXT:    mov x2, x1
+; GISel-WITHOUT-MOPS-NEXT:    mov w1, wzr
+; GISel-WITHOUT-MOPS-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memset_size_zeroval_volatile:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    mov x8, xzr
+; GISel-MOPS-O0-NEXT:    setp [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    setm [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    sete [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memset_size_zeroval_volatile:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    setp [x0]!, x1!, xzr
+; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, xzr
+; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, xzr
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %size, i1 true)
+  ret void
+}
+
+define void @memset_0(i8* %dst, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memset_0:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memset_0:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 false)
+  ret void
+}
+
+define void @memset_0_volatile(i8* %dst, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memset_0_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memset_0_volatile:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 true)
+  ret void
+}
+
+define void @memset_10(i8* %dst, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_10:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    // implicit-def: $x8
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, w1
+; GISel-WITHOUT-MOPS-O0-NEXT:    and x8, x8, #0xff
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov x9, #72340172838076673
+; GISel-WITHOUT-MOPS-O0-NEXT:    mul x8, x8, x9
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x8, [x0]
+; GISel-WITHOUT-MOPS-O0-NEXT:    // kill: def $w8 killed $w8 killed $x8
+; GISel-WITHOUT-MOPS-O0-NEXT:    strh w8, [x0, #8]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_10:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov x8, #72340172838076673
+; GISel-WITHOUT-MOPS-O3-NEXT:    and x9, x1, #0xff
+; GISel-WITHOUT-MOPS-O3-NEXT:    mul x8, x9, x8
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x8, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT:    strh w8, [x0, #8]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memset_10:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    // implicit-def: $x8
+; GISel-MOPS-O0-NEXT:    mov w8, w1
+; GISel-MOPS-O0-NEXT:    and x8, x8, #0xff
+; GISel-MOPS-O0-NEXT:    mov x9, #72340172838076673
+; GISel-MOPS-O0-NEXT:    mul x8, x8, x9
+; GISel-MOPS-O0-NEXT:    str x8, [x0]
+; GISel-MOPS-O0-NEXT:    // kill: def $w8 killed $w8 killed $x8
+; GISel-MOPS-O0-NEXT:    strh w8, [x0, #8]
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memset_10:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GISel-MOPS-O3-NEXT:    mov x8, #72340172838076673
+; GISel-MOPS-O3-NEXT:    and x9, x1, #0xff
+; GISel-MOPS-O3-NEXT:    mul x8, x9, x8
+; GISel-MOPS-O3-NEXT:    str x8, [x0]
+; GISel-MOPS-O3-NEXT:    strh w8, [x0, #8]
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10, i1 false)
+  ret void
+}
+
+define void @memset_10_volatile(i8* %dst, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_volatile:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_volatile:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memset_10_volatile:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    // implicit-def: $x9
+; GISel-MOPS-O0-NEXT:    mov w9, w1
+; GISel-MOPS-O0-NEXT:    mov w8, #10
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    setp [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    setm [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    sete [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memset_10_volatile:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #10
+; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10, i1 true)
+  ret void
+}
+
+define void @memset_10000(i8* %dst, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_10000:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10000
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_10000:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10000
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memset_10000:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    // implicit-def: $x9
+; GISel-MOPS-O0-NEXT:    mov w9, w1
+; GISel-MOPS-O0-NEXT:    mov w8, #10000
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    setp [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    setm [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    sete [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memset_10000:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #10000
+; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10000, i1 false)
+  ret void
+}
+
+define void @memset_10000_volatile(i8* %dst, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_10000_volatile:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10000
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_10000_volatile:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10000
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memset_10000_volatile:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    // implicit-def: $x9
+; GISel-MOPS-O0-NEXT:    mov w9, w1
+; GISel-MOPS-O0-NEXT:    mov w8, #10000
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    setp [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    setm [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    sete [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memset_10000_volatile:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #10000
+; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10000, i1 true)
+  ret void
+}
+
+define void @memset_size(i8* %dst, i64 %size, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_size:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    sub sp, sp, #32
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 32
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x1, [sp, #8] // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w1, w2
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x2, [sp, #8] // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    add sp, sp, #32
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_size:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov x3, x1
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w1, w2
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov x2, x3
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memset_size:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    // implicit-def: $x8
+; GISel-MOPS-O0-NEXT:    mov w8, w2
+; GISel-MOPS-O0-NEXT:    setp [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    setm [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    sete [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memset_size:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    // kill: def $w2 killed $w2 def $x2
+; GISel-MOPS-O3-NEXT:    setp [x0]!, x1!, x2
+; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, x2
+; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, x2
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 %size, i1 false)
+  ret void
+}
+
+define void @memset_size_volatile(i8* %dst, i64 %size, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_size_volatile:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    sub sp, sp, #32
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 32
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x1, [sp, #8] // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w1, w2
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x2, [sp, #8] // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    add sp, sp, #32
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_size_volatile:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov x3, x1
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w1, w2
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov x2, x3
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memset
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memset_size_volatile:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    // implicit-def: $x8
+; GISel-MOPS-O0-NEXT:    mov w8, w2
+; GISel-MOPS-O0-NEXT:    setp [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    setm [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    sete [x0]!, x1!, x8
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memset_size_volatile:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    // kill: def $w2 killed $w2 def $x2
+; GISel-MOPS-O3-NEXT:    setp [x0]!, x1!, x2
+; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, x2
+; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, x2
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  %value_trunc = trunc i32 %value to i8
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 %size, i1 true)
+  ret void
+}
+
+define void @memcpy_0(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memcpy_0:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memcpy_0:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false)
+  ret void
+}
+
+define void @memcpy_0_volatile(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memcpy_0_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memcpy_0_volatile:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true)
+  ret void
+}
+
+define void @memcpy_10(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memcpy_10:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ldr x8, [x1]
+; GISel-WITHOUT-MOPS-NEXT:    str x8, [x0]
+; GISel-WITHOUT-MOPS-NEXT:    ldrh w8, [x1, #8]
+; GISel-WITHOUT-MOPS-NEXT:    strh w8, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memcpy_10:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ldr x8, [x1]
+; GISel-MOPS-NEXT:    str x8, [x0]
+; GISel-MOPS-NEXT:    ldrh w8, [x1, #8]
+; GISel-MOPS-NEXT:    strh w8, [x0, #8]
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false)
+  ret void
+}
+
+define void @memcpy_10_volatile(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_10_volatile:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memcpy
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_10_volatile:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memcpy
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memcpy_10_volatile:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    mov w8, #10
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    cpyfp [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpyfm [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpyfe [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memcpy_10_volatile:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #10
+; GISel-MOPS-O3-NEXT:    cpyfp [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpyfm [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpyfe [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true)
+  ret void
+}
+
+define void @memcpy_1000(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_1000:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #1000
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memcpy
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_1000:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #1000
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memcpy
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memcpy_1000:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    mov w8, #1000
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    cpyfp [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpyfm [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpyfe [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memcpy_1000:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #1000
+; GISel-MOPS-O3-NEXT:    cpyfp [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpyfm [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpyfe [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 false)
+  ret void
+}
+
+define void @memcpy_1000_volatile(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_1000_volatile:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #1000
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memcpy
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_1000_volatile:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #1000
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memcpy
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memcpy_1000_volatile:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    mov w8, #1000
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    cpyfp [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpyfm [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpyfe [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memcpy_1000_volatile:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #1000
+; GISel-MOPS-O3-NEXT:    cpyfp [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpyfm [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpyfe [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 true)
+  ret void
+}
+
+define void @memcpy_n(i8* %dst, i8* %src, i64 %size, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memcpy_n:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-NEXT:    bl memcpy
+; GISel-WITHOUT-MOPS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memcpy_n:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    cpyfp [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    cpyfm [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    cpyfe [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 false)
+  ret void
+}
+
+define void @memcpy_n_volatile(i8* %dst, i8* %src, i64 %size, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memcpy_n_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-NEXT:    bl memcpy
+; GISel-WITHOUT-MOPS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memcpy_n_volatile:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    cpyfp [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    cpyfm [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    cpyfe [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 true)
+  ret void
+}
+
+define void @memcpy_inline_0(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memcpy_inline_0:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memcpy_inline_0:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false)
+  ret void
+}
+
+define void @memcpy_inline_0_volatile(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memcpy_inline_0_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memcpy_inline_0_volatile:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true)
+  ret void
+}
+
+define void @memcpy_inline_10(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memcpy_inline_10:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ldr x8, [x1]
+; GISel-WITHOUT-MOPS-NEXT:    str x8, [x0]
+; GISel-WITHOUT-MOPS-NEXT:    ldrh w8, [x1, #8]
+; GISel-WITHOUT-MOPS-NEXT:    strh w8, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memcpy_inline_10:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ldr x8, [x1]
+; GISel-MOPS-NEXT:    str x8, [x0]
+; GISel-MOPS-NEXT:    ldrh w8, [x1, #8]
+; GISel-MOPS-NEXT:    strh w8, [x0, #8]
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false)
+  ret void
+}
+
+define void @memcpy_inline_10_volatile(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memcpy_inline_10_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ldr x8, [x1]
+; GISel-WITHOUT-MOPS-NEXT:    str x8, [x0]
+; GISel-WITHOUT-MOPS-NEXT:    ldrh w8, [x1, #8]
+; GISel-WITHOUT-MOPS-NEXT:    strh w8, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memcpy_inline_10_volatile:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ldr x8, [x1]
+; GISel-MOPS-NEXT:    str x8, [x0]
+; GISel-MOPS-NEXT:    ldrh w8, [x1, #8]
+; GISel-MOPS-NEXT:    strh w8, [x0, #8]
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true)
+  ret void
+}
+
+define void @memmove_0(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memmove_0:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memmove_0:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false)
+  ret void
+}
+
+define void @memmove_0_volatile(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memmove_0_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memmove_0_volatile:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true)
+  ret void
+}
+
+define void @memmove_10(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memmove_10:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x9, [x1]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldrh w8, [x1, #8]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x9, [x0]
+; GISel-WITHOUT-MOPS-O0-NEXT:    strh w8, [x0, #8]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memmove_10:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x8, [x1]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldrh w9, [x1, #8]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x8, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT:    strh w9, [x0, #8]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memmove_10:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    ldr x9, [x1]
+; GISel-MOPS-O0-NEXT:    ldrh w8, [x1, #8]
+; GISel-MOPS-O0-NEXT:    str x9, [x0]
+; GISel-MOPS-O0-NEXT:    strh w8, [x0, #8]
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memmove_10:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    ldr x8, [x1]
+; GISel-MOPS-O3-NEXT:    ldrh w9, [x1, #8]
+; GISel-MOPS-O3-NEXT:    str x8, [x0]
+; GISel-MOPS-O3-NEXT:    strh w9, [x0, #8]
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false)
+  ret void
+}
+
+define void @memmove_10_volatile(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memmove_10_volatile:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memmove
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memmove_10_volatile:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memmove
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memmove_10_volatile:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    mov w8, #10
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    cpyp [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpym [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memmove_10_volatile:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #10
+; GISel-MOPS-O3-NEXT:    cpyp [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpym [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true)
+  ret void
+}
+
+define void @memmove_1000(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memmove_1000:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #1000
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memmove
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memmove_1000:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #1000
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memmove
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memmove_1000:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    mov w8, #1000
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    cpyp [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpym [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memmove_1000:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #1000
+; GISel-MOPS-O3-NEXT:    cpyp [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpym [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 false)
+  ret void
+}
+
+define void @memmove_1000_volatile(i8* %dst, i8* %src, i32 %value) {
+; GISel-WITHOUT-MOPS-O0-LABEL: memmove_1000_volatile:
+; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #1000
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
+; GISel-WITHOUT-MOPS-O0-NEXT:    bl memmove
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ret
+;
+; GISel-WITHOUT-MOPS-O3-LABEL: memmove_1000_volatile:
+; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #1000
+; GISel-WITHOUT-MOPS-O3-NEXT:    bl memmove
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ret
+;
+; GISel-MOPS-O0-LABEL: memmove_1000_volatile:
+; GISel-MOPS-O0:       // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT:    mov w8, #1000
+; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
+; GISel-MOPS-O0-NEXT:    cpyp [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpym [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    ret
+;
+; GISel-MOPS-O3-LABEL: memmove_1000_volatile:
+; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    mov w8, #1000
+; GISel-MOPS-O3-NEXT:    cpyp [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpym [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    ret
+entry:
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 true)
+  ret void
+}
+
+define void @memmove_n(i8* %dst, i8* %src, i64 %size, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memmove_n:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-NEXT:    bl memmove
+; GISel-WITHOUT-MOPS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memmove_n:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    cpyp [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    cpym [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    cpye [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 false)
+  ret void
+}
+
+define void @memmove_n_volatile(i8* %dst, i8* %src, i64 %size, i32 %value) {
+; GISel-WITHOUT-MOPS-LABEL: memmove_n_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_def_cfa_offset 16
+; GISel-WITHOUT-MOPS-NEXT:    .cfi_offset w30, -16
+; GISel-WITHOUT-MOPS-NEXT:    bl memmove
+; GISel-WITHOUT-MOPS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-NEXT:    ret
+;
+; GISel-MOPS-LABEL: memmove_n_volatile:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    cpyp [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    cpym [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    cpye [x0]!, [x1]!, x2!
+; GISel-MOPS-NEXT:    ret
+entry:
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 true)
+  ret void
+}

diff --git a/llvm/unittests/Target/AArch64/InstSizes.cpp b/llvm/unittests/Target/AArch64/InstSizes.cpp
index e8528bb2445d..540e248bad6e 100644
--- a/llvm/unittests/Target/AArch64/InstSizes.cpp
+++ b/llvm/unittests/Target/AArch64/InstSizes.cpp
@@ -155,3 +155,17 @@ TEST(InstSizes, TLSDESC_CALLSEQ) {
         EXPECT_EQ(16u, II.getInstSizeInBytes(*I));
       });
 }
+
+TEST(InstSizes, MOPSMemorySetTaggingPseudo) {
+  std::unique_ptr<LLVMTargetMachine> TM = createTargetMachine();
+  std::unique_ptr<AArch64InstrInfo> II = createInstrInfo(TM.get());
+
+  runChecks(TM.get(), II.get(), "",
+            "  renamable $x0, dead renamable $x1 = MOPSMemorySetTaggingPseudo "
+            "killed renamable $x0, killed renamable $x1, killed renamable $x2, "
+            "implicit-def dead $nzcv\n",
+            [](AArch64InstrInfo &II, MachineFunction &MF) {
+              auto I = MF.begin()->begin();
+              EXPECT_EQ(12u, II.getInstSizeInBytes(*I));
+            });
+}
