[llvm] [ARM][AArch64] Allow the CSE to take into consideration uses of the carry and overflow flags in ARM and AArch64 (PR #150803)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 26 18:21:55 PDT 2025


https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/150803

From f2c9da6498310c66ec9c67c841006dafc449a3c2 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 26 Jul 2025 18:01:24 -0400
Subject: [PATCH] [ARM][AArch64] Allow the CSE to take into consideration uses
 of the carry and overflow flags in ARM and AArch64

On both of these platforms, removing the cmp does not change the observable flag values: if signed overflow in the defining add/sub would produce a poison value (no-signed-wrap), the V flag set by the S-form instruction can be relied upon, and in the ANDS case the V flag is always cleared.
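
For illustration only (a sketch, not taken from the new test file; register and label names are hypothetical), this is the kind of sequence the change targets. Once the defining AND is converted to its flag-setting form, the compare against zero becomes redundant even though the following branch reads the V flag, because ANDS always clears V, just as a compare with zero would:

    ; before                      ; after
    and  w8, w0, w1               ands w8, w0, w1
    cmp  w8, #0                   b.ge .LBB0_2
    b.ge .LBB0_2

Here b.ge tests N == V; after ANDS, V is known to be 0, so the branch condition reduces to N == 0, which is exactly what cmp w8, #0 would have produced.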
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  | 5952 +++++++++--------
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp      |   30 +-
 llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll |  104 +
 3 files changed, 3144 insertions(+), 2942 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 8685d7a04ac9c..6403c1b446cb8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1745,8 +1745,24 @@ static unsigned sForm(MachineInstr &Instr) {
     return AArch64::SBCSXr;
   case AArch64::ANDWri:
     return AArch64::ANDSWri;
+  case AArch64::ANDWrr:
+    return AArch64::ANDSWrr;
+  case AArch64::ANDWrs:
+    return AArch64::ANDSWrs;
+  case AArch64::BICWrr:
+    return AArch64::BICSWrr;
+  case AArch64::BICWrs:
+    return AArch64::BICSWrs;
   case AArch64::ANDXri:
     return AArch64::ANDSXri;
+  case AArch64::ANDXrr:
+    return AArch64::ANDSXrr;
+  case AArch64::ANDXrs:
+    return AArch64::ANDSXrs;
+  case AArch64::BICXrr:
+    return AArch64::BICSXrr;
+  case AArch64::BICXrs:
+    return AArch64::BICSXrs;
   }
 }
 
@@ -1884,3139 +1900,3197 @@ static bool isSUBSRegImm(unsigned Opcode) {
   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
 }
 
-/// Check if CmpInstr can be substituted by MI.
-///
-/// CmpInstr can be substituted:
-/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
-/// - and, MI and CmpInstr are from the same MachineBB
-/// - and, condition flags are not alive in successors of the CmpInstr parent
-/// - and, if MI opcode is the S form there must be no defs of flags between
-///        MI and CmpInstr
-///        or if MI opcode is not the S form there must be neither defs of flags
-///        nor uses of flags between MI and CmpInstr.
-/// - and, if C/V flags are not used after CmpInstr
-///        or if N flag is used but MI produces poison value if signed overflow
-///        occurs.
-static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
-                                       const TargetRegisterInfo &TRI) {
-  // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
-  // that may or may not set flags.
-  assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
-
-  const unsigned CmpOpcode = CmpInstr.getOpcode();
-  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
+static bool isANDSOpcode(MachineInstr &MI) {
+  switch (sForm(MI)) {
+  case AArch64::ANDSWri:
+  case AArch64::ANDSWrr:
+  case AArch64::ANDSWrs:
+  case AArch64::ANDSXri:
+  case AArch64::ANDSXrr:
+  case AArch64::ANDSXrs:
+  case AArch64::BICSWrr:
+  case AArch64::BICSWrs:
+  case AArch64::BICSXrr:
+  case AArch64::BICSXrs:
+    return true;
+  default:
     return false;
+  }
 
-  assert((CmpInstr.getOperand(2).isImm() &&
-          CmpInstr.getOperand(2).getImm() == 0) &&
-         "Caller guarantees that CmpInstr compares with constant 0");
+  /// Check if CmpInstr can be substituted by MI.
+  ///
+  /// CmpInstr can be substituted:
+  /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
+  /// - and, MI and CmpInstr are from the same MachineBB
+  /// - and, condition flags are not alive in successors of the CmpInstr parent
+  /// - and, if MI opcode is the S form there must be no defs of flags between
+  ///        MI and CmpInstr
+  ///        or if MI opcode is not the S form there must be neither defs of
+  ///        flags nor uses of flags between MI and CmpInstr.
+  /// - and, if C/V flags are not used after CmpInstr
+  ///        or if N flag is used but MI produces poison value if signed
+  ///        overflow occurs.
+  static bool canInstrSubstituteCmpInstr(MachineInstr & MI,
+                                         MachineInstr & CmpInstr,
+                                         const TargetRegisterInfo &TRI) {
+    // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
+    // that may or may not set flags.
+    assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
+
+    const unsigned CmpOpcode = CmpInstr.getOpcode();
+    if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
+      return false;
 
-  std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
-  if (!NZVCUsed || NZVCUsed->C)
-    return false;
+    assert((CmpInstr.getOperand(2).isImm() &&
+            CmpInstr.getOperand(2).getImm() == 0) &&
+           "Caller guarantees that CmpInstr compares with constant 0");
 
-  // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
-  // '%vreg = add ...' or '%vreg = sub ...'.
-  // Condition flag V is used to indicate signed overflow.
-  // 1) MI and CmpInstr set N and V to the same value.
-  // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
-  //    signed overflow occurs, so CmpInstr could still be simplified away.
-  if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
-    return false;
+    std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
+    if (!NZVCUsed)
+      return false;
 
-  AccessKind AccessToCheck = AK_Write;
-  if (sForm(MI) != MI.getOpcode())
-    AccessToCheck = AK_All;
-  return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
-}
+    // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
+    // '%vreg = add ...' or '%vreg = sub ...'.
+    // Condition flag C is used to indicate unsigned overflow.
+    // 1) MI and CmpInstr set N and C to the same value if Cmp is an adds
+    // 2) ADDS x, 0, always sets C to 0.
+    // In practice we should not really get here, as an unsigned comparison with
+    // 0 should have been optimized out anyway, but just in case.
+    if (NZVCUsed->C && !isADDSRegImm(CmpOpcode))
+      return false;
 
-/// Substitute an instruction comparing to zero with another instruction
-/// which produces needed condition flags.
-///
-/// Return true on success.
-bool AArch64InstrInfo::substituteCmpToZero(
-    MachineInstr &CmpInstr, unsigned SrcReg,
-    const MachineRegisterInfo &MRI) const {
-  // Get the unique definition of SrcReg.
-  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
-  if (!MI)
-    return false;
+    // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
+    // '%vreg = add ...' or '%vreg = sub ...'.
+    // Condition flag V is used to indicate signed overflow.
+    // 1) MI and CmpInstr set N and V to the same value.
+    // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
+    //    signed overflow occurs, so CmpInstr could still be simplified away.
+    // 3) ANDS also always sets V to 0.
+    if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDSOpcode(MI))
+      return false;
 
-  const TargetRegisterInfo &TRI = getRegisterInfo();
+    AccessKind AccessToCheck = AK_Write;
+    if (sForm(MI) != MI.getOpcode())
+      AccessToCheck = AK_All;
+    return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
+  }
+
+  /// Substitute an instruction comparing to zero with another instruction
+  /// which produces needed condition flags.
+  ///
+  /// Return true on success.
+  bool AArch64InstrInfo::substituteCmpToZero(
+      MachineInstr & CmpInstr, unsigned SrcReg, const MachineRegisterInfo &MRI)
+      const {
+    // Get the unique definition of SrcReg.
+    MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
+    if (!MI)
+      return false;
 
-  unsigned NewOpc = sForm(*MI);
-  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
-    return false;
+    const TargetRegisterInfo &TRI = getRegisterInfo();
 
-  if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
-    return false;
+    unsigned NewOpc = sForm(*MI);
+    if (NewOpc == AArch64::INSTRUCTION_LIST_END)
+      return false;
 
-  // Update the instruction to set NZCV.
-  MI->setDesc(get(NewOpc));
-  CmpInstr.eraseFromParent();
-  bool succeeded = UpdateOperandRegClass(*MI);
-  (void)succeeded;
-  assert(succeeded && "Some operands reg class are incompatible!");
-  MI->addRegisterDefined(AArch64::NZCV, &TRI);
-  return true;
-}
+    if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
+      return false;
 
-/// \returns True if \p CmpInstr can be removed.
-///
-/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
-/// codes used in \p CCUseInstrs must be inverted.
-static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
-                                 int CmpValue, const TargetRegisterInfo &TRI,
-                                 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
-                                 bool &IsInvertCC) {
-  assert((CmpValue == 0 || CmpValue == 1) &&
-         "Only comparisons to 0 or 1 considered for removal!");
-
-  // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
-  unsigned MIOpc = MI.getOpcode();
-  if (MIOpc == AArch64::CSINCWr) {
-    if (MI.getOperand(1).getReg() != AArch64::WZR ||
-        MI.getOperand(2).getReg() != AArch64::WZR)
+    // Update the instruction to set NZCV.
+    MI->setDesc(get(NewOpc));
+    CmpInstr.eraseFromParent();
+    bool succeeded = UpdateOperandRegClass(*MI);
+    (void)succeeded;
+    assert(succeeded && "Some operands reg class are incompatible!");
+    MI->addRegisterDefined(AArch64::NZCV, &TRI);
+    return true;
+  }
+
+  /// \returns True if \p CmpInstr can be removed.
+  ///
+  /// \p IsInvertCC is true if, after removing \p CmpInstr, condition
+  /// codes used in \p CCUseInstrs must be inverted.
+  static bool canCmpInstrBeRemoved(MachineInstr & MI, MachineInstr & CmpInstr,
+                                   int CmpValue, const TargetRegisterInfo &TRI,
+                                   SmallVectorImpl<MachineInstr *> &CCUseInstrs,
+                                   bool &IsInvertCC) {
+    assert((CmpValue == 0 || CmpValue == 1) &&
+           "Only comparisons to 0 or 1 considered for removal!");
+
+    // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
+    unsigned MIOpc = MI.getOpcode();
+    if (MIOpc == AArch64::CSINCWr) {
+      if (MI.getOperand(1).getReg() != AArch64::WZR ||
+          MI.getOperand(2).getReg() != AArch64::WZR)
+        return false;
+    } else if (MIOpc == AArch64::CSINCXr) {
+      if (MI.getOperand(1).getReg() != AArch64::XZR ||
+          MI.getOperand(2).getReg() != AArch64::XZR)
+        return false;
+    } else {
       return false;
-  } else if (MIOpc == AArch64::CSINCXr) {
-    if (MI.getOperand(1).getReg() != AArch64::XZR ||
-        MI.getOperand(2).getReg() != AArch64::XZR)
+    }
+    AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
+    if (MICC == AArch64CC::Invalid)
       return false;
-  } else {
-    return false;
-  }
-  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
-  if (MICC == AArch64CC::Invalid)
-    return false;
 
-  // NZCV needs to be defined
-  if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
-    return false;
+    // NZCV needs to be defined
+    if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) !=
+        -1)
+      return false;
 
-  // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
-  const unsigned CmpOpcode = CmpInstr.getOpcode();
-  bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
-  if (CmpValue && !IsSubsRegImm)
-    return false;
-  if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
-    return false;
+    // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
+    const unsigned CmpOpcode = CmpInstr.getOpcode();
+    bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
+    if (CmpValue && !IsSubsRegImm)
+      return false;
+    if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
+      return false;
 
-  // MI conditions allowed: eq, ne, mi, pl
-  UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
-  if (MIUsedNZCV.C || MIUsedNZCV.V)
-    return false;
+    // MI conditions allowed: eq, ne, mi, pl
+    UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
+    if (MIUsedNZCV.C || MIUsedNZCV.V)
+      return false;
 
-  std::optional<UsedNZCV> NZCVUsedAfterCmp =
-      examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
-  // Condition flags are not used in CmpInstr basic block successors and only
-  // Z or N flags allowed to be used after CmpInstr within its basic block
-  if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
-    return false;
-  // Z or N flag used after CmpInstr must correspond to the flag used in MI
-  if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
-      (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
-    return false;
-  // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
-  if (MIUsedNZCV.N && !CmpValue)
-    return false;
+    std::optional<UsedNZCV> NZCVUsedAfterCmp =
+        examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
+    // Condition flags are not used in CmpInstr basic block successors and only
+    // Z or N flags allowed to be used after CmpInstr within its basic block
+    if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
+      return false;
+    // Z or N flag used after CmpInstr must correspond to the flag used in MI
+    if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
+        (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
+      return false;
+    // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
+    if (MIUsedNZCV.N && !CmpValue)
+      return false;
 
-  // There must be no defs of flags between MI and CmpInstr
-  if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
-    return false;
+    // There must be no defs of flags between MI and CmpInstr
+    if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
+      return false;
 
-  // Condition code is inverted in the following cases:
-  // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
-  // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
-  IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
-               (!CmpValue && MICC == AArch64CC::NE);
-  return true;
-}
+    // Condition code is inverted in the following cases:
+    // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
+    // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
+    IsInvertCC =
+        (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
+        (!CmpValue && MICC == AArch64CC::NE);
+    return true;
+  }
 
-/// Remove comparison in csinc-cmp sequence
-///
-/// Examples:
-/// 1. \code
-///   csinc w9, wzr, wzr, ne
-///   cmp   w9, #0
-///   b.eq
-///    \endcode
-/// to
-///    \code
-///   csinc w9, wzr, wzr, ne
-///   b.ne
-///    \endcode
-///
-/// 2. \code
-///   csinc x2, xzr, xzr, mi
-///   cmp   x2, #1
-///   b.pl
-///    \endcode
-/// to
-///    \code
-///   csinc x2, xzr, xzr, mi
-///   b.pl
-///    \endcode
-///
-/// \param  CmpInstr comparison instruction
-/// \return True when comparison removed
-bool AArch64InstrInfo::removeCmpToZeroOrOne(
-    MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
-    const MachineRegisterInfo &MRI) const {
-  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
-  if (!MI)
-    return false;
-  const TargetRegisterInfo &TRI = getRegisterInfo();
-  SmallVector<MachineInstr *, 4> CCUseInstrs;
-  bool IsInvertCC = false;
-  if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
-                            IsInvertCC))
-    return false;
-  // Make transformation
-  CmpInstr.eraseFromParent();
-  if (IsInvertCC) {
-    // Invert condition codes in CmpInstr CC users
-    for (MachineInstr *CCUseInstr : CCUseInstrs) {
-      int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
-      assert(Idx >= 0 && "Unexpected instruction using CC.");
-      MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
-      AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
-          static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
-      CCOperand.setImm(CCUse);
+  /// Remove comparison in csinc-cmp sequence
+  ///
+  /// Examples:
+  /// 1. \code
+  ///   csinc w9, wzr, wzr, ne
+  ///   cmp   w9, #0
+  ///   b.eq
+  ///    \endcode
+  /// to
+  ///    \code
+  ///   csinc w9, wzr, wzr, ne
+  ///   b.ne
+  ///    \endcode
+  ///
+  /// 2. \code
+  ///   csinc x2, xzr, xzr, mi
+  ///   cmp   x2, #1
+  ///   b.pl
+  ///    \endcode
+  /// to
+  ///    \code
+  ///   csinc x2, xzr, xzr, mi
+  ///   b.pl
+  ///    \endcode
+  ///
+  /// \param  CmpInstr comparison instruction
+  /// \return True when comparison removed
+  bool AArch64InstrInfo::removeCmpToZeroOrOne(
+      MachineInstr & CmpInstr, unsigned SrcReg, int CmpValue,
+      const MachineRegisterInfo &MRI) const {
+    MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
+    if (!MI)
+      return false;
+    const TargetRegisterInfo &TRI = getRegisterInfo();
+    SmallVector<MachineInstr *, 4> CCUseInstrs;
+    bool IsInvertCC = false;
+    if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
+                              IsInvertCC))
+      return false;
+    // Make transformation
+    CmpInstr.eraseFromParent();
+    if (IsInvertCC) {
+      // Invert condition codes in CmpInstr CC users
+      for (MachineInstr *CCUseInstr : CCUseInstrs) {
+        int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
+        assert(Idx >= 0 && "Unexpected instruction using CC.");
+        MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
+        AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
+            static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
+        CCOperand.setImm(CCUse);
+      }
     }
+    return true;
   }
-  return true;
-}
 
-bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
-  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
-      MI.getOpcode() != AArch64::CATCHRET)
-    return false;
+  bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr & MI) const {
+    if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
+        MI.getOpcode() != AArch64::CATCHRET)
+      return false;
 
-  MachineBasicBlock &MBB = *MI.getParent();
-  auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
-  auto TRI = Subtarget.getRegisterInfo();
-  DebugLoc DL = MI.getDebugLoc();
-
-  if (MI.getOpcode() == AArch64::CATCHRET) {
-    // Skip to the first instruction before the epilog.
-    const TargetInstrInfo *TII =
-      MBB.getParent()->getSubtarget().getInstrInfo();
-    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
-    auto MBBI = MachineBasicBlock::iterator(MI);
-    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
-    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
-           FirstEpilogSEH != MBB.begin())
-      FirstEpilogSEH = std::prev(FirstEpilogSEH);
-    if (FirstEpilogSEH != MBB.begin())
-      FirstEpilogSEH = std::next(FirstEpilogSEH);
-    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
-        .addReg(AArch64::X0, RegState::Define)
-        .addMBB(TargetMBB);
-    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
-        .addReg(AArch64::X0, RegState::Define)
-        .addReg(AArch64::X0)
-        .addMBB(TargetMBB)
-        .addImm(0);
-    TargetMBB->setMachineBlockAddressTaken();
-    return true;
-  }
+    MachineBasicBlock &MBB = *MI.getParent();
+    auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
+    auto TRI = Subtarget.getRegisterInfo();
+    DebugLoc DL = MI.getDebugLoc();
+
+    if (MI.getOpcode() == AArch64::CATCHRET) {
+      // Skip to the first instruction before the epilog.
+      const TargetInstrInfo *TII =
+          MBB.getParent()->getSubtarget().getInstrInfo();
+      MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
+      auto MBBI = MachineBasicBlock::iterator(MI);
+      MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
+      while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
+             FirstEpilogSEH != MBB.begin())
+        FirstEpilogSEH = std::prev(FirstEpilogSEH);
+      if (FirstEpilogSEH != MBB.begin())
+        FirstEpilogSEH = std::next(FirstEpilogSEH);
+      BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
+          .addReg(AArch64::X0, RegState::Define)
+          .addMBB(TargetMBB);
+      BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
+          .addReg(AArch64::X0, RegState::Define)
+          .addReg(AArch64::X0)
+          .addMBB(TargetMBB)
+          .addImm(0);
+      TargetMBB->setMachineBlockAddressTaken();
+      return true;
+    }
 
-  Register Reg = MI.getOperand(0).getReg();
-  Module &M = *MBB.getParent()->getFunction().getParent();
-  if (M.getStackProtectorGuard() == "sysreg") {
-    const AArch64SysReg::SysReg *SrcReg =
-        AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
-    if (!SrcReg)
-      report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
-
-    // mrs xN, sysreg
-    BuildMI(MBB, MI, DL, get(AArch64::MRS))
-        .addDef(Reg, RegState::Renamable)
-        .addImm(SrcReg->Encoding);
-    int Offset = M.getStackProtectorGuardOffset();
-    if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
-      // ldr xN, [xN, #offset]
-      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
-          .addDef(Reg)
-          .addUse(Reg, RegState::Kill)
-          .addImm(Offset / 8);
-    } else if (Offset >= -256 && Offset <= 255) {
-      // ldur xN, [xN, #offset]
-      BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
-          .addDef(Reg)
-          .addUse(Reg, RegState::Kill)
-          .addImm(Offset);
-    } else if (Offset >= -4095 && Offset <= 4095) {
-      if (Offset > 0) {
-        // add xN, xN, #offset
-        BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
+    Register Reg = MI.getOperand(0).getReg();
+    Module &M = *MBB.getParent()->getFunction().getParent();
+    if (M.getStackProtectorGuard() == "sysreg") {
+      const AArch64SysReg::SysReg *SrcReg =
+          AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
+      if (!SrcReg)
+        report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
+
+      // mrs xN, sysreg
+      BuildMI(MBB, MI, DL, get(AArch64::MRS))
+          .addDef(Reg, RegState::Renamable)
+          .addImm(SrcReg->Encoding);
+      int Offset = M.getStackProtectorGuardOffset();
+      if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
+        // ldr xN, [xN, #offset]
+        BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
             .addDef(Reg)
             .addUse(Reg, RegState::Kill)
-            .addImm(Offset)
-            .addImm(0);
-      } else {
-        // sub xN, xN, #offset
-        BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
+            .addImm(Offset / 8);
+      } else if (Offset >= -256 && Offset <= 255) {
+        // ldur xN, [xN, #offset]
+        BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
+            .addDef(Reg)
+            .addUse(Reg, RegState::Kill)
+            .addImm(Offset);
+      } else if (Offset >= -4095 && Offset <= 4095) {
+        if (Offset > 0) {
+          // add xN, xN, #offset
+          BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
+              .addDef(Reg)
+              .addUse(Reg, RegState::Kill)
+              .addImm(Offset)
+              .addImm(0);
+        } else {
+          // sub xN, xN, #offset
+          BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
+              .addDef(Reg)
+              .addUse(Reg, RegState::Kill)
+              .addImm(-Offset)
+              .addImm(0);
+        }
+        // ldr xN, [xN]
+        BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
             .addDef(Reg)
             .addUse(Reg, RegState::Kill)
-            .addImm(-Offset)
             .addImm(0);
+      } else {
+        // Cases that are larger than +/- 4095 and not a multiple of 8, or
+        // larger than 23760. It might be nice to use AArch64::MOVi32imm here,
+        // which would get expanded in PreSched2 after PostRA, but our lone
+        // scratch Reg already contains the MRS result.
+        // findScratchNonCalleeSaveRegister() in AArch64FrameLowering might help
+        // us find such a scratch register though. If we failed to find a
+        // scratch register, we could emit a stream of add instructions to build
+        // up the immediate. Or, we could try to insert a AArch64::MOVi32imm
+        // before register allocation so that we didn't need to scavenge for a
+        // scratch register.
+        report_fatal_error("Unable to encode Stack Protector Guard Offset");
       }
-      // ldr xN, [xN]
-      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
-          .addDef(Reg)
-          .addUse(Reg, RegState::Kill)
-          .addImm(0);
-    } else {
-      // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
-      // than 23760.
-      // It might be nice to use AArch64::MOVi32imm here, which would get
-      // expanded in PreSched2 after PostRA, but our lone scratch Reg already
-      // contains the MRS result. findScratchNonCalleeSaveRegister() in
-      // AArch64FrameLowering might help us find such a scratch register
-      // though. If we failed to find a scratch register, we could emit a
-      // stream of add instructions to build up the immediate. Or, we could try
-      // to insert a AArch64::MOVi32imm before register allocation so that we
-      // didn't need to scavenge for a scratch register.
-      report_fatal_error("Unable to encode Stack Protector Guard Offset");
+      MBB.erase(MI);
+      return true;
     }
-    MBB.erase(MI);
-    return true;
-  }
 
-  const GlobalValue *GV =
-      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
-  const TargetMachine &TM = MBB.getParent()->getTarget();
-  unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
-  const unsigned char MO_NC = AArch64II::MO_NC;
-
-  if ((OpFlags & AArch64II::MO_GOT) != 0) {
-    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
-        .addGlobalAddress(GV, 0, OpFlags);
-    if (Subtarget.isTargetILP32()) {
-      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
-      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
-          .addDef(Reg32, RegState::Dead)
-          .addUse(Reg, RegState::Kill)
-          .addImm(0)
-          .addMemOperand(*MI.memoperands_begin())
-          .addDef(Reg, RegState::Implicit);
-    } else {
+    const GlobalValue *GV =
+        cast<GlobalValue>((*MI.memoperands_begin())->getValue());
+    const TargetMachine &TM = MBB.getParent()->getTarget();
+    unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
+    const unsigned char MO_NC = AArch64II::MO_NC;
+
+    if ((OpFlags & AArch64II::MO_GOT) != 0) {
+      BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
+          .addGlobalAddress(GV, 0, OpFlags);
+      if (Subtarget.isTargetILP32()) {
+        unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
+        BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
+            .addDef(Reg32, RegState::Dead)
+            .addUse(Reg, RegState::Kill)
+            .addImm(0)
+            .addMemOperand(*MI.memoperands_begin())
+            .addDef(Reg, RegState::Implicit);
+      } else {
+        BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+            .addReg(Reg, RegState::Kill)
+            .addImm(0)
+            .addMemOperand(*MI.memoperands_begin());
+      }
+    } else if (TM.getCodeModel() == CodeModel::Large) {
+      assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
+      BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
+          .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
+          .addImm(0);
+      BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+          .addReg(Reg, RegState::Kill)
+          .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
+          .addImm(16);
+      BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+          .addReg(Reg, RegState::Kill)
+          .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
+          .addImm(32);
+      BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+          .addReg(Reg, RegState::Kill)
+          .addGlobalAddress(GV, 0, AArch64II::MO_G3)
+          .addImm(48);
       BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
           .addReg(Reg, RegState::Kill)
           .addImm(0)
           .addMemOperand(*MI.memoperands_begin());
-    }
-  } else if (TM.getCodeModel() == CodeModel::Large) {
-    assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
-    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
-        .addImm(0);
-    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
-        .addReg(Reg, RegState::Kill)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
-        .addImm(16);
-    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
-        .addReg(Reg, RegState::Kill)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
-        .addImm(32);
-    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
-        .addReg(Reg, RegState::Kill)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
-        .addImm(48);
-    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
-        .addReg(Reg, RegState::Kill)
-        .addImm(0)
-        .addMemOperand(*MI.memoperands_begin());
-  } else if (TM.getCodeModel() == CodeModel::Tiny) {
-    BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
-        .addGlobalAddress(GV, 0, OpFlags);
-  } else {
-    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
-        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
-    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
-    if (Subtarget.isTargetILP32()) {
-      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
-      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
-          .addDef(Reg32, RegState::Dead)
-          .addUse(Reg, RegState::Kill)
-          .addGlobalAddress(GV, 0, LoFlags)
-          .addMemOperand(*MI.memoperands_begin())
-          .addDef(Reg, RegState::Implicit);
+    } else if (TM.getCodeModel() == CodeModel::Tiny) {
+      BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
+          .addGlobalAddress(GV, 0, OpFlags);
     } else {
-      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
-          .addReg(Reg, RegState::Kill)
-          .addGlobalAddress(GV, 0, LoFlags)
-          .addMemOperand(*MI.memoperands_begin());
+      BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
+          .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
+      unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
+      if (Subtarget.isTargetILP32()) {
+        unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
+        BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
+            .addDef(Reg32, RegState::Dead)
+            .addUse(Reg, RegState::Kill)
+            .addGlobalAddress(GV, 0, LoFlags)
+            .addMemOperand(*MI.memoperands_begin())
+            .addDef(Reg, RegState::Implicit);
+      } else {
+        BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+            .addReg(Reg, RegState::Kill)
+            .addGlobalAddress(GV, 0, LoFlags)
+            .addMemOperand(*MI.memoperands_begin());
+      }
     }
-  }
 
-  MBB.erase(MI);
+    MBB.erase(MI);
 
-  return true;
-}
+    return true;
+  }
 
-// Return true if this instruction simply sets its single destination register
-// to zero. This is equivalent to a register rename of the zero-register.
-bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case AArch64::MOVZWi:
-  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
-    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
-      assert(MI.getDesc().getNumOperands() == 3 &&
-             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
-      return true;
+  // Return true if this instruction simply sets its single destination register
+  // to zero. This is equivalent to a register rename of the zero-register.
+  bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case AArch64::MOVZWi:
+    case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
+      if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
+        assert(MI.getDesc().getNumOperands() == 3 &&
+               MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
+        return true;
+      }
+      break;
+    case AArch64::ANDWri: // and Rd, Rzr, #imm
+      return MI.getOperand(1).getReg() == AArch64::WZR;
+    case AArch64::ANDXri:
+      return MI.getOperand(1).getReg() == AArch64::XZR;
+    case TargetOpcode::COPY:
+      return MI.getOperand(1).getReg() == AArch64::WZR;
     }
-    break;
-  case AArch64::ANDWri: // and Rd, Rzr, #imm
-    return MI.getOperand(1).getReg() == AArch64::WZR;
-  case AArch64::ANDXri:
-    return MI.getOperand(1).getReg() == AArch64::XZR;
-  case TargetOpcode::COPY:
-    return MI.getOperand(1).getReg() == AArch64::WZR;
+    return false;
   }
-  return false;
-}
 
-// Return true if this instruction simply renames a general register without
-// modifying bits.
-bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case TargetOpcode::COPY: {
-    // GPR32 copies will by lowered to ORRXrs
-    Register DstReg = MI.getOperand(0).getReg();
-    return (AArch64::GPR32RegClass.contains(DstReg) ||
-            AArch64::GPR64RegClass.contains(DstReg));
-  }
-  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
-    if (MI.getOperand(1).getReg() == AArch64::XZR) {
-      assert(MI.getDesc().getNumOperands() == 4 &&
-             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
-      return true;
+  // Return true if this instruction simply renames a general register without
+  // modifying bits.
+  bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case TargetOpcode::COPY: {
+      // GPR32 copies will by lowered to ORRXrs
+      Register DstReg = MI.getOperand(0).getReg();
+      return (AArch64::GPR32RegClass.contains(DstReg) ||
+              AArch64::GPR64RegClass.contains(DstReg));
     }
-    break;
-  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
-    if (MI.getOperand(2).getImm() == 0) {
-      assert(MI.getDesc().getNumOperands() == 4 &&
-             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
-      return true;
+    case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
+      if (MI.getOperand(1).getReg() == AArch64::XZR) {
+        assert(MI.getDesc().getNumOperands() == 4 &&
+               MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
+        return true;
+      }
+      break;
+    case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
+      if (MI.getOperand(2).getImm() == 0) {
+        assert(MI.getDesc().getNumOperands() == 4 &&
+               MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
+        return true;
+      }
+      break;
     }
-    break;
+    return false;
   }
-  return false;
-}
 
-// Return true if this instruction simply renames a general register without
-// modifying bits.
-bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case TargetOpcode::COPY: {
-    Register DstReg = MI.getOperand(0).getReg();
-    return AArch64::FPR128RegClass.contains(DstReg);
-  }
-  case AArch64::ORRv16i8:
-    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
-      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
-             "invalid ORRv16i8 operands");
-      return true;
+  // Return true if this instruction simply renames a general register without
+  // modifying bits.
+  bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case TargetOpcode::COPY: {
+      Register DstReg = MI.getOperand(0).getReg();
+      return AArch64::FPR128RegClass.contains(DstReg);
     }
-    break;
+    case AArch64::ORRv16i8:
+      if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+        assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
+               "invalid ORRv16i8 operands");
+        return true;
+      }
+      break;
+    }
+    return false;
   }
-  return false;
-}
 
-Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
-                                               int &FrameIndex) const {
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case AArch64::LDRWui:
-  case AArch64::LDRXui:
-  case AArch64::LDRBui:
-  case AArch64::LDRHui:
-  case AArch64::LDRSui:
-  case AArch64::LDRDui:
-  case AArch64::LDRQui:
-  case AArch64::LDR_PXI:
-    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
-        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
-      FrameIndex = MI.getOperand(1).getIndex();
-      return MI.getOperand(0).getReg();
+  Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+                                                 int &FrameIndex) const {
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case AArch64::LDRWui:
+    case AArch64::LDRXui:
+    case AArch64::LDRBui:
+    case AArch64::LDRHui:
+    case AArch64::LDRSui:
+    case AArch64::LDRDui:
+    case AArch64::LDRQui:
+    case AArch64::LDR_PXI:
+      if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
+          MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
+        FrameIndex = MI.getOperand(1).getIndex();
+        return MI.getOperand(0).getReg();
+      }
+      break;
     }
-    break;
-  }
 
-  return 0;
-}
+    return 0;
+  }
 
-Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
-                                              int &FrameIndex) const {
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case AArch64::STRWui:
-  case AArch64::STRXui:
-  case AArch64::STRBui:
-  case AArch64::STRHui:
-  case AArch64::STRSui:
-  case AArch64::STRDui:
-  case AArch64::STRQui:
-  case AArch64::STR_PXI:
-    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
-        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
-      FrameIndex = MI.getOperand(1).getIndex();
-      return MI.getOperand(0).getReg();
+  Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                                int &FrameIndex) const {
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case AArch64::STRWui:
+    case AArch64::STRXui:
+    case AArch64::STRBui:
+    case AArch64::STRHui:
+    case AArch64::STRSui:
+    case AArch64::STRDui:
+    case AArch64::STRQui:
+    case AArch64::STR_PXI:
+      if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
+          MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
+        FrameIndex = MI.getOperand(1).getIndex();
+        return MI.getOperand(0).getReg();
+      }
+      break;
     }
-    break;
+    return 0;
   }
-  return 0;
-}
-
-/// Check all MachineMemOperands for a hint to suppress pairing.
-bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
-  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
-    return MMO->getFlags() & MOSuppressPair;
-  });
-}
-
-/// Set a flag on the first MachineMemOperand to suppress pairing.
-void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
-  if (MI.memoperands_empty())
-    return;
-  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
-}
-
-/// Check all MachineMemOperands for a hint that the load/store is strided.
-bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
-  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
-    return MMO->getFlags() & MOStridedAccess;
-  });
-}
 
-bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
-  switch (Opc) {
-  default:
-    return false;
-  case AArch64::STURSi:
-  case AArch64::STRSpre:
-  case AArch64::STURDi:
-  case AArch64::STRDpre:
-  case AArch64::STURQi:
-  case AArch64::STRQpre:
-  case AArch64::STURBBi:
-  case AArch64::STURHHi:
-  case AArch64::STURWi:
-  case AArch64::STRWpre:
-  case AArch64::STURXi:
-  case AArch64::STRXpre:
-  case AArch64::LDURSi:
-  case AArch64::LDRSpre:
-  case AArch64::LDURDi:
-  case AArch64::LDRDpre:
-  case AArch64::LDURQi:
-  case AArch64::LDRQpre:
-  case AArch64::LDURWi:
-  case AArch64::LDRWpre:
-  case AArch64::LDURXi:
-  case AArch64::LDRXpre:
-  case AArch64::LDRSWpre:
-  case AArch64::LDURSWi:
-  case AArch64::LDURHHi:
-  case AArch64::LDURBBi:
-  case AArch64::LDURSBWi:
-  case AArch64::LDURSHWi:
-    return true;
+  /// Check all MachineMemOperands for a hint to suppress pairing.
+  bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
+    return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+      return MMO->getFlags() & MOSuppressPair;
+    });
   }
-}
 
-std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
-  switch (Opc) {
-  default: return {};
-  case AArch64::PRFMui: return AArch64::PRFUMi;
-  case AArch64::LDRXui: return AArch64::LDURXi;
-  case AArch64::LDRWui: return AArch64::LDURWi;
-  case AArch64::LDRBui: return AArch64::LDURBi;
-  case AArch64::LDRHui: return AArch64::LDURHi;
-  case AArch64::LDRSui: return AArch64::LDURSi;
-  case AArch64::LDRDui: return AArch64::LDURDi;
-  case AArch64::LDRQui: return AArch64::LDURQi;
-  case AArch64::LDRBBui: return AArch64::LDURBBi;
-  case AArch64::LDRHHui: return AArch64::LDURHHi;
-  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
-  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
-  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
-  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
-  case AArch64::LDRSWui: return AArch64::LDURSWi;
-  case AArch64::STRXui: return AArch64::STURXi;
-  case AArch64::STRWui: return AArch64::STURWi;
-  case AArch64::STRBui: return AArch64::STURBi;
-  case AArch64::STRHui: return AArch64::STURHi;
-  case AArch64::STRSui: return AArch64::STURSi;
-  case AArch64::STRDui: return AArch64::STURDi;
-  case AArch64::STRQui: return AArch64::STURQi;
-  case AArch64::STRBBui: return AArch64::STURBBi;
-  case AArch64::STRHHui: return AArch64::STURHHi;
-  }
-}
-
-unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
-  switch (Opc) {
-  default:
-    llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
-  case AArch64::ADDG:
-  case AArch64::LDAPURBi:
-  case AArch64::LDAPURHi:
-  case AArch64::LDAPURi:
-  case AArch64::LDAPURSBWi:
-  case AArch64::LDAPURSBXi:
-  case AArch64::LDAPURSHWi:
-  case AArch64::LDAPURSHXi:
-  case AArch64::LDAPURSWi:
-  case AArch64::LDAPURXi:
-  case AArch64::LDR_PPXI:
-  case AArch64::LDR_PXI:
-  case AArch64::LDR_ZXI:
-  case AArch64::LDR_ZZXI:
-  case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::LDR_ZZZXI:
-  case AArch64::LDR_ZZZZXI:
-  case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::LDRBBui:
-  case AArch64::LDRBui:
-  case AArch64::LDRDui:
-  case AArch64::LDRHHui:
-  case AArch64::LDRHui:
-  case AArch64::LDRQui:
-  case AArch64::LDRSBWui:
-  case AArch64::LDRSBXui:
-  case AArch64::LDRSHWui:
-  case AArch64::LDRSHXui:
-  case AArch64::LDRSui:
-  case AArch64::LDRSWui:
-  case AArch64::LDRWui:
-  case AArch64::LDRXui:
-  case AArch64::LDURBBi:
-  case AArch64::LDURBi:
-  case AArch64::LDURDi:
-  case AArch64::LDURHHi:
-  case AArch64::LDURHi:
-  case AArch64::LDURQi:
-  case AArch64::LDURSBWi:
-  case AArch64::LDURSBXi:
-  case AArch64::LDURSHWi:
-  case AArch64::LDURSHXi:
-  case AArch64::LDURSi:
-  case AArch64::LDURSWi:
-  case AArch64::LDURWi:
-  case AArch64::LDURXi:
-  case AArch64::PRFMui:
-  case AArch64::PRFUMi:
-  case AArch64::ST2Gi:
-  case AArch64::STGi:
-  case AArch64::STLURBi:
-  case AArch64::STLURHi:
-  case AArch64::STLURWi:
-  case AArch64::STLURXi:
-  case AArch64::StoreSwiftAsyncContext:
-  case AArch64::STR_PPXI:
-  case AArch64::STR_PXI:
-  case AArch64::STR_ZXI:
-  case AArch64::STR_ZZXI:
-  case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::STR_ZZZXI:
-  case AArch64::STR_ZZZZXI:
-  case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::STRBBui:
-  case AArch64::STRBui:
-  case AArch64::STRDui:
-  case AArch64::STRHHui:
-  case AArch64::STRHui:
-  case AArch64::STRQui:
-  case AArch64::STRSui:
-  case AArch64::STRWui:
-  case AArch64::STRXui:
-  case AArch64::STURBBi:
-  case AArch64::STURBi:
-  case AArch64::STURDi:
-  case AArch64::STURHHi:
-  case AArch64::STURHi:
-  case AArch64::STURQi:
-  case AArch64::STURSi:
-  case AArch64::STURWi:
-  case AArch64::STURXi:
-  case AArch64::STZ2Gi:
-  case AArch64::STZGi:
-  case AArch64::TAGPstack:
-  case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
-  case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
-    return 2;
-  case AArch64::LD1B_D_IMM:
-  case AArch64::LD1B_H_IMM:
-  case AArch64::LD1B_IMM:
-  case AArch64::LD1B_S_IMM:
-  case AArch64::LD1D_IMM:
-  case AArch64::LD1H_D_IMM:
-  case AArch64::LD1H_IMM:
-  case AArch64::LD1H_S_IMM:
-  case AArch64::LD1RB_D_IMM:
-  case AArch64::LD1RB_H_IMM:
-  case AArch64::LD1RB_IMM:
-  case AArch64::LD1RB_S_IMM:
-  case AArch64::LD1RD_IMM:
-  case AArch64::LD1RH_D_IMM:
-  case AArch64::LD1RH_IMM:
-  case AArch64::LD1RH_S_IMM:
-  case AArch64::LD1RSB_D_IMM:
-  case AArch64::LD1RSB_H_IMM:
-  case AArch64::LD1RSB_S_IMM:
-  case AArch64::LD1RSH_D_IMM:
-  case AArch64::LD1RSH_S_IMM:
-  case AArch64::LD1RSW_IMM:
-  case AArch64::LD1RW_D_IMM:
-  case AArch64::LD1RW_IMM:
-  case AArch64::LD1SB_D_IMM:
-  case AArch64::LD1SB_H_IMM:
-  case AArch64::LD1SB_S_IMM:
-  case AArch64::LD1SH_D_IMM:
-  case AArch64::LD1SH_S_IMM:
-  case AArch64::LD1SW_D_IMM:
-  case AArch64::LD1W_D_IMM:
-  case AArch64::LD1W_IMM:
-  case AArch64::LD2B_IMM:
-  case AArch64::LD2D_IMM:
-  case AArch64::LD2H_IMM:
-  case AArch64::LD2W_IMM:
-  case AArch64::LD3B_IMM:
-  case AArch64::LD3D_IMM:
-  case AArch64::LD3H_IMM:
-  case AArch64::LD3W_IMM:
-  case AArch64::LD4B_IMM:
-  case AArch64::LD4D_IMM:
-  case AArch64::LD4H_IMM:
-  case AArch64::LD4W_IMM:
-  case AArch64::LDG:
-  case AArch64::LDNF1B_D_IMM:
-  case AArch64::LDNF1B_H_IMM:
-  case AArch64::LDNF1B_IMM:
-  case AArch64::LDNF1B_S_IMM:
-  case AArch64::LDNF1D_IMM:
-  case AArch64::LDNF1H_D_IMM:
-  case AArch64::LDNF1H_IMM:
-  case AArch64::LDNF1H_S_IMM:
-  case AArch64::LDNF1SB_D_IMM:
-  case AArch64::LDNF1SB_H_IMM:
-  case AArch64::LDNF1SB_S_IMM:
-  case AArch64::LDNF1SH_D_IMM:
-  case AArch64::LDNF1SH_S_IMM:
-  case AArch64::LDNF1SW_D_IMM:
-  case AArch64::LDNF1W_D_IMM:
-  case AArch64::LDNF1W_IMM:
-  case AArch64::LDNPDi:
-  case AArch64::LDNPQi:
-  case AArch64::LDNPSi:
-  case AArch64::LDNPWi:
-  case AArch64::LDNPXi:
-  case AArch64::LDNT1B_ZRI:
-  case AArch64::LDNT1D_ZRI:
-  case AArch64::LDNT1H_ZRI:
-  case AArch64::LDNT1W_ZRI:
-  case AArch64::LDPDi:
-  case AArch64::LDPQi:
-  case AArch64::LDPSi:
-  case AArch64::LDPWi:
-  case AArch64::LDPXi:
-  case AArch64::LDRBBpost:
-  case AArch64::LDRBBpre:
-  case AArch64::LDRBpost:
-  case AArch64::LDRBpre:
-  case AArch64::LDRDpost:
-  case AArch64::LDRDpre:
-  case AArch64::LDRHHpost:
-  case AArch64::LDRHHpre:
-  case AArch64::LDRHpost:
-  case AArch64::LDRHpre:
-  case AArch64::LDRQpost:
-  case AArch64::LDRQpre:
-  case AArch64::LDRSpost:
-  case AArch64::LDRSpre:
-  case AArch64::LDRWpost:
-  case AArch64::LDRWpre:
-  case AArch64::LDRXpost:
-  case AArch64::LDRXpre:
-  case AArch64::ST1B_D_IMM:
-  case AArch64::ST1B_H_IMM:
-  case AArch64::ST1B_IMM:
-  case AArch64::ST1B_S_IMM:
-  case AArch64::ST1D_IMM:
-  case AArch64::ST1H_D_IMM:
-  case AArch64::ST1H_IMM:
-  case AArch64::ST1H_S_IMM:
-  case AArch64::ST1W_D_IMM:
-  case AArch64::ST1W_IMM:
-  case AArch64::ST2B_IMM:
-  case AArch64::ST2D_IMM:
-  case AArch64::ST2H_IMM:
-  case AArch64::ST2W_IMM:
-  case AArch64::ST3B_IMM:
-  case AArch64::ST3D_IMM:
-  case AArch64::ST3H_IMM:
-  case AArch64::ST3W_IMM:
-  case AArch64::ST4B_IMM:
-  case AArch64::ST4D_IMM:
-  case AArch64::ST4H_IMM:
-  case AArch64::ST4W_IMM:
-  case AArch64::STGPi:
-  case AArch64::STGPreIndex:
-  case AArch64::STZGPreIndex:
-  case AArch64::ST2GPreIndex:
-  case AArch64::STZ2GPreIndex:
-  case AArch64::STGPostIndex:
-  case AArch64::STZGPostIndex:
-  case AArch64::ST2GPostIndex:
-  case AArch64::STZ2GPostIndex:
-  case AArch64::STNPDi:
-  case AArch64::STNPQi:
-  case AArch64::STNPSi:
-  case AArch64::STNPWi:
-  case AArch64::STNPXi:
-  case AArch64::STNT1B_ZRI:
-  case AArch64::STNT1D_ZRI:
-  case AArch64::STNT1H_ZRI:
-  case AArch64::STNT1W_ZRI:
-  case AArch64::STPDi:
-  case AArch64::STPQi:
-  case AArch64::STPSi:
-  case AArch64::STPWi:
-  case AArch64::STPXi:
-  case AArch64::STRBBpost:
-  case AArch64::STRBBpre:
-  case AArch64::STRBpost:
-  case AArch64::STRBpre:
-  case AArch64::STRDpost:
-  case AArch64::STRDpre:
-  case AArch64::STRHHpost:
-  case AArch64::STRHHpre:
-  case AArch64::STRHpost:
-  case AArch64::STRHpre:
-  case AArch64::STRQpost:
-  case AArch64::STRQpre:
-  case AArch64::STRSpost:
-  case AArch64::STRSpre:
-  case AArch64::STRWpost:
-  case AArch64::STRWpre:
-  case AArch64::STRXpost:
-  case AArch64::STRXpre:
-    return 3;
-  case AArch64::LDPDpost:
-  case AArch64::LDPDpre:
-  case AArch64::LDPQpost:
-  case AArch64::LDPQpre:
-  case AArch64::LDPSpost:
-  case AArch64::LDPSpre:
-  case AArch64::LDPWpost:
-  case AArch64::LDPWpre:
-  case AArch64::LDPXpost:
-  case AArch64::LDPXpre:
-  case AArch64::STGPpre:
-  case AArch64::STGPpost:
-  case AArch64::STPDpost:
-  case AArch64::STPDpre:
-  case AArch64::STPQpost:
-  case AArch64::STPQpre:
-  case AArch64::STPSpost:
-  case AArch64::STPSpre:
-  case AArch64::STPWpost:
-  case AArch64::STPWpre:
-  case AArch64::STPXpost:
-  case AArch64::STPXpre:
-    return 4;
-  }
-}
-
-bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    return false;
-  // Scaled instructions.
-  case AArch64::STRSui:
-  case AArch64::STRDui:
-  case AArch64::STRQui:
-  case AArch64::STRXui:
-  case AArch64::STRWui:
-  case AArch64::LDRSui:
-  case AArch64::LDRDui:
-  case AArch64::LDRQui:
-  case AArch64::LDRXui:
-  case AArch64::LDRWui:
-  case AArch64::LDRSWui:
-  // Unscaled instructions.
-  case AArch64::STURSi:
-  case AArch64::STRSpre:
-  case AArch64::STURDi:
-  case AArch64::STRDpre:
-  case AArch64::STURQi:
-  case AArch64::STRQpre:
-  case AArch64::STURWi:
-  case AArch64::STRWpre:
-  case AArch64::STURXi:
-  case AArch64::STRXpre:
-  case AArch64::LDURSi:
-  case AArch64::LDRSpre:
-  case AArch64::LDURDi:
-  case AArch64::LDRDpre:
-  case AArch64::LDURQi:
-  case AArch64::LDRQpre:
-  case AArch64::LDURWi:
-  case AArch64::LDRWpre:
-  case AArch64::LDURXi:
-  case AArch64::LDRXpre:
-  case AArch64::LDURSWi:
-  case AArch64::LDRSWpre:
-  // SVE instructions.
-  case AArch64::LDR_ZXI:
-  case AArch64::STR_ZXI:
-    return true;
+  /// Set a flag on the first MachineMemOperand to suppress pairing.
+  void AArch64InstrInfo::suppressLdStPair(MachineInstr & MI) {
+    if (MI.memoperands_empty())
+      return;
+    (*MI.memoperands_begin())->setFlags(MOSuppressPair);
   }
-}
 
-bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    assert((!MI.isCall() || !MI.isReturn()) &&
-           "Unexpected instruction - was a new tail call opcode introduced?");
-    return false;
-  case AArch64::TCRETURNdi:
-  case AArch64::TCRETURNri:
-  case AArch64::TCRETURNrix16x17:
-  case AArch64::TCRETURNrix17:
-  case AArch64::TCRETURNrinotx16:
-  case AArch64::TCRETURNriALL:
-  case AArch64::AUTH_TCRETURN:
-  case AArch64::AUTH_TCRETURN_BTI:
-    return true;
+  /// Check all MachineMemOperands for a hint that the load/store is strided.
+  bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
+    return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+      return MMO->getFlags() & MOStridedAccess;
+    });
   }
-}
 
-unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
-  switch (Opc) {
-  default:
-    llvm_unreachable("Opcode has no flag setting equivalent!");
-  // 32-bit cases:
-  case AArch64::ADDWri:
-    return AArch64::ADDSWri;
-  case AArch64::ADDWrr:
-    return AArch64::ADDSWrr;
-  case AArch64::ADDWrs:
-    return AArch64::ADDSWrs;
-  case AArch64::ADDWrx:
-    return AArch64::ADDSWrx;
-  case AArch64::ANDWri:
-    return AArch64::ANDSWri;
-  case AArch64::ANDWrr:
-    return AArch64::ANDSWrr;
-  case AArch64::ANDWrs:
-    return AArch64::ANDSWrs;
-  case AArch64::BICWrr:
-    return AArch64::BICSWrr;
-  case AArch64::BICWrs:
-    return AArch64::BICSWrs;
-  case AArch64::SUBWri:
-    return AArch64::SUBSWri;
-  case AArch64::SUBWrr:
-    return AArch64::SUBSWrr;
-  case AArch64::SUBWrs:
-    return AArch64::SUBSWrs;
-  case AArch64::SUBWrx:
-    return AArch64::SUBSWrx;
-  // 64-bit cases:
-  case AArch64::ADDXri:
-    return AArch64::ADDSXri;
-  case AArch64::ADDXrr:
-    return AArch64::ADDSXrr;
-  case AArch64::ADDXrs:
-    return AArch64::ADDSXrs;
-  case AArch64::ADDXrx:
-    return AArch64::ADDSXrx;
-  case AArch64::ANDXri:
-    return AArch64::ANDSXri;
-  case AArch64::ANDXrr:
-    return AArch64::ANDSXrr;
-  case AArch64::ANDXrs:
-    return AArch64::ANDSXrs;
-  case AArch64::BICXrr:
-    return AArch64::BICSXrr;
-  case AArch64::BICXrs:
-    return AArch64::BICSXrs;
-  case AArch64::SUBXri:
-    return AArch64::SUBSXri;
-  case AArch64::SUBXrr:
-    return AArch64::SUBSXrr;
-  case AArch64::SUBXrs:
-    return AArch64::SUBSXrs;
-  case AArch64::SUBXrx:
-    return AArch64::SUBSXrx;
-  // SVE instructions:
-  case AArch64::AND_PPzPP:
-    return AArch64::ANDS_PPzPP;
-  case AArch64::BIC_PPzPP:
-    return AArch64::BICS_PPzPP;
-  case AArch64::EOR_PPzPP:
-    return AArch64::EORS_PPzPP;
-  case AArch64::NAND_PPzPP:
-    return AArch64::NANDS_PPzPP;
-  case AArch64::NOR_PPzPP:
-    return AArch64::NORS_PPzPP;
-  case AArch64::ORN_PPzPP:
-    return AArch64::ORNS_PPzPP;
-  case AArch64::ORR_PPzPP:
-    return AArch64::ORRS_PPzPP;
-  case AArch64::BRKA_PPzP:
-    return AArch64::BRKAS_PPzP;
-  case AArch64::BRKPA_PPzPP:
-    return AArch64::BRKPAS_PPzPP;
-  case AArch64::BRKB_PPzP:
-    return AArch64::BRKBS_PPzP;
-  case AArch64::BRKPB_PPzPP:
-    return AArch64::BRKPBS_PPzPP;
-  case AArch64::BRKN_PPzP:
-    return AArch64::BRKNS_PPzP;
-  case AArch64::RDFFR_PPz:
-    return AArch64::RDFFRS_PPz;
-  case AArch64::PTRUE_B:
-    return AArch64::PTRUES_B;
+  bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
+    switch (Opc) {
+    default:
+      return false;
+    case AArch64::STURSi:
+    case AArch64::STRSpre:
+    case AArch64::STURDi:
+    case AArch64::STRDpre:
+    case AArch64::STURQi:
+    case AArch64::STRQpre:
+    case AArch64::STURBBi:
+    case AArch64::STURHHi:
+    case AArch64::STURWi:
+    case AArch64::STRWpre:
+    case AArch64::STURXi:
+    case AArch64::STRXpre:
+    case AArch64::LDURSi:
+    case AArch64::LDRSpre:
+    case AArch64::LDURDi:
+    case AArch64::LDRDpre:
+    case AArch64::LDURQi:
+    case AArch64::LDRQpre:
+    case AArch64::LDURWi:
+    case AArch64::LDRWpre:
+    case AArch64::LDURXi:
+    case AArch64::LDRXpre:
+    case AArch64::LDRSWpre:
+    case AArch64::LDURSWi:
+    case AArch64::LDURHHi:
+    case AArch64::LDURBBi:
+    case AArch64::LDURSBWi:
+    case AArch64::LDURSHWi:
+      return true;
+    }
   }
-}
 
-// Is this a candidate for ld/st merging or pairing?  For example, we don't
-// touch volatiles or load/stores that have a hint to avoid pair formation.
-bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
+  std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
+    switch (Opc) {
+    default:
+      return {};
+    case AArch64::PRFMui:
+      return AArch64::PRFUMi;
+    case AArch64::LDRXui:
+      return AArch64::LDURXi;
+    case AArch64::LDRWui:
+      return AArch64::LDURWi;
+    case AArch64::LDRBui:
+      return AArch64::LDURBi;
+    case AArch64::LDRHui:
+      return AArch64::LDURHi;
+    case AArch64::LDRSui:
+      return AArch64::LDURSi;
+    case AArch64::LDRDui:
+      return AArch64::LDURDi;
+    case AArch64::LDRQui:
+      return AArch64::LDURQi;
+    case AArch64::LDRBBui:
+      return AArch64::LDURBBi;
+    case AArch64::LDRHHui:
+      return AArch64::LDURHHi;
+    case AArch64::LDRSBXui:
+      return AArch64::LDURSBXi;
+    case AArch64::LDRSBWui:
+      return AArch64::LDURSBWi;
+    case AArch64::LDRSHXui:
+      return AArch64::LDURSHXi;
+    case AArch64::LDRSHWui:
+      return AArch64::LDURSHWi;
+    case AArch64::LDRSWui:
+      return AArch64::LDURSWi;
+    case AArch64::STRXui:
+      return AArch64::STURXi;
+    case AArch64::STRWui:
+      return AArch64::STURWi;
+    case AArch64::STRBui:
+      return AArch64::STURBi;
+    case AArch64::STRHui:
+      return AArch64::STURHi;
+    case AArch64::STRSui:
+      return AArch64::STURSi;
+    case AArch64::STRDui:
+      return AArch64::STURDi;
+    case AArch64::STRQui:
+      return AArch64::STURQi;
+    case AArch64::STRBBui:
+      return AArch64::STURBBi;
+    case AArch64::STRHHui:
+      return AArch64::STURHHi;
+    }
+  }
 
-  bool IsPreLdSt = isPreLdSt(MI);
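+  // Return the operand index of the immediate offset for the given opcode;
+  // the index depends on how many register operands precede it.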
+  unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
+    switch (Opc) {
+    default:
+      llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
+    case AArch64::ADDG:
+    case AArch64::LDAPURBi:
+    case AArch64::LDAPURHi:
+    case AArch64::LDAPURi:
+    case AArch64::LDAPURSBWi:
+    case AArch64::LDAPURSBXi:
+    case AArch64::LDAPURSHWi:
+    case AArch64::LDAPURSHXi:
+    case AArch64::LDAPURSWi:
+    case AArch64::LDAPURXi:
+    case AArch64::LDR_PPXI:
+    case AArch64::LDR_PXI:
+    case AArch64::LDR_ZXI:
+    case AArch64::LDR_ZZXI:
+    case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::LDR_ZZZXI:
+    case AArch64::LDR_ZZZZXI:
+    case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::LDRBBui:
+    case AArch64::LDRBui:
+    case AArch64::LDRDui:
+    case AArch64::LDRHHui:
+    case AArch64::LDRHui:
+    case AArch64::LDRQui:
+    case AArch64::LDRSBWui:
+    case AArch64::LDRSBXui:
+    case AArch64::LDRSHWui:
+    case AArch64::LDRSHXui:
+    case AArch64::LDRSui:
+    case AArch64::LDRSWui:
+    case AArch64::LDRWui:
+    case AArch64::LDRXui:
+    case AArch64::LDURBBi:
+    case AArch64::LDURBi:
+    case AArch64::LDURDi:
+    case AArch64::LDURHHi:
+    case AArch64::LDURHi:
+    case AArch64::LDURQi:
+    case AArch64::LDURSBWi:
+    case AArch64::LDURSBXi:
+    case AArch64::LDURSHWi:
+    case AArch64::LDURSHXi:
+    case AArch64::LDURSi:
+    case AArch64::LDURSWi:
+    case AArch64::LDURWi:
+    case AArch64::LDURXi:
+    case AArch64::PRFMui:
+    case AArch64::PRFUMi:
+    case AArch64::ST2Gi:
+    case AArch64::STGi:
+    case AArch64::STLURBi:
+    case AArch64::STLURHi:
+    case AArch64::STLURWi:
+    case AArch64::STLURXi:
+    case AArch64::StoreSwiftAsyncContext:
+    case AArch64::STR_PPXI:
+    case AArch64::STR_PXI:
+    case AArch64::STR_ZXI:
+    case AArch64::STR_ZZXI:
+    case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::STR_ZZZXI:
+    case AArch64::STR_ZZZZXI:
+    case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::STRBBui:
+    case AArch64::STRBui:
+    case AArch64::STRDui:
+    case AArch64::STRHHui:
+    case AArch64::STRHui:
+    case AArch64::STRQui:
+    case AArch64::STRSui:
+    case AArch64::STRWui:
+    case AArch64::STRXui:
+    case AArch64::STURBBi:
+    case AArch64::STURBi:
+    case AArch64::STURDi:
+    case AArch64::STURHHi:
+    case AArch64::STURHi:
+    case AArch64::STURQi:
+    case AArch64::STURSi:
+    case AArch64::STURWi:
+    case AArch64::STURXi:
+    case AArch64::STZ2Gi:
+    case AArch64::STZGi:
+    case AArch64::TAGPstack:
+    case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+    case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
+      return 2;
+    case AArch64::LD1B_D_IMM:
+    case AArch64::LD1B_H_IMM:
+    case AArch64::LD1B_IMM:
+    case AArch64::LD1B_S_IMM:
+    case AArch64::LD1D_IMM:
+    case AArch64::LD1H_D_IMM:
+    case AArch64::LD1H_IMM:
+    case AArch64::LD1H_S_IMM:
+    case AArch64::LD1RB_D_IMM:
+    case AArch64::LD1RB_H_IMM:
+    case AArch64::LD1RB_IMM:
+    case AArch64::LD1RB_S_IMM:
+    case AArch64::LD1RD_IMM:
+    case AArch64::LD1RH_D_IMM:
+    case AArch64::LD1RH_IMM:
+    case AArch64::LD1RH_S_IMM:
+    case AArch64::LD1RSB_D_IMM:
+    case AArch64::LD1RSB_H_IMM:
+    case AArch64::LD1RSB_S_IMM:
+    case AArch64::LD1RSH_D_IMM:
+    case AArch64::LD1RSH_S_IMM:
+    case AArch64::LD1RSW_IMM:
+    case AArch64::LD1RW_D_IMM:
+    case AArch64::LD1RW_IMM:
+    case AArch64::LD1SB_D_IMM:
+    case AArch64::LD1SB_H_IMM:
+    case AArch64::LD1SB_S_IMM:
+    case AArch64::LD1SH_D_IMM:
+    case AArch64::LD1SH_S_IMM:
+    case AArch64::LD1SW_D_IMM:
+    case AArch64::LD1W_D_IMM:
+    case AArch64::LD1W_IMM:
+    case AArch64::LD2B_IMM:
+    case AArch64::LD2D_IMM:
+    case AArch64::LD2H_IMM:
+    case AArch64::LD2W_IMM:
+    case AArch64::LD3B_IMM:
+    case AArch64::LD3D_IMM:
+    case AArch64::LD3H_IMM:
+    case AArch64::LD3W_IMM:
+    case AArch64::LD4B_IMM:
+    case AArch64::LD4D_IMM:
+    case AArch64::LD4H_IMM:
+    case AArch64::LD4W_IMM:
+    case AArch64::LDG:
+    case AArch64::LDNF1B_D_IMM:
+    case AArch64::LDNF1B_H_IMM:
+    case AArch64::LDNF1B_IMM:
+    case AArch64::LDNF1B_S_IMM:
+    case AArch64::LDNF1D_IMM:
+    case AArch64::LDNF1H_D_IMM:
+    case AArch64::LDNF1H_IMM:
+    case AArch64::LDNF1H_S_IMM:
+    case AArch64::LDNF1SB_D_IMM:
+    case AArch64::LDNF1SB_H_IMM:
+    case AArch64::LDNF1SB_S_IMM:
+    case AArch64::LDNF1SH_D_IMM:
+    case AArch64::LDNF1SH_S_IMM:
+    case AArch64::LDNF1SW_D_IMM:
+    case AArch64::LDNF1W_D_IMM:
+    case AArch64::LDNF1W_IMM:
+    case AArch64::LDNPDi:
+    case AArch64::LDNPQi:
+    case AArch64::LDNPSi:
+    case AArch64::LDNPWi:
+    case AArch64::LDNPXi:
+    case AArch64::LDNT1B_ZRI:
+    case AArch64::LDNT1D_ZRI:
+    case AArch64::LDNT1H_ZRI:
+    case AArch64::LDNT1W_ZRI:
+    case AArch64::LDPDi:
+    case AArch64::LDPQi:
+    case AArch64::LDPSi:
+    case AArch64::LDPWi:
+    case AArch64::LDPXi:
+    case AArch64::LDRBBpost:
+    case AArch64::LDRBBpre:
+    case AArch64::LDRBpost:
+    case AArch64::LDRBpre:
+    case AArch64::LDRDpost:
+    case AArch64::LDRDpre:
+    case AArch64::LDRHHpost:
+    case AArch64::LDRHHpre:
+    case AArch64::LDRHpost:
+    case AArch64::LDRHpre:
+    case AArch64::LDRQpost:
+    case AArch64::LDRQpre:
+    case AArch64::LDRSpost:
+    case AArch64::LDRSpre:
+    case AArch64::LDRWpost:
+    case AArch64::LDRWpre:
+    case AArch64::LDRXpost:
+    case AArch64::LDRXpre:
+    case AArch64::ST1B_D_IMM:
+    case AArch64::ST1B_H_IMM:
+    case AArch64::ST1B_IMM:
+    case AArch64::ST1B_S_IMM:
+    case AArch64::ST1D_IMM:
+    case AArch64::ST1H_D_IMM:
+    case AArch64::ST1H_IMM:
+    case AArch64::ST1H_S_IMM:
+    case AArch64::ST1W_D_IMM:
+    case AArch64::ST1W_IMM:
+    case AArch64::ST2B_IMM:
+    case AArch64::ST2D_IMM:
+    case AArch64::ST2H_IMM:
+    case AArch64::ST2W_IMM:
+    case AArch64::ST3B_IMM:
+    case AArch64::ST3D_IMM:
+    case AArch64::ST3H_IMM:
+    case AArch64::ST3W_IMM:
+    case AArch64::ST4B_IMM:
+    case AArch64::ST4D_IMM:
+    case AArch64::ST4H_IMM:
+    case AArch64::ST4W_IMM:
+    case AArch64::STGPi:
+    case AArch64::STGPreIndex:
+    case AArch64::STZGPreIndex:
+    case AArch64::ST2GPreIndex:
+    case AArch64::STZ2GPreIndex:
+    case AArch64::STGPostIndex:
+    case AArch64::STZGPostIndex:
+    case AArch64::ST2GPostIndex:
+    case AArch64::STZ2GPostIndex:
+    case AArch64::STNPDi:
+    case AArch64::STNPQi:
+    case AArch64::STNPSi:
+    case AArch64::STNPWi:
+    case AArch64::STNPXi:
+    case AArch64::STNT1B_ZRI:
+    case AArch64::STNT1D_ZRI:
+    case AArch64::STNT1H_ZRI:
+    case AArch64::STNT1W_ZRI:
+    case AArch64::STPDi:
+    case AArch64::STPQi:
+    case AArch64::STPSi:
+    case AArch64::STPWi:
+    case AArch64::STPXi:
+    case AArch64::STRBBpost:
+    case AArch64::STRBBpre:
+    case AArch64::STRBpost:
+    case AArch64::STRBpre:
+    case AArch64::STRDpost:
+    case AArch64::STRDpre:
+    case AArch64::STRHHpost:
+    case AArch64::STRHHpre:
+    case AArch64::STRHpost:
+    case AArch64::STRHpre:
+    case AArch64::STRQpost:
+    case AArch64::STRQpre:
+    case AArch64::STRSpost:
+    case AArch64::STRSpre:
+    case AArch64::STRWpost:
+    case AArch64::STRWpre:
+    case AArch64::STRXpost:
+    case AArch64::STRXpre:
+      return 3;
+    case AArch64::LDPDpost:
+    case AArch64::LDPDpre:
+    case AArch64::LDPQpost:
+    case AArch64::LDPQpre:
+    case AArch64::LDPSpost:
+    case AArch64::LDPSpre:
+    case AArch64::LDPWpost:
+    case AArch64::LDPWpre:
+    case AArch64::LDPXpost:
+    case AArch64::LDPXpre:
+    case AArch64::STGPpre:
+    case AArch64::STGPpost:
+    case AArch64::STPDpost:
+    case AArch64::STPDpre:
+    case AArch64::STPQpost:
+    case AArch64::STPQpre:
+    case AArch64::STPSpost:
+    case AArch64::STPSpre:
+    case AArch64::STPWpost:
+    case AArch64::STPWpre:
+    case AArch64::STPXpost:
+    case AArch64::STPXpre:
+      return 4;
+    }
+  }
 
-  // If this is a volatile load/store, don't mess with it.
-  if (MI.hasOrderedMemoryRef())
-    return false;
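+  // Opcodes that are candidates for combining into a load/store pair:
+  // scaled, unscaled, pre-index, and the SVE fill/spill forms listed below.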
+  bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      return false;
+    // Scaled instructions.
+    case AArch64::STRSui:
+    case AArch64::STRDui:
+    case AArch64::STRQui:
+    case AArch64::STRXui:
+    case AArch64::STRWui:
+    case AArch64::LDRSui:
+    case AArch64::LDRDui:
+    case AArch64::LDRQui:
+    case AArch64::LDRXui:
+    case AArch64::LDRWui:
+    case AArch64::LDRSWui:
+    // Unscaled instructions.
+    case AArch64::STURSi:
+    case AArch64::STRSpre:
+    case AArch64::STURDi:
+    case AArch64::STRDpre:
+    case AArch64::STURQi:
+    case AArch64::STRQpre:
+    case AArch64::STURWi:
+    case AArch64::STRWpre:
+    case AArch64::STURXi:
+    case AArch64::STRXpre:
+    case AArch64::LDURSi:
+    case AArch64::LDRSpre:
+    case AArch64::LDURDi:
+    case AArch64::LDRDpre:
+    case AArch64::LDURQi:
+    case AArch64::LDRQpre:
+    case AArch64::LDURWi:
+    case AArch64::LDRWpre:
+    case AArch64::LDURXi:
+    case AArch64::LDRXpre:
+    case AArch64::LDURSWi:
+    case AArch64::LDRSWpre:
+    // SVE instructions.
+    case AArch64::LDR_ZXI:
+    case AArch64::STR_ZXI:
+      return true;
+    }
+  }
+
+  bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      assert((!MI.isCall() || !MI.isReturn()) &&
+             "Unexpected instruction - was a new tail call opcode introduced?");
+      return false;
+    case AArch64::TCRETURNdi:
+    case AArch64::TCRETURNri:
+    case AArch64::TCRETURNrix16x17:
+    case AArch64::TCRETURNrix17:
+    case AArch64::TCRETURNrinotx16:
+    case AArch64::TCRETURNriALL:
+    case AArch64::AUTH_TCRETURN:
+    case AArch64::AUTH_TCRETURN_BTI:
+      return true;
+    }
+  }
 
-  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
-  // For Pre-inc LD/ST, the operand is shifted by one.
-  assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
-          MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
-         "Expected a reg or frame index operand.");
+  unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
+    switch (Opc) {
+    default:
+      llvm_unreachable("Opcode has no flag setting equivalent!");
+    // 32-bit cases:
+    case AArch64::ADDWri:
+      return AArch64::ADDSWri;
+    case AArch64::ADDWrr:
+      return AArch64::ADDSWrr;
+    case AArch64::ADDWrs:
+      return AArch64::ADDSWrs;
+    case AArch64::ADDWrx:
+      return AArch64::ADDSWrx;
+    case AArch64::ANDWri:
+      return AArch64::ANDSWri;
+    case AArch64::ANDWrr:
+      return AArch64::ANDSWrr;
+    case AArch64::ANDWrs:
+      return AArch64::ANDSWrs;
+    case AArch64::BICWrr:
+      return AArch64::BICSWrr;
+    case AArch64::BICWrs:
+      return AArch64::BICSWrs;
+    case AArch64::SUBWri:
+      return AArch64::SUBSWri;
+    case AArch64::SUBWrr:
+      return AArch64::SUBSWrr;
+    case AArch64::SUBWrs:
+      return AArch64::SUBSWrs;
+    case AArch64::SUBWrx:
+      return AArch64::SUBSWrx;
+    // 64-bit cases:
+    case AArch64::ADDXri:
+      return AArch64::ADDSXri;
+    case AArch64::ADDXrr:
+      return AArch64::ADDSXrr;
+    case AArch64::ADDXrs:
+      return AArch64::ADDSXrs;
+    case AArch64::ADDXrx:
+      return AArch64::ADDSXrx;
+    case AArch64::ANDXri:
+      return AArch64::ANDSXri;
+    case AArch64::ANDXrr:
+      return AArch64::ANDSXrr;
+    case AArch64::ANDXrs:
+      return AArch64::ANDSXrs;
+    case AArch64::BICXrr:
+      return AArch64::BICSXrr;
+    case AArch64::BICXrs:
+      return AArch64::BICSXrs;
+    case AArch64::SUBXri:
+      return AArch64::SUBSXri;
+    case AArch64::SUBXrr:
+      return AArch64::SUBSXrr;
+    case AArch64::SUBXrs:
+      return AArch64::SUBSXrs;
+    case AArch64::SUBXrx:
+      return AArch64::SUBSXrx;
+    // SVE instructions:
+    case AArch64::AND_PPzPP:
+      return AArch64::ANDS_PPzPP;
+    case AArch64::BIC_PPzPP:
+      return AArch64::BICS_PPzPP;
+    case AArch64::EOR_PPzPP:
+      return AArch64::EORS_PPzPP;
+    case AArch64::NAND_PPzPP:
+      return AArch64::NANDS_PPzPP;
+    case AArch64::NOR_PPzPP:
+      return AArch64::NORS_PPzPP;
+    case AArch64::ORN_PPzPP:
+      return AArch64::ORNS_PPzPP;
+    case AArch64::ORR_PPzPP:
+      return AArch64::ORRS_PPzPP;
+    case AArch64::BRKA_PPzP:
+      return AArch64::BRKAS_PPzP;
+    case AArch64::BRKPA_PPzPP:
+      return AArch64::BRKPAS_PPzPP;
+    case AArch64::BRKB_PPzP:
+      return AArch64::BRKBS_PPzP;
+    case AArch64::BRKPB_PPzPP:
+      return AArch64::BRKPBS_PPzPP;
+    case AArch64::BRKN_PPzP:
+      return AArch64::BRKNS_PPzP;
+    case AArch64::RDFFR_PPz:
+      return AArch64::RDFFRS_PPz;
+    case AArch64::PTRUE_B:
+      return AArch64::PTRUES_B;
+    }
+  }
 
-  // For Pre-indexed addressing quadword instructions, the third operand is the
-  // immediate value.
-  bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
+  // Is this a candidate for ld/st merging or pairing?  For example, we don't
+  // touch volatiles or load/stores that have a hint to avoid pair formation.
+  bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI)
+      const {
 
-  if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
-    return false;
+    bool IsPreLdSt = isPreLdSt(MI);
 
-  // Can't merge/pair if the instruction modifies the base register.
-  // e.g., ldr x0, [x0]
-  // This case will never occur with an FI base.
-  // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
-  // STR<S,D,Q,W,X>pre, it can be merged.
-  // For example:
-  //   ldr q0, [x11, #32]!
-  //   ldr q1, [x11, #16]
-  //   to
-  //   ldp q0, q1, [x11, #32]!
-  if (MI.getOperand(1).isReg() && !IsPreLdSt) {
-    Register BaseReg = MI.getOperand(1).getReg();
-    const TargetRegisterInfo *TRI = &getRegisterInfo();
-    if (MI.modifiesRegister(BaseReg, TRI))
+    // If this is a volatile load/store, don't mess with it.
+    if (MI.hasOrderedMemoryRef())
+      return false;
+
+    // Make sure this is a reg/fi+imm (as opposed to an address reloc).
+    // For Pre-inc LD/ST, the operand is shifted by one.
+    assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
+            MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
+           "Expected a reg or frame index operand.");
+
+    // For Pre-indexed addressing quadword instructions, the third operand is
+    // the immediate value.
+    bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
+
+    if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
       return false;
+
+    // Can't merge/pair if the instruction modifies the base register.
+    // e.g., ldr x0, [x0]
+    // This case will never occur with an FI base.
+    // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
+    // STR<S,D,Q,W,X>pre, it can be merged.
+    // For example:
+    //   ldr q0, [x11, #32]!
+    //   ldr q1, [x11, #16]
+    //   to
+    //   ldp q0, q1, [x11, #32]!
+    if (MI.getOperand(1).isReg() && !IsPreLdSt) {
+      Register BaseReg = MI.getOperand(1).getReg();
+      const TargetRegisterInfo *TRI = &getRegisterInfo();
+      if (MI.modifiesRegister(BaseReg, TRI))
+        return false;
+    }
+
+    // Pairing SVE fills/spills is only valid for little-endian targets that
+    // implement VLS 128.
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case AArch64::LDR_ZXI:
+    case AArch64::STR_ZXI:
+      if (!Subtarget.isLittleEndian() ||
+          Subtarget.getSVEVectorSizeInBits() != 128)
+        return false;
+    }
+
+    // Check if this load/store has a hint to avoid pair formation.
+    // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
+    if (isLdStPairSuppressed(MI))
+      return false;
+
+    // Do not pair any callee-save store/reload instructions in the
+    // prologue/epilogue if the CFI information encoded the operations as
+    // separate instructions, as that will cause the size of the actual prologue
+    // to mismatch with the prologue size recorded in the Windows CFI.
+    const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
+    bool NeedsWinCFI = MAI->usesWindowsCFI() &&
+                       MI.getMF()->getFunction().needsUnwindTableEntry();
+    if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
+                        MI.getFlag(MachineInstr::FrameDestroy)))
+      return false;
+
+    // On some CPUs quad load/store pairs are slower than two single
+    // load/stores.
+    if (Subtarget.isPaired128Slow()) {
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case AArch64::LDURQi:
+      case AArch64::STURQi:
+      case AArch64::LDRQui:
+      case AArch64::STRQui:
+        return false;
+      }
+    }
+
+    return true;
   }
 
-  // Pairing SVE fills/spills is only valid for little-endian targets that
-  // implement VLS 128.
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case AArch64::LDR_ZXI:
-  case AArch64::STR_ZXI:
-    if (!Subtarget.isLittleEndian() ||
-        Subtarget.getSVEVectorSizeInBits() != 128)
+  bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
+      const MachineInstr &LdSt,
+      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+      bool &OffsetIsScalable, LocationSize &Width,
+      const TargetRegisterInfo *TRI) const {
+    if (!LdSt.mayLoadOrStore())
+      return false;
+
+    const MachineOperand *BaseOp;
+    TypeSize WidthN(0, false);
+    if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
+                                      WidthN, TRI))
       return false;
+    // The maximum vscale is 16 under AArch64, so return the maximal extent
+    // for the vector.
+    Width = LocationSize::precise(WidthN);
+    BaseOps.push_back(BaseOp);
+    return true;
   }
 
-  // Check if this load/store has a hint to avoid pair formation.
-  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
-  if (isLdStPairSuppressed(MI))
-    return false;
+  std::optional<ExtAddrMode> AArch64InstrInfo::getAddrModeFromMemoryOp(
+      const MachineInstr &MemI, const TargetRegisterInfo *TRI) const {
+    const MachineOperand *Base; // Filled with the base operand of MI.
+    int64_t Offset;             // Filled with the offset of MI.
+    bool OffsetIsScalable;
+    if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
+      return std::nullopt;
 
-  // Do not pair any callee-save store/reload instructions in the
-  // prologue/epilogue if the CFI information encoded the operations as separate
-  // instructions, as that will cause the size of the actual prologue to mismatch
-  // with the prologue size recorded in the Windows CFI.
-  const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
-  bool NeedsWinCFI = MAI->usesWindowsCFI() &&
-                     MI.getMF()->getFunction().needsUnwindTableEntry();
-  if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
-                      MI.getFlag(MachineInstr::FrameDestroy)))
-    return false;
+    if (!Base->isReg())
+      return std::nullopt;
+    ExtAddrMode AM;
+    AM.BaseReg = Base->getReg();
+    AM.Displacement = Offset;
+    AM.ScaledReg = 0;
+    AM.Scale = 0;
+    return AM;
+  }
 
-  // On some CPUs quad load/store pairs are slower than two single load/stores.
-  if (Subtarget.isPaired128Slow()) {
-    switch (MI.getOpcode()) {
+  bool AArch64InstrInfo::canFoldIntoAddrMode(
+      const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI,
+      ExtAddrMode &AM) const {
+    // Filter out instructions into which we cannot fold.
+    unsigned NumBytes;
+    int64_t OffsetScale = 1;
+    switch (MemI.getOpcode()) {
     default:
-      break;
+      return false;
+
     case AArch64::LDURQi:
     case AArch64::STURQi:
+      NumBytes = 16;
+      break;
+
+    case AArch64::LDURDi:
+    case AArch64::STURDi:
+    case AArch64::LDURXi:
+    case AArch64::STURXi:
+      NumBytes = 8;
+      break;
+
+    case AArch64::LDURWi:
+    case AArch64::LDURSWi:
+    case AArch64::STURWi:
+      NumBytes = 4;
+      break;
+
+    case AArch64::LDURHi:
+    case AArch64::STURHi:
+    case AArch64::LDURHHi:
+    case AArch64::STURHHi:
+    case AArch64::LDURSHXi:
+    case AArch64::LDURSHWi:
+      NumBytes = 2;
+      break;
+
+    case AArch64::LDRBroX:
+    case AArch64::LDRBBroX:
+    case AArch64::LDRSBXroX:
+    case AArch64::LDRSBWroX:
+    case AArch64::STRBroX:
+    case AArch64::STRBBroX:
+    case AArch64::LDURBi:
+    case AArch64::LDURBBi:
+    case AArch64::LDURSBXi:
+    case AArch64::LDURSBWi:
+    case AArch64::STURBi:
+    case AArch64::STURBBi:
+    case AArch64::LDRBui:
+    case AArch64::LDRBBui:
+    case AArch64::LDRSBXui:
+    case AArch64::LDRSBWui:
+    case AArch64::STRBui:
+    case AArch64::STRBBui:
+      NumBytes = 1;
+      break;
+
+    case AArch64::LDRQroX:
+    case AArch64::STRQroX:
     case AArch64::LDRQui:
     case AArch64::STRQui:
-      return false;
-    }
-  }
+      NumBytes = 16;
+      OffsetScale = 16;
+      break;
 
-  return true;
-}
+    case AArch64::LDRDroX:
+    case AArch64::STRDroX:
+    case AArch64::LDRXroX:
+    case AArch64::STRXroX:
+    case AArch64::LDRDui:
+    case AArch64::STRDui:
+    case AArch64::LDRXui:
+    case AArch64::STRXui:
+      NumBytes = 8;
+      OffsetScale = 8;
+      break;
 
-bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
-    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
-    int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
-    const TargetRegisterInfo *TRI) const {
-  if (!LdSt.mayLoadOrStore())
-    return false;
+    case AArch64::LDRWroX:
+    case AArch64::LDRSWroX:
+    case AArch64::STRWroX:
+    case AArch64::LDRWui:
+    case AArch64::LDRSWui:
+    case AArch64::STRWui:
+      NumBytes = 4;
+      OffsetScale = 4;
+      break;
 
-  const MachineOperand *BaseOp;
-  TypeSize WidthN(0, false);
-  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
-                                    WidthN, TRI))
-    return false;
-  // The maximum vscale is 16 under AArch64, return the maximal extent for the
-  // vector.
-  Width = LocationSize::precise(WidthN);
-  BaseOps.push_back(BaseOp);
-  return true;
-}
+    case AArch64::LDRHroX:
+    case AArch64::STRHroX:
+    case AArch64::LDRHHroX:
+    case AArch64::STRHHroX:
+    case AArch64::LDRSHXroX:
+    case AArch64::LDRSHWroX:
+    case AArch64::LDRHui:
+    case AArch64::STRHui:
+    case AArch64::LDRHHui:
+    case AArch64::STRHHui:
+    case AArch64::LDRSHXui:
+    case AArch64::LDRSHWui:
+      NumBytes = 2;
+      OffsetScale = 2;
+      break;
+    }
 
-std::optional<ExtAddrMode>
-AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
-                                          const TargetRegisterInfo *TRI) const {
-  const MachineOperand *Base; // Filled with the base operand of MI.
-  int64_t Offset;             // Filled with the offset of MI.
-  bool OffsetIsScalable;
-  if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
-    return std::nullopt;
+    // Check the fold operand is not the loaded/stored value.
+    const MachineOperand &BaseRegOp = MemI.getOperand(0);
+    if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
+      return false;
 
-  if (!Base->isReg())
-    return std::nullopt;
-  ExtAddrMode AM;
-  AM.BaseReg = Base->getReg();
-  AM.Displacement = Offset;
-  AM.ScaledReg = 0;
-  AM.Scale = 0;
-  return AM;
-}
-
-bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
-                                           Register Reg,
-                                           const MachineInstr &AddrI,
-                                           ExtAddrMode &AM) const {
-  // Filter out instructions into which we cannot fold.
-  unsigned NumBytes;
-  int64_t OffsetScale = 1;
-  switch (MemI.getOpcode()) {
-  default:
-    return false;
+    // Handle memory instructions with a [Reg, Reg] addressing mode.
+    if (MemI.getOperand(2).isReg()) {
+      // Bail if the addressing mode already includes extension of the offset
+      // register.
+      if (MemI.getOperand(3).getImm())
+        return false;
 
-  case AArch64::LDURQi:
-  case AArch64::STURQi:
-    NumBytes = 16;
-    break;
+      // Check if we actually have a scaled offset.
+      if (MemI.getOperand(4).getImm() == 0)
+        OffsetScale = 1;
 
-  case AArch64::LDURDi:
-  case AArch64::STURDi:
-  case AArch64::LDURXi:
-  case AArch64::STURXi:
-    NumBytes = 8;
-    break;
+      // If the address instruction is folded into the base register, then the
+      // addressing mode must not have a scale. Then we can swap the base and
+      // the scaled registers.
+      if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
+        return false;
 
-  case AArch64::LDURWi:
-  case AArch64::LDURSWi:
-  case AArch64::STURWi:
-    NumBytes = 4;
-    break;
+      switch (AddrI.getOpcode()) {
+      default:
+        return false;
 
-  case AArch64::LDURHi:
-  case AArch64::STURHi:
-  case AArch64::LDURHHi:
-  case AArch64::STURHHi:
-  case AArch64::LDURSHXi:
-  case AArch64::LDURSHWi:
-    NumBytes = 2;
-    break;
+      case AArch64::SBFMXri:
+        // sxtw Xa, Wm
+        // ldr Xd, [Xn, Xa, lsl #N]
+        // ->
+        // ldr Xd, [Xn, Wm, sxtw #N]
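+        // Only an SBFMXri with immr == 0 and imms == 31 is the sxtw alias.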
+        if (AddrI.getOperand(2).getImm() != 0 ||
+            AddrI.getOperand(3).getImm() != 31)
+          return false;
 
-  case AArch64::LDRBroX:
-  case AArch64::LDRBBroX:
-  case AArch64::LDRSBXroX:
-  case AArch64::LDRSBWroX:
-  case AArch64::STRBroX:
-  case AArch64::STRBBroX:
-  case AArch64::LDURBi:
-  case AArch64::LDURBBi:
-  case AArch64::LDURSBXi:
-  case AArch64::LDURSBWi:
-  case AArch64::STURBi:
-  case AArch64::STURBBi:
-  case AArch64::LDRBui:
-  case AArch64::LDRBBui:
-  case AArch64::LDRSBXui:
-  case AArch64::LDRSBWui:
-  case AArch64::STRBui:
-  case AArch64::STRBBui:
-    NumBytes = 1;
-    break;
+        AM.BaseReg = MemI.getOperand(1).getReg();
+        if (AM.BaseReg == Reg)
+          AM.BaseReg = MemI.getOperand(2).getReg();
+        AM.ScaledReg = AddrI.getOperand(1).getReg();
+        AM.Scale = OffsetScale;
+        AM.Displacement = 0;
+        AM.Form = ExtAddrMode::Formula::SExtScaledReg;
+        return true;
 
-  case AArch64::LDRQroX:
-  case AArch64::STRQroX:
-  case AArch64::LDRQui:
-  case AArch64::STRQui:
-    NumBytes = 16;
-    OffsetScale = 16;
-    break;
+      case TargetOpcode::SUBREG_TO_REG: {
+        // mov Wa, Wm
+        // ldr Xd, [Xn, Xa, lsl #N]
+        // ->
+        // ldr Xd, [Xn, Wm, uxtw #N]
 
-  case AArch64::LDRDroX:
-  case AArch64::STRDroX:
-  case AArch64::LDRXroX:
-  case AArch64::STRXroX:
-  case AArch64::LDRDui:
-  case AArch64::STRDui:
-  case AArch64::LDRXui:
-  case AArch64::STRXui:
-    NumBytes = 8;
-    OffsetScale = 8;
-    break;
+        // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
+        if (AddrI.getOperand(1).getImm() != 0 ||
+            AddrI.getOperand(3).getImm() != AArch64::sub_32)
+          return false;
 
-  case AArch64::LDRWroX:
-  case AArch64::LDRSWroX:
-  case AArch64::STRWroX:
-  case AArch64::LDRWui:
-  case AArch64::LDRSWui:
-  case AArch64::STRWui:
-    NumBytes = 4;
-    OffsetScale = 4;
-    break;
+        const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
+        Register OffsetReg = AddrI.getOperand(2).getReg();
+        if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
+          return false;
 
-  case AArch64::LDRHroX:
-  case AArch64::STRHroX:
-  case AArch64::LDRHHroX:
-  case AArch64::STRHHroX:
-  case AArch64::LDRSHXroX:
-  case AArch64::LDRSHWroX:
-  case AArch64::LDRHui:
-  case AArch64::STRHui:
-  case AArch64::LDRHHui:
-  case AArch64::STRHHui:
-  case AArch64::LDRSHXui:
-  case AArch64::LDRSHWui:
-    NumBytes = 2;
-    OffsetScale = 2;
-    break;
-  }
+        const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
+        if (DefMI.getOpcode() != AArch64::ORRWrs ||
+            DefMI.getOperand(1).getReg() != AArch64::WZR ||
+            DefMI.getOperand(3).getImm() != 0)
+          return false;
 
-  // Check the fold operand is not the loaded/stored value.
-  const MachineOperand &BaseRegOp = MemI.getOperand(0);
-  if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
-    return false;
+        AM.BaseReg = MemI.getOperand(1).getReg();
+        if (AM.BaseReg == Reg)
+          AM.BaseReg = MemI.getOperand(2).getReg();
+        AM.ScaledReg = DefMI.getOperand(2).getReg();
+        AM.Scale = OffsetScale;
+        AM.Displacement = 0;
+        AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
+        return true;
+      }
+      }
+    }
 
-  // Handle memory instructions with a [Reg, Reg] addressing mode.
-  if (MemI.getOperand(2).isReg()) {
-    // Bail if the addressing mode already includes extension of the offset
-    // register.
-    if (MemI.getOperand(3).getImm())
-      return false;
+    // Handle memory instructions with a [Reg, #Imm] addressing mode.
 
-    // Check if we actually have a scaled offset.
-    if (MemI.getOperand(4).getImm() == 0)
-      OffsetScale = 1;
+    // Check we are not breaking a potential conversion to an LDP.
+    auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
+                                   int64_t NewOffset) -> bool {
+      int64_t MinOffset, MaxOffset;
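+      // LDP/STP encode a signed 7-bit immediate scaled by the access size,
+      // so the reachable offsets are [-64, 63] * NumBytes.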
+      switch (NumBytes) {
+      default:
+        return true;
+      case 4:
+        MinOffset = -256;
+        MaxOffset = 252;
+        break;
+      case 8:
+        MinOffset = -512;
+        MaxOffset = 504;
+        break;
+      case 16:
+        MinOffset = -1024;
+        MaxOffset = 1008;
+        break;
+      }
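+      // It is fine if the old offset was already outside the LDP range; only
+      // reject the fold when it would move an in-range offset out of range.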
+      return OldOffset < MinOffset || OldOffset > MaxOffset ||
+             (NewOffset >= MinOffset && NewOffset <= MaxOffset);
+    };
+    auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
+      int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
+      int64_t NewOffset = OldOffset + Disp;
+      if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
+        return false;
+      // If the old offset would fit into an LDP, but the new offset wouldn't,
+      // bail out.
+      if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
+        return false;
+      AM.BaseReg = AddrI.getOperand(1).getReg();
+      AM.ScaledReg = 0;
+      AM.Scale = 0;
+      AM.Displacement = NewOffset;
+      AM.Form = ExtAddrMode::Formula::Basic;
+      return true;
+    };
 
-    // If the address instructions is folded into the base register, then the
-    // addressing mode must not have a scale. Then we can swap the base and the
-    // scaled registers.
-    if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
-      return false;
+    auto canFoldAddRegIntoAddrMode =
+        [&](int64_t Scale,
+            ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
+      if (MemI.getOperand(2).getImm() != 0)
+        return false;
+      if ((unsigned)Scale != Scale)
+        return false;
+      if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
+        return false;
+      AM.BaseReg = AddrI.getOperand(1).getReg();
+      AM.ScaledReg = AddrI.getOperand(2).getReg();
+      AM.Scale = Scale;
+      AM.Displacement = 0;
+      AM.Form = Form;
+      return true;
+    };
+
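+    // Folding an add into a Q-register store would produce a register-offset
+    // STRQ, which is slow on some subtargets; only allow it when optimizing
+    // for size.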
+    auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
+      unsigned Opcode = MemI.getOpcode();
+      return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
+             Subtarget.isSTRQroSlow();
+    };
 
+    int64_t Disp = 0;
+    const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
     switch (AddrI.getOpcode()) {
     default:
       return false;
 
-    case AArch64::SBFMXri:
-      // sxtw Xa, Wm
-      // ldr Xd, [Xn, Xa, lsl #N]
+    case AArch64::ADDXri:
+      // add Xa, Xn, #N
+      // ldr Xd, [Xa, #M]
       // ->
-      // ldr Xd, [Xn, Wm, sxtw #N]
-      if (AddrI.getOperand(2).getImm() != 0 ||
-          AddrI.getOperand(3).getImm() != 31)
-        return false;
+      // ldr Xd, [Xn, #N'+M]
+      Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
+      return canFoldAddSubImmIntoAddrMode(Disp);
 
-      AM.BaseReg = MemI.getOperand(1).getReg();
-      if (AM.BaseReg == Reg)
-        AM.BaseReg = MemI.getOperand(2).getReg();
-      AM.ScaledReg = AddrI.getOperand(1).getReg();
-      AM.Scale = OffsetScale;
-      AM.Displacement = 0;
-      AM.Form = ExtAddrMode::Formula::SExtScaledReg;
-      return true;
+    case AArch64::SUBXri:
+      // sub Xa, Xn, #N
+      // ldr Xd, [Xa, #M]
+      // ->
+      // ldr Xd, [Xn, #N'+M]
+      Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
+      return canFoldAddSubImmIntoAddrMode(-Disp);
+
+    case AArch64::ADDXrs: {
+      // add Xa, Xn, Xm, lsl #N
+      // ldr Xd, [Xa]
+      // ->
+      // ldr Xd, [Xn, Xm, lsl #N]
 
-    case TargetOpcode::SUBREG_TO_REG: {
-      // mov Wa, Wm
-      // ldr Xd, [Xn, Xa, lsl #N]
+      // Don't fold the add if the result would be slower, unless optimising for
+      // size.
+      unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
+      if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
+        return false;
+      Shift = AArch64_AM::getShiftValue(Shift);
+      if (!OptSize) {
+        if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
+          return false;
+        if (avoidSlowSTRQ(MemI))
+          return false;
+      }
+      return canFoldAddRegIntoAddrMode(1ULL << Shift);
+    }
+
+    case AArch64::ADDXrr:
+      // add Xa, Xn, Xm
+      // ldr Xd, [Xa]
       // ->
-      // ldr Xd, [Xn, Wm, uxtw #N]
+      // ldr Xd, [Xn, Xm, lsl #0]
 
-      // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
-      if (AddrI.getOperand(1).getImm() != 0 ||
-          AddrI.getOperand(3).getImm() != AArch64::sub_32)
+      // Don't fold the add if the result would be slower, unless optimising for
+      // size.
+      if (!OptSize && avoidSlowSTRQ(MemI))
         return false;
+      return canFoldAddRegIntoAddrMode(1);
 
-      const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
-      Register OffsetReg = AddrI.getOperand(2).getReg();
-      if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
+    case AArch64::ADDXrx:
+      // add Xa, Xn, Wm, {s,u}xtw #N
+      // ldr Xd, [Xa]
+      // ->
+      // ldr Xd, [Xn, Wm, {s,u}xtw #N]
+
+      // Don't fold the add if the result would be slower, unless optimising for
+      // size.
+      if (!OptSize && avoidSlowSTRQ(MemI))
         return false;
 
-      const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
-      if (DefMI.getOpcode() != AArch64::ORRWrs ||
-          DefMI.getOperand(1).getReg() != AArch64::WZR ||
-          DefMI.getOperand(3).getImm() != 0)
+      // Can fold only sign-/zero-extend of a word.
+      unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
+      AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
+      if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
         return false;
 
-      AM.BaseReg = MemI.getOperand(1).getReg();
-      if (AM.BaseReg == Reg)
-        AM.BaseReg = MemI.getOperand(2).getReg();
-      AM.ScaledReg = DefMI.getOperand(2).getReg();
-      AM.Scale = OffsetScale;
-      AM.Displacement = 0;
-      AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
-      return true;
+      return canFoldAddRegIntoAddrMode(
+          1ULL << AArch64_AM::getArithShiftValue(Imm),
+          (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
+                                       : ExtAddrMode::Formula::ZExtScaledReg);
     }
+  }
+
+  // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
+  // return the opcode of an instruction performing the same operation, but
+  // using the [Reg, Reg] addressing mode.
+  static unsigned regOffsetOpcode(unsigned Opcode) {
+    switch (Opcode) {
+    default:
+      llvm_unreachable("Address folding not implemented for instruction");
+
+    case AArch64::LDURQi:
+    case AArch64::LDRQui:
+      return AArch64::LDRQroX;
+    case AArch64::STURQi:
+    case AArch64::STRQui:
+      return AArch64::STRQroX;
+    case AArch64::LDURDi:
+    case AArch64::LDRDui:
+      return AArch64::LDRDroX;
+    case AArch64::STURDi:
+    case AArch64::STRDui:
+      return AArch64::STRDroX;
+    case AArch64::LDURXi:
+    case AArch64::LDRXui:
+      return AArch64::LDRXroX;
+    case AArch64::STURXi:
+    case AArch64::STRXui:
+      return AArch64::STRXroX;
+    case AArch64::LDURWi:
+    case AArch64::LDRWui:
+      return AArch64::LDRWroX;
+    case AArch64::LDURSWi:
+    case AArch64::LDRSWui:
+      return AArch64::LDRSWroX;
+    case AArch64::STURWi:
+    case AArch64::STRWui:
+      return AArch64::STRWroX;
+    case AArch64::LDURHi:
+    case AArch64::LDRHui:
+      return AArch64::LDRHroX;
+    case AArch64::STURHi:
+    case AArch64::STRHui:
+      return AArch64::STRHroX;
+    case AArch64::LDURHHi:
+    case AArch64::LDRHHui:
+      return AArch64::LDRHHroX;
+    case AArch64::STURHHi:
+    case AArch64::STRHHui:
+      return AArch64::STRHHroX;
+    case AArch64::LDURSHXi:
+    case AArch64::LDRSHXui:
+      return AArch64::LDRSHXroX;
+    case AArch64::LDURSHWi:
+    case AArch64::LDRSHWui:
+      return AArch64::LDRSHWroX;
+    case AArch64::LDURBi:
+    case AArch64::LDRBui:
+      return AArch64::LDRBroX;
+    case AArch64::LDURBBi:
+    case AArch64::LDRBBui:
+      return AArch64::LDRBBroX;
+    case AArch64::LDURSBXi:
+    case AArch64::LDRSBXui:
+      return AArch64::LDRSBXroX;
+    case AArch64::LDURSBWi:
+    case AArch64::LDRSBWui:
+      return AArch64::LDRSBWroX;
+    case AArch64::STURBi:
+    case AArch64::STRBui:
+      return AArch64::STRBroX;
+    case AArch64::STURBBi:
+    case AArch64::STRBBui:
+      return AArch64::STRBBroX;
+    }
+  }
+
+  // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
+  // return the opcode of an instruction performing the same operation, but
+  // using the [Reg, #Imm] addressing mode with scaled offset.
+  unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
+    switch (Opcode) {
+    default:
+      llvm_unreachable("Address folding not implemented for instruction");
+
+    case AArch64::LDURQi:
+      Scale = 16;
+      return AArch64::LDRQui;
+    case AArch64::STURQi:
+      Scale = 16;
+      return AArch64::STRQui;
+    case AArch64::LDURDi:
+      Scale = 8;
+      return AArch64::LDRDui;
+    case AArch64::STURDi:
+      Scale = 8;
+      return AArch64::STRDui;
+    case AArch64::LDURXi:
+      Scale = 8;
+      return AArch64::LDRXui;
+    case AArch64::STURXi:
+      Scale = 8;
+      return AArch64::STRXui;
+    case AArch64::LDURWi:
+      Scale = 4;
+      return AArch64::LDRWui;
+    case AArch64::LDURSWi:
+      Scale = 4;
+      return AArch64::LDRSWui;
+    case AArch64::STURWi:
+      Scale = 4;
+      return AArch64::STRWui;
+    case AArch64::LDURHi:
+      Scale = 2;
+      return AArch64::LDRHui;
+    case AArch64::STURHi:
+      Scale = 2;
+      return AArch64::STRHui;
+    case AArch64::LDURHHi:
+      Scale = 2;
+      return AArch64::LDRHHui;
+    case AArch64::STURHHi:
+      Scale = 2;
+      return AArch64::STRHHui;
+    case AArch64::LDURSHXi:
+      Scale = 2;
+      return AArch64::LDRSHXui;
+    case AArch64::LDURSHWi:
+      Scale = 2;
+      return AArch64::LDRSHWui;
+    case AArch64::LDURBi:
+      Scale = 1;
+      return AArch64::LDRBui;
+    case AArch64::LDURBBi:
+      Scale = 1;
+      return AArch64::LDRBBui;
+    case AArch64::LDURSBXi:
+      Scale = 1;
+      return AArch64::LDRSBXui;
+    case AArch64::LDURSBWi:
+      Scale = 1;
+      return AArch64::LDRSBWui;
+    case AArch64::STURBi:
+      Scale = 1;
+      return AArch64::STRBui;
+    case AArch64::STURBBi:
+      Scale = 1;
+      return AArch64::STRBBui;
+    case AArch64::LDRQui:
+    case AArch64::STRQui:
+      Scale = 16;
+      return Opcode;
+    case AArch64::LDRDui:
+    case AArch64::STRDui:
+    case AArch64::LDRXui:
+    case AArch64::STRXui:
+      Scale = 8;
+      return Opcode;
+    case AArch64::LDRWui:
+    case AArch64::LDRSWui:
+    case AArch64::STRWui:
+      Scale = 4;
+      return Opcode;
+    case AArch64::LDRHui:
+    case AArch64::STRHui:
+    case AArch64::LDRHHui:
+    case AArch64::STRHHui:
+    case AArch64::LDRSHXui:
+    case AArch64::LDRSHWui:
+      Scale = 2;
+      return Opcode;
+    case AArch64::LDRBui:
+    case AArch64::LDRBBui:
+    case AArch64::LDRSBXui:
+    case AArch64::LDRSBWui:
+    case AArch64::STRBui:
+    case AArch64::STRBBui:
+      Scale = 1;
+      return Opcode;
     }
   }
 
-  // Handle memory instructions with a [Reg, #Imm] addressing mode.
+  // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
+  // return the opcode of an instruction performing the same operation, but
+  // using the [Reg, #Imm] addressing mode with unscaled offset.
+  unsigned unscaledOffsetOpcode(unsigned Opcode) {
+    switch (Opcode) {
+    default:
+      llvm_unreachable("Address folding not implemented for instruction");
 
-  // Check we are not breaking a potential conversion to an LDP.
-  auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
-                                 int64_t NewOffset) -> bool {
-    int64_t MinOffset, MaxOffset;
-    switch (NumBytes) {
+    case AArch64::LDURQi:
+    case AArch64::STURQi:
+    case AArch64::LDURDi:
+    case AArch64::STURDi:
+    case AArch64::LDURXi:
+    case AArch64::STURXi:
+    case AArch64::LDURWi:
+    case AArch64::LDURSWi:
+    case AArch64::STURWi:
+    case AArch64::LDURHi:
+    case AArch64::STURHi:
+    case AArch64::LDURHHi:
+    case AArch64::STURHHi:
+    case AArch64::LDURSHXi:
+    case AArch64::LDURSHWi:
+    case AArch64::LDURBi:
+    case AArch64::STURBi:
+    case AArch64::LDURBBi:
+    case AArch64::STURBBi:
+    case AArch64::LDURSBWi:
+    case AArch64::LDURSBXi:
+      return Opcode;
+    case AArch64::LDRQui:
+      return AArch64::LDURQi;
+    case AArch64::STRQui:
+      return AArch64::STURQi;
+    case AArch64::LDRDui:
+      return AArch64::LDURDi;
+    case AArch64::STRDui:
+      return AArch64::STURDi;
+    case AArch64::LDRXui:
+      return AArch64::LDURXi;
+    case AArch64::STRXui:
+      return AArch64::STURXi;
+    case AArch64::LDRWui:
+      return AArch64::LDURWi;
+    case AArch64::LDRSWui:
+      return AArch64::LDURSWi;
+    case AArch64::STRWui:
+      return AArch64::STURWi;
+    case AArch64::LDRHui:
+      return AArch64::LDURHi;
+    case AArch64::STRHui:
+      return AArch64::STURHi;
+    case AArch64::LDRHHui:
+      return AArch64::LDURHHi;
+    case AArch64::STRHHui:
+      return AArch64::STURHHi;
+    case AArch64::LDRSHXui:
+      return AArch64::LDURSHXi;
+    case AArch64::LDRSHWui:
+      return AArch64::LDURSHWi;
+    case AArch64::LDRBBui:
+      return AArch64::LDURBBi;
+    case AArch64::LDRBui:
+      return AArch64::LDURBi;
+    case AArch64::STRBBui:
+      return AArch64::STURBBi;
+    case AArch64::STRBui:
+      return AArch64::STURBi;
+    case AArch64::LDRSBWui:
+      return AArch64::LDURSBWi;
+    case AArch64::LDRSBXui:
+      return AArch64::LDURSBXi;
+    }
+  }
+
+  // Given the opcode of a memory load/store instruction, return the opcode of
+  // an instruction performing the same operation, but using the [Reg, Reg,
+  // {s,u}xtw #N] addressing mode with sign-/zero-extend of the offset register.
+  static unsigned offsetExtendOpcode(unsigned Opcode) {
+    switch (Opcode) {
     default:
+      llvm_unreachable("Address folding not implemented for instruction");
+
+    case AArch64::LDRQroX:
+    case AArch64::LDURQi:
+    case AArch64::LDRQui:
+      return AArch64::LDRQroW;
+    case AArch64::STRQroX:
+    case AArch64::STURQi:
+    case AArch64::STRQui:
+      return AArch64::STRQroW;
+    case AArch64::LDRDroX:
+    case AArch64::LDURDi:
+    case AArch64::LDRDui:
+      return AArch64::LDRDroW;
+    case AArch64::STRDroX:
+    case AArch64::STURDi:
+    case AArch64::STRDui:
+      return AArch64::STRDroW;
+    case AArch64::LDRXroX:
+    case AArch64::LDURXi:
+    case AArch64::LDRXui:
+      return AArch64::LDRXroW;
+    case AArch64::STRXroX:
+    case AArch64::STURXi:
+    case AArch64::STRXui:
+      return AArch64::STRXroW;
+    case AArch64::LDRWroX:
+    case AArch64::LDURWi:
+    case AArch64::LDRWui:
+      return AArch64::LDRWroW;
+    case AArch64::LDRSWroX:
+    case AArch64::LDURSWi:
+    case AArch64::LDRSWui:
+      return AArch64::LDRSWroW;
+    case AArch64::STRWroX:
+    case AArch64::STURWi:
+    case AArch64::STRWui:
+      return AArch64::STRWroW;
+    case AArch64::LDRHroX:
+    case AArch64::LDURHi:
+    case AArch64::LDRHui:
+      return AArch64::LDRHroW;
+    case AArch64::STRHroX:
+    case AArch64::STURHi:
+    case AArch64::STRHui:
+      return AArch64::STRHroW;
+    case AArch64::LDRHHroX:
+    case AArch64::LDURHHi:
+    case AArch64::LDRHHui:
+      return AArch64::LDRHHroW;
+    case AArch64::STRHHroX:
+    case AArch64::STURHHi:
+    case AArch64::STRHHui:
+      return AArch64::STRHHroW;
+    case AArch64::LDRSHXroX:
+    case AArch64::LDURSHXi:
+    case AArch64::LDRSHXui:
+      return AArch64::LDRSHXroW;
+    case AArch64::LDRSHWroX:
+    case AArch64::LDURSHWi:
+    case AArch64::LDRSHWui:
+      return AArch64::LDRSHWroW;
+    case AArch64::LDRBroX:
+    case AArch64::LDURBi:
+    case AArch64::LDRBui:
+      return AArch64::LDRBroW;
+    case AArch64::LDRBBroX:
+    case AArch64::LDURBBi:
+    case AArch64::LDRBBui:
+      return AArch64::LDRBBroW;
+    case AArch64::LDRSBXroX:
+    case AArch64::LDURSBXi:
+    case AArch64::LDRSBXui:
+      return AArch64::LDRSBXroW;
+    case AArch64::LDRSBWroX:
+    case AArch64::LDURSBWi:
+    case AArch64::LDRSBWui:
+      return AArch64::LDRSBWroW;
+    case AArch64::STRBroX:
+    case AArch64::STURBi:
+    case AArch64::STRBui:
+      return AArch64::STRBroW;
+    case AArch64::STRBBroX:
+    case AArch64::STURBBi:
+    case AArch64::STRBBui:
+      return AArch64::STRBBroW;
+    }
+  }
+
+  MachineInstr *AArch64InstrInfo::emitLdStWithAddr(
+      MachineInstr & MemI, const ExtAddrMode &AM) const {
+
+    const DebugLoc &DL = MemI.getDebugLoc();
+    MachineBasicBlock &MBB = *MemI.getParent();
+    MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
+
+    if (AM.Form == ExtAddrMode::Formula::Basic) {
+      if (AM.ScaledReg) {
+        // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
+        unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
+        MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
+        auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+                     .addReg(MemI.getOperand(0).getReg(),
+                             MemI.mayLoad() ? RegState::Define : 0)
+                     .addReg(AM.BaseReg)
+                     .addReg(AM.ScaledReg)
+                     .addImm(0)
+                     .addImm(AM.Scale > 1)
+                     .setMemRefs(MemI.memoperands())
+                     .setMIFlags(MemI.getFlags());
+        return B.getInstr();
+      }
+
+      assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
+             "Addressing mode not supported for folding");
+
+      // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
+      unsigned Scale = 1;
+      unsigned Opcode = MemI.getOpcode();
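+      // Displacements that fit a signed 9-bit immediate use the unscaled
+      // (LDUR/STUR) form; otherwise fall back to the scaled form, whose
+      // unsigned 12-bit immediate is in units of the access size.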
+      if (isInt<9>(AM.Displacement))
+        Opcode = unscaledOffsetOpcode(Opcode);
+      else
+        Opcode = scaledOffsetOpcode(Opcode, Scale);
+
+      auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+                   .addReg(MemI.getOperand(0).getReg(),
+                           MemI.mayLoad() ? RegState::Define : 0)
+                   .addReg(AM.BaseReg)
+                   .addImm(AM.Displacement / Scale)
+                   .setMemRefs(MemI.memoperands())
+                   .setMIFlags(MemI.getFlags());
+      return B.getInstr();
+    }
+
+    if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
+        AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
+      // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw
+      // #N]`.
+      assert(AM.ScaledReg && !AM.Displacement &&
+             "Address offset can be a register or an immediate, but not both");
+      unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
+      MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
+      // Make sure the offset register is in the correct register class.
+      Register OffsetReg = AM.ScaledReg;
+      const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
+      if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
+        OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+        BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
+            .addReg(AM.ScaledReg, 0, AArch64::sub_32);
+      }
+      auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+                   .addReg(MemI.getOperand(0).getReg(),
+                           MemI.mayLoad() ? RegState::Define : 0)
+                   .addReg(AM.BaseReg)
+                   .addReg(OffsetReg)
+                   .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
+                   .addImm(AM.Scale != 1)
+                   .setMemRefs(MemI.memoperands())
+                   .setMIFlags(MemI.getFlags());
+
+      return B.getInstr();
+    }
+
+    llvm_unreachable(
+        "Function must not be called with an addressing mode it can't handle");
+  }
+
+  /// Return true if the opcode is a post-index ld/st instruction, which really
+  /// accesses memory at base+0.
+  static bool isPostIndexLdStOpcode(unsigned Opcode) {
+    switch (Opcode) {
+    default:
+      return false;
+    case AArch64::LD1Fourv16b_POST:
+    case AArch64::LD1Fourv1d_POST:
+    case AArch64::LD1Fourv2d_POST:
+    case AArch64::LD1Fourv2s_POST:
+    case AArch64::LD1Fourv4h_POST:
+    case AArch64::LD1Fourv4s_POST:
+    case AArch64::LD1Fourv8b_POST:
+    case AArch64::LD1Fourv8h_POST:
+    case AArch64::LD1Onev16b_POST:
+    case AArch64::LD1Onev1d_POST:
+    case AArch64::LD1Onev2d_POST:
+    case AArch64::LD1Onev2s_POST:
+    case AArch64::LD1Onev4h_POST:
+    case AArch64::LD1Onev4s_POST:
+    case AArch64::LD1Onev8b_POST:
+    case AArch64::LD1Onev8h_POST:
+    case AArch64::LD1Rv16b_POST:
+    case AArch64::LD1Rv1d_POST:
+    case AArch64::LD1Rv2d_POST:
+    case AArch64::LD1Rv2s_POST:
+    case AArch64::LD1Rv4h_POST:
+    case AArch64::LD1Rv4s_POST:
+    case AArch64::LD1Rv8b_POST:
+    case AArch64::LD1Rv8h_POST:
+    case AArch64::LD1Threev16b_POST:
+    case AArch64::LD1Threev1d_POST:
+    case AArch64::LD1Threev2d_POST:
+    case AArch64::LD1Threev2s_POST:
+    case AArch64::LD1Threev4h_POST:
+    case AArch64::LD1Threev4s_POST:
+    case AArch64::LD1Threev8b_POST:
+    case AArch64::LD1Threev8h_POST:
+    case AArch64::LD1Twov16b_POST:
+    case AArch64::LD1Twov1d_POST:
+    case AArch64::LD1Twov2d_POST:
+    case AArch64::LD1Twov2s_POST:
+    case AArch64::LD1Twov4h_POST:
+    case AArch64::LD1Twov4s_POST:
+    case AArch64::LD1Twov8b_POST:
+    case AArch64::LD1Twov8h_POST:
+    case AArch64::LD1i16_POST:
+    case AArch64::LD1i32_POST:
+    case AArch64::LD1i64_POST:
+    case AArch64::LD1i8_POST:
+    case AArch64::LD2Rv16b_POST:
+    case AArch64::LD2Rv1d_POST:
+    case AArch64::LD2Rv2d_POST:
+    case AArch64::LD2Rv2s_POST:
+    case AArch64::LD2Rv4h_POST:
+    case AArch64::LD2Rv4s_POST:
+    case AArch64::LD2Rv8b_POST:
+    case AArch64::LD2Rv8h_POST:
+    case AArch64::LD2Twov16b_POST:
+    case AArch64::LD2Twov2d_POST:
+    case AArch64::LD2Twov2s_POST:
+    case AArch64::LD2Twov4h_POST:
+    case AArch64::LD2Twov4s_POST:
+    case AArch64::LD2Twov8b_POST:
+    case AArch64::LD2Twov8h_POST:
+    case AArch64::LD2i16_POST:
+    case AArch64::LD2i32_POST:
+    case AArch64::LD2i64_POST:
+    case AArch64::LD2i8_POST:
+    case AArch64::LD3Rv16b_POST:
+    case AArch64::LD3Rv1d_POST:
+    case AArch64::LD3Rv2d_POST:
+    case AArch64::LD3Rv2s_POST:
+    case AArch64::LD3Rv4h_POST:
+    case AArch64::LD3Rv4s_POST:
+    case AArch64::LD3Rv8b_POST:
+    case AArch64::LD3Rv8h_POST:
+    case AArch64::LD3Threev16b_POST:
+    case AArch64::LD3Threev2d_POST:
+    case AArch64::LD3Threev2s_POST:
+    case AArch64::LD3Threev4h_POST:
+    case AArch64::LD3Threev4s_POST:
+    case AArch64::LD3Threev8b_POST:
+    case AArch64::LD3Threev8h_POST:
+    case AArch64::LD3i16_POST:
+    case AArch64::LD3i32_POST:
+    case AArch64::LD3i64_POST:
+    case AArch64::LD3i8_POST:
+    case AArch64::LD4Fourv16b_POST:
+    case AArch64::LD4Fourv2d_POST:
+    case AArch64::LD4Fourv2s_POST:
+    case AArch64::LD4Fourv4h_POST:
+    case AArch64::LD4Fourv4s_POST:
+    case AArch64::LD4Fourv8b_POST:
+    case AArch64::LD4Fourv8h_POST:
+    case AArch64::LD4Rv16b_POST:
+    case AArch64::LD4Rv1d_POST:
+    case AArch64::LD4Rv2d_POST:
+    case AArch64::LD4Rv2s_POST:
+    case AArch64::LD4Rv4h_POST:
+    case AArch64::LD4Rv4s_POST:
+    case AArch64::LD4Rv8b_POST:
+    case AArch64::LD4Rv8h_POST:
+    case AArch64::LD4i16_POST:
+    case AArch64::LD4i32_POST:
+    case AArch64::LD4i64_POST:
+    case AArch64::LD4i8_POST:
+    case AArch64::LDAPRWpost:
+    case AArch64::LDAPRXpost:
+    case AArch64::LDIAPPWpost:
+    case AArch64::LDIAPPXpost:
+    case AArch64::LDPDpost:
+    case AArch64::LDPQpost:
+    case AArch64::LDPSWpost:
+    case AArch64::LDPSpost:
+    case AArch64::LDPWpost:
+    case AArch64::LDPXpost:
+    case AArch64::LDRBBpost:
+    case AArch64::LDRBpost:
+    case AArch64::LDRDpost:
+    case AArch64::LDRHHpost:
+    case AArch64::LDRHpost:
+    case AArch64::LDRQpost:
+    case AArch64::LDRSBWpost:
+    case AArch64::LDRSBXpost:
+    case AArch64::LDRSHWpost:
+    case AArch64::LDRSHXpost:
+    case AArch64::LDRSWpost:
+    case AArch64::LDRSpost:
+    case AArch64::LDRWpost:
+    case AArch64::LDRXpost:
+    case AArch64::ST1Fourv16b_POST:
+    case AArch64::ST1Fourv1d_POST:
+    case AArch64::ST1Fourv2d_POST:
+    case AArch64::ST1Fourv2s_POST:
+    case AArch64::ST1Fourv4h_POST:
+    case AArch64::ST1Fourv4s_POST:
+    case AArch64::ST1Fourv8b_POST:
+    case AArch64::ST1Fourv8h_POST:
+    case AArch64::ST1Onev16b_POST:
+    case AArch64::ST1Onev1d_POST:
+    case AArch64::ST1Onev2d_POST:
+    case AArch64::ST1Onev2s_POST:
+    case AArch64::ST1Onev4h_POST:
+    case AArch64::ST1Onev4s_POST:
+    case AArch64::ST1Onev8b_POST:
+    case AArch64::ST1Onev8h_POST:
+    case AArch64::ST1Threev16b_POST:
+    case AArch64::ST1Threev1d_POST:
+    case AArch64::ST1Threev2d_POST:
+    case AArch64::ST1Threev2s_POST:
+    case AArch64::ST1Threev4h_POST:
+    case AArch64::ST1Threev4s_POST:
+    case AArch64::ST1Threev8b_POST:
+    case AArch64::ST1Threev8h_POST:
+    case AArch64::ST1Twov16b_POST:
+    case AArch64::ST1Twov1d_POST:
+    case AArch64::ST1Twov2d_POST:
+    case AArch64::ST1Twov2s_POST:
+    case AArch64::ST1Twov4h_POST:
+    case AArch64::ST1Twov4s_POST:
+    case AArch64::ST1Twov8b_POST:
+    case AArch64::ST1Twov8h_POST:
+    case AArch64::ST1i16_POST:
+    case AArch64::ST1i32_POST:
+    case AArch64::ST1i64_POST:
+    case AArch64::ST1i8_POST:
+    case AArch64::ST2GPostIndex:
+    case AArch64::ST2Twov16b_POST:
+    case AArch64::ST2Twov2d_POST:
+    case AArch64::ST2Twov2s_POST:
+    case AArch64::ST2Twov4h_POST:
+    case AArch64::ST2Twov4s_POST:
+    case AArch64::ST2Twov8b_POST:
+    case AArch64::ST2Twov8h_POST:
+    case AArch64::ST2i16_POST:
+    case AArch64::ST2i32_POST:
+    case AArch64::ST2i64_POST:
+    case AArch64::ST2i8_POST:
+    case AArch64::ST3Threev16b_POST:
+    case AArch64::ST3Threev2d_POST:
+    case AArch64::ST3Threev2s_POST:
+    case AArch64::ST3Threev4h_POST:
+    case AArch64::ST3Threev4s_POST:
+    case AArch64::ST3Threev8b_POST:
+    case AArch64::ST3Threev8h_POST:
+    case AArch64::ST3i16_POST:
+    case AArch64::ST3i32_POST:
+    case AArch64::ST3i64_POST:
+    case AArch64::ST3i8_POST:
+    case AArch64::ST4Fourv16b_POST:
+    case AArch64::ST4Fourv2d_POST:
+    case AArch64::ST4Fourv2s_POST:
+    case AArch64::ST4Fourv4h_POST:
+    case AArch64::ST4Fourv4s_POST:
+    case AArch64::ST4Fourv8b_POST:
+    case AArch64::ST4Fourv8h_POST:
+    case AArch64::ST4i16_POST:
+    case AArch64::ST4i32_POST:
+    case AArch64::ST4i64_POST:
+    case AArch64::ST4i8_POST:
+    case AArch64::STGPostIndex:
+    case AArch64::STGPpost:
+    case AArch64::STPDpost:
+    case AArch64::STPQpost:
+    case AArch64::STPSpost:
+    case AArch64::STPWpost:
+    case AArch64::STPXpost:
+    case AArch64::STRBBpost:
+    case AArch64::STRBpost:
+    case AArch64::STRDpost:
+    case AArch64::STRHHpost:
+    case AArch64::STRHpost:
+    case AArch64::STRQpost:
+    case AArch64::STRSpost:
+    case AArch64::STRWpost:
+    case AArch64::STRXpost:
+    case AArch64::STZ2GPostIndex:
+    case AArch64::STZGPostIndex:
       return true;
-    case 4:
+    }
+  }
+
+  bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
+      const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
+      bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI)
+      const {
+    assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+    // Handle only loads/stores with base register followed by immediate offset.
+    if (LdSt.getNumExplicitOperands() == 3) {
+      // Non-paired instruction (e.g., ldr x1, [x0, #8]).
+      if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
+          !LdSt.getOperand(2).isImm())
+        return false;
+    } else if (LdSt.getNumExplicitOperands() == 4) {
+      // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
+      if (!LdSt.getOperand(1).isReg() ||
+          (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
+          !LdSt.getOperand(3).isImm())
+        return false;
+    } else
+      return false;
+
+    // Get the scaling factor for the instruction and set the width for the
+    // instruction.
+    TypeSize Scale(0U, false);
+    int64_t Dummy1, Dummy2;
+
+    // If this returns false, then it's an instruction we don't want to handle.
+    if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
+      return false;
+
+    // Compute the offset. Offset is calculated as the immediate operand
+    // multiplied by the scaling factor. Unscaled instructions have a scaling
+    // factor of 1. Post-index instructions are a special case which have an
+    // offset of 0.
+    if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
+      BaseOp = &LdSt.getOperand(2);
+      Offset = 0;
+    } else if (LdSt.getNumExplicitOperands() == 3) {
+      BaseOp = &LdSt.getOperand(1);
+      Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
+    } else {
+      assert(LdSt.getNumExplicitOperands() == 4 &&
+             "invalid number of operands");
+      BaseOp = &LdSt.getOperand(2);
+      Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
+    }
+    OffsetIsScalable = Scale.isScalable();
+
+    return BaseOp->isReg() || BaseOp->isFI();
+  }
+
+  MachineOperand &AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(
+      MachineInstr & LdSt) const {
+    assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+    MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
+    assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
+    return OfsOp;
+  }
+
+  bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
+                                      TypeSize &Width, int64_t &MinOffset,
+                                      int64_t &MaxOffset) {
+    switch (Opcode) {
+    // Not a memory operation or something we want to handle.
+    default:
+      Scale = TypeSize::getFixed(0);
+      Width = TypeSize::getFixed(0);
+      MinOffset = MaxOffset = 0;
+      return false;
+    // LDR / STR
+    case AArch64::LDRQui:
+    case AArch64::STRQui:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(16);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    case AArch64::LDRXui:
+    case AArch64::LDRDui:
+    case AArch64::STRXui:
+    case AArch64::STRDui:
+    case AArch64::PRFMui:
+      Scale = TypeSize::getFixed(8);
+      Width = TypeSize::getFixed(8);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    case AArch64::LDRWui:
+    case AArch64::LDRSui:
+    case AArch64::LDRSWui:
+    case AArch64::STRWui:
+    case AArch64::STRSui:
+      Scale = TypeSize::getFixed(4);
+      Width = TypeSize::getFixed(4);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    case AArch64::LDRHui:
+    case AArch64::LDRHHui:
+    case AArch64::LDRSHWui:
+    case AArch64::LDRSHXui:
+    case AArch64::STRHui:
+    case AArch64::STRHHui:
+      Scale = TypeSize::getFixed(2);
+      Width = TypeSize::getFixed(2);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    case AArch64::LDRBui:
+    case AArch64::LDRBBui:
+    case AArch64::LDRSBWui:
+    case AArch64::LDRSBXui:
+    case AArch64::STRBui:
+    case AArch64::STRBBui:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(1);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    // post/pre inc
+    case AArch64::STRQpre:
+    case AArch64::LDRQpost:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(16);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDRDpost:
+    case AArch64::LDRDpre:
+    case AArch64::LDRXpost:
+    case AArch64::LDRXpre:
+    case AArch64::STRDpost:
+    case AArch64::STRDpre:
+    case AArch64::STRXpost:
+    case AArch64::STRXpre:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(8);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::STRWpost:
+    case AArch64::STRWpre:
+    case AArch64::LDRWpost:
+    case AArch64::LDRWpre:
+    case AArch64::STRSpost:
+    case AArch64::STRSpre:
+    case AArch64::LDRSpost:
+    case AArch64::LDRSpre:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(4);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDRHpost:
+    case AArch64::LDRHpre:
+    case AArch64::STRHpost:
+    case AArch64::STRHpre:
+    case AArch64::LDRHHpost:
+    case AArch64::LDRHHpre:
+    case AArch64::STRHHpost:
+    case AArch64::STRHHpre:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(2);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDRBpost:
+    case AArch64::LDRBpre:
+    case AArch64::STRBpost:
+    case AArch64::STRBpre:
+    case AArch64::LDRBBpost:
+    case AArch64::LDRBBpre:
+    case AArch64::STRBBpost:
+    case AArch64::STRBBpre:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(1);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    // Unscaled
+    case AArch64::LDURQi:
+    case AArch64::STURQi:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(16);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDURXi:
+    case AArch64::LDURDi:
+    case AArch64::LDAPURXi:
+    case AArch64::STURXi:
+    case AArch64::STURDi:
+    case AArch64::STLURXi:
+    case AArch64::PRFUMi:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(8);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDURWi:
+    case AArch64::LDURSi:
+    case AArch64::LDURSWi:
+    case AArch64::LDAPURi:
+    case AArch64::LDAPURSWi:
+    case AArch64::STURWi:
+    case AArch64::STURSi:
+    case AArch64::STLURWi:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(4);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDURHi:
+    case AArch64::LDURHHi:
+    case AArch64::LDURSHXi:
+    case AArch64::LDURSHWi:
+    case AArch64::LDAPURHi:
+    case AArch64::LDAPURSHWi:
+    case AArch64::LDAPURSHXi:
+    case AArch64::STURHi:
+    case AArch64::STURHHi:
+    case AArch64::STLURHi:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(2);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDURBi:
+    case AArch64::LDURBBi:
+    case AArch64::LDURSBXi:
+    case AArch64::LDURSBWi:
+    case AArch64::LDAPURBi:
+    case AArch64::LDAPURSBWi:
+    case AArch64::LDAPURSBXi:
+    case AArch64::STURBi:
+    case AArch64::STURBBi:
+    case AArch64::STLURBi:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(1);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    // LDP / STP (including pre/post inc)
+    case AArch64::LDPQi:
+    case AArch64::LDNPQi:
+    case AArch64::STPQi:
+    case AArch64::STNPQi:
+    case AArch64::LDPQpost:
+    case AArch64::LDPQpre:
+    case AArch64::STPQpost:
+    case AArch64::STPQpre:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(16 * 2);
+      MinOffset = -64;
+      MaxOffset = 63;
+      break;
+    case AArch64::LDPXi:
+    case AArch64::LDPDi:
+    case AArch64::LDNPXi:
+    case AArch64::LDNPDi:
+    case AArch64::STPXi:
+    case AArch64::STPDi:
+    case AArch64::STNPXi:
+    case AArch64::STNPDi:
+    case AArch64::LDPDpost:
+    case AArch64::LDPDpre:
+    case AArch64::LDPXpost:
+    case AArch64::LDPXpre:
+    case AArch64::STPDpost:
+    case AArch64::STPDpre:
+    case AArch64::STPXpost:
+    case AArch64::STPXpre:
+      Scale = TypeSize::getFixed(8);
+      Width = TypeSize::getFixed(8 * 2);
+      MinOffset = -64;
+      MaxOffset = 63;
+      break;
+    case AArch64::LDPWi:
+    case AArch64::LDPSi:
+    case AArch64::LDNPWi:
+    case AArch64::LDNPSi:
+    case AArch64::STPWi:
+    case AArch64::STPSi:
+    case AArch64::STNPWi:
+    case AArch64::STNPSi:
+    case AArch64::LDPSpost:
+    case AArch64::LDPSpre:
+    case AArch64::LDPWpost:
+    case AArch64::LDPWpre:
+    case AArch64::STPSpost:
+    case AArch64::STPSpre:
+    case AArch64::STPWpost:
+    case AArch64::STPWpre:
+      Scale = TypeSize::getFixed(4);
+      Width = TypeSize::getFixed(4 * 2);
+      MinOffset = -64;
+      MaxOffset = 63;
+      break;
+    case AArch64::StoreSwiftAsyncContext:
+      // Store is an STRXui, but there might be an ADDXri in the expansion too.
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(8);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    case AArch64::ADDG:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(0);
+      MinOffset = 0;
+      MaxOffset = 63;
+      break;
+    case AArch64::TAGPstack:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(0);
+      // TAGP with a negative offset turns into SUBP, which has a maximum offset
+      // of 63 (not 64!).
+      MinOffset = -63;
+      MaxOffset = 63;
+      break;
+    case AArch64::LDG:
+    case AArch64::STGi:
+    case AArch64::STGPreIndex:
+    case AArch64::STGPostIndex:
+    case AArch64::STZGi:
+    case AArch64::STZGPreIndex:
+    case AArch64::STZGPostIndex:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(16);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    // SVE
+    case AArch64::STR_ZZZZXI:
+    case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::LDR_ZZZZXI:
+    case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
+      Scale = TypeSize::getScalable(16);
+      Width = TypeSize::getScalable(16 * 4);
       MinOffset = -256;
       MaxOffset = 252;
       break;
-    case 8:
-      MinOffset = -512;
-      MaxOffset = 504;
+    case AArch64::STR_ZZZXI:
+    case AArch64::LDR_ZZZXI:
+      Scale = TypeSize::getScalable(16);
+      Width = TypeSize::getScalable(16 * 3);
+      MinOffset = -256;
+      MaxOffset = 253;
+      break;
+    case AArch64::STR_ZZXI:
+    case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::LDR_ZZXI:
+    case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
+      Scale = TypeSize::getScalable(16);
+      Width = TypeSize::getScalable(16 * 2);
+      MinOffset = -256;
+      MaxOffset = 254;
+      break;
+    case AArch64::LDR_PXI:
+    case AArch64::STR_PXI:
+      Scale = TypeSize::getScalable(2);
+      Width = TypeSize::getScalable(2);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDR_PPXI:
+    case AArch64::STR_PPXI:
+      Scale = TypeSize::getScalable(2);
+      Width = TypeSize::getScalable(2 * 2);
+      MinOffset = -256;
+      MaxOffset = 254;
+      break;
+    case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+    case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
+    case AArch64::LDR_ZXI:
+    case AArch64::STR_ZXI:
+      Scale = TypeSize::getScalable(16);
+      Width = TypeSize::getScalable(16);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LD1B_IMM:
+    case AArch64::LD1H_IMM:
+    case AArch64::LD1W_IMM:
+    case AArch64::LD1D_IMM:
+    case AArch64::LDNT1B_ZRI:
+    case AArch64::LDNT1H_ZRI:
+    case AArch64::LDNT1W_ZRI:
+    case AArch64::LDNT1D_ZRI:
+    case AArch64::ST1B_IMM:
+    case AArch64::ST1H_IMM:
+    case AArch64::ST1W_IMM:
+    case AArch64::ST1D_IMM:
+    case AArch64::STNT1B_ZRI:
+    case AArch64::STNT1H_ZRI:
+    case AArch64::STNT1W_ZRI:
+    case AArch64::STNT1D_ZRI:
+    case AArch64::LDNF1B_IMM:
+    case AArch64::LDNF1H_IMM:
+    case AArch64::LDNF1W_IMM:
+    case AArch64::LDNF1D_IMM:
+      // A full vector's worth of data
+      // Width = mbytes * elements
+      Scale = TypeSize::getScalable(16);
+      Width = TypeSize::getScalable(16);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::LD2B_IMM:
+    case AArch64::LD2H_IMM:
+    case AArch64::LD2W_IMM:
+    case AArch64::LD2D_IMM:
+    case AArch64::ST2B_IMM:
+    case AArch64::ST2H_IMM:
+    case AArch64::ST2W_IMM:
+    case AArch64::ST2D_IMM:
+      Scale = TypeSize::getScalable(32);
+      Width = TypeSize::getScalable(16 * 2);
+      MinOffset = -8;
+      MaxOffset = 7;
       break;
-    case 16:
-      MinOffset = -1024;
-      MaxOffset = 1008;
+    case AArch64::LD3B_IMM:
+    case AArch64::LD3H_IMM:
+    case AArch64::LD3W_IMM:
+    case AArch64::LD3D_IMM:
+    case AArch64::ST3B_IMM:
+    case AArch64::ST3H_IMM:
+    case AArch64::ST3W_IMM:
+    case AArch64::ST3D_IMM:
+      Scale = TypeSize::getScalable(48);
+      Width = TypeSize::getScalable(16 * 3);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::LD4B_IMM:
+    case AArch64::LD4H_IMM:
+    case AArch64::LD4W_IMM:
+    case AArch64::LD4D_IMM:
+    case AArch64::ST4B_IMM:
+    case AArch64::ST4H_IMM:
+    case AArch64::ST4W_IMM:
+    case AArch64::ST4D_IMM:
+      Scale = TypeSize::getScalable(64);
+      Width = TypeSize::getScalable(16 * 4);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::LD1B_H_IMM:
+    case AArch64::LD1SB_H_IMM:
+    case AArch64::LD1H_S_IMM:
+    case AArch64::LD1SH_S_IMM:
+    case AArch64::LD1W_D_IMM:
+    case AArch64::LD1SW_D_IMM:
+    case AArch64::ST1B_H_IMM:
+    case AArch64::ST1H_S_IMM:
+    case AArch64::ST1W_D_IMM:
+    case AArch64::LDNF1B_H_IMM:
+    case AArch64::LDNF1SB_H_IMM:
+    case AArch64::LDNF1H_S_IMM:
+    case AArch64::LDNF1SH_S_IMM:
+    case AArch64::LDNF1W_D_IMM:
+    case AArch64::LDNF1SW_D_IMM:
+      // A half vector's worth of data
+      // Width = mbytes * elements
+      Scale = TypeSize::getScalable(8);
+      Width = TypeSize::getScalable(8);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::LD1B_S_IMM:
+    case AArch64::LD1SB_S_IMM:
+    case AArch64::LD1H_D_IMM:
+    case AArch64::LD1SH_D_IMM:
+    case AArch64::ST1B_S_IMM:
+    case AArch64::ST1H_D_IMM:
+    case AArch64::LDNF1B_S_IMM:
+    case AArch64::LDNF1SB_S_IMM:
+    case AArch64::LDNF1H_D_IMM:
+    case AArch64::LDNF1SH_D_IMM:
+      // A quarter vector's worth of data
+      // Width = mbytes * elements
+      Scale = TypeSize::getScalable(4);
+      Width = TypeSize::getScalable(4);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::LD1B_D_IMM:
+    case AArch64::LD1SB_D_IMM:
+    case AArch64::ST1B_D_IMM:
+    case AArch64::LDNF1B_D_IMM:
+    case AArch64::LDNF1SB_D_IMM:
+      // An eighth vector's worth of data
+      // Width = mbytes * elements
+      Scale = TypeSize::getScalable(2);
+      Width = TypeSize::getScalable(2);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::ST2Gi:
+    case AArch64::ST2GPreIndex:
+    case AArch64::ST2GPostIndex:
+    case AArch64::STZ2Gi:
+    case AArch64::STZ2GPreIndex:
+    case AArch64::STZ2GPostIndex:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(32);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::STGPi:
+    case AArch64::STGPpost:
+    case AArch64::STGPpre:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(16);
+      MinOffset = -64;
+      MaxOffset = 63;
+      break;
+    case AArch64::LD1RB_IMM:
+    case AArch64::LD1RB_H_IMM:
+    case AArch64::LD1RB_S_IMM:
+    case AArch64::LD1RB_D_IMM:
+    case AArch64::LD1RSB_H_IMM:
+    case AArch64::LD1RSB_S_IMM:
+    case AArch64::LD1RSB_D_IMM:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(1);
+      MinOffset = 0;
+      MaxOffset = 63;
+      break;
+    case AArch64::LD1RH_IMM:
+    case AArch64::LD1RH_S_IMM:
+    case AArch64::LD1RH_D_IMM:
+    case AArch64::LD1RSH_S_IMM:
+    case AArch64::LD1RSH_D_IMM:
+      Scale = TypeSize::getFixed(2);
+      Width = TypeSize::getFixed(2);
+      MinOffset = 0;
+      MaxOffset = 63;
+      break;
+    case AArch64::LD1RW_IMM:
+    case AArch64::LD1RW_D_IMM:
+    case AArch64::LD1RSW_IMM:
+      Scale = TypeSize::getFixed(4);
+      Width = TypeSize::getFixed(4);
+      MinOffset = 0;
+      MaxOffset = 63;
+      break;
+    case AArch64::LD1RD_IMM:
+      Scale = TypeSize::getFixed(8);
+      Width = TypeSize::getFixed(8);
+      MinOffset = 0;
+      MaxOffset = 63;
       break;
     }
-    return OldOffset < MinOffset || OldOffset > MaxOffset ||
-           (NewOffset >= MinOffset && NewOffset <= MaxOffset);
-  };
-  auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
-    int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
-    int64_t NewOffset = OldOffset + Disp;
-    if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
-      return false;
-    // If the old offset would fit into an LDP, but the new offset wouldn't,
-    // bail out.
-    if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
-      return false;
-    AM.BaseReg = AddrI.getOperand(1).getReg();
-    AM.ScaledReg = 0;
-    AM.Scale = 0;
-    AM.Displacement = NewOffset;
-    AM.Form = ExtAddrMode::Formula::Basic;
-    return true;
-  };
-
-  auto canFoldAddRegIntoAddrMode =
-      [&](int64_t Scale,
-          ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
-    if (MemI.getOperand(2).getImm() != 0)
-      return false;
-    if ((unsigned)Scale != Scale)
-      return false;
-    if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
-      return false;
-    AM.BaseReg = AddrI.getOperand(1).getReg();
-    AM.ScaledReg = AddrI.getOperand(2).getReg();
-    AM.Scale = Scale;
-    AM.Displacement = 0;
-    AM.Form = Form;
-    return true;
-  };
-
-  auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
-    unsigned Opcode = MemI.getOpcode();
-    return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
-           Subtarget.isSTRQroSlow();
-  };
 
-  int64_t Disp = 0;
-  const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
-  switch (AddrI.getOpcode()) {
-  default:
-    return false;
+    return true;
+  }
 
-  case AArch64::ADDXri:
-    // add Xa, Xn, #N
-    // ldr Xd, [Xa, #M]
-    // ->
-    // ldr Xd, [Xn, #N'+M]
-    Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
-    return canFoldAddSubImmIntoAddrMode(Disp);
+  // Scaling factor for unscaled load or store.
+  int AArch64InstrInfo::getMemScale(unsigned Opc) {
+    switch (Opc) {
+    default:
+      llvm_unreachable("Opcode has unknown scale!");
+    case AArch64::LDRBBui:
+    case AArch64::LDURBBi:
+    case AArch64::LDRSBWui:
+    case AArch64::LDURSBWi:
+    case AArch64::STRBBui:
+    case AArch64::STURBBi:
+      return 1;
+    case AArch64::LDRHHui:
+    case AArch64::LDURHHi:
+    case AArch64::LDRSHWui:
+    case AArch64::LDURSHWi:
+    case AArch64::STRHHui:
+    case AArch64::STURHHi:
+      return 2;
+    case AArch64::LDRSui:
+    case AArch64::LDURSi:
+    case AArch64::LDRSpre:
+    case AArch64::LDRSWui:
+    case AArch64::LDURSWi:
+    case AArch64::LDRSWpre:
+    case AArch64::LDRWpre:
+    case AArch64::LDRWui:
+    case AArch64::LDURWi:
+    case AArch64::STRSui:
+    case AArch64::STURSi:
+    case AArch64::STRSpre:
+    case AArch64::STRWui:
+    case AArch64::STURWi:
+    case AArch64::STRWpre:
+    case AArch64::LDPSi:
+    case AArch64::LDPSWi:
+    case AArch64::LDPWi:
+    case AArch64::STPSi:
+    case AArch64::STPWi:
+      return 4;
+    case AArch64::LDRDui:
+    case AArch64::LDURDi:
+    case AArch64::LDRDpre:
+    case AArch64::LDRXui:
+    case AArch64::LDURXi:
+    case AArch64::LDRXpre:
+    case AArch64::STRDui:
+    case AArch64::STURDi:
+    case AArch64::STRDpre:
+    case AArch64::STRXui:
+    case AArch64::STURXi:
+    case AArch64::STRXpre:
+    case AArch64::LDPDi:
+    case AArch64::LDPXi:
+    case AArch64::STPDi:
+    case AArch64::STPXi:
+      return 8;
+    case AArch64::LDRQui:
+    case AArch64::LDURQi:
+    case AArch64::STRQui:
+    case AArch64::STURQi:
+    case AArch64::STRQpre:
+    case AArch64::LDPQi:
+    case AArch64::LDRQpre:
+    case AArch64::STPQi:
+    case AArch64::STGi:
+    case AArch64::STZGi:
+    case AArch64::ST2Gi:
+    case AArch64::STZ2Gi:
+    case AArch64::STGPi:
+      return 16;
+    }
+  }
 
-  case AArch64::SUBXri:
-    // sub Xa, Xn, #N
-    // ldr Xd, [Xa, #M]
-    // ->
-    // ldr Xd, [Xn, #N'+M]
-    Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
-    return canFoldAddSubImmIntoAddrMode(-Disp);
-
-  case AArch64::ADDXrs: {
-    // add Xa, Xn, Xm, lsl #N
-    // ldr Xd, [Xa]
-    // ->
-    // ldr Xd, [Xn, Xm, lsl #N]
-
-    // Don't fold the add if the result would be slower, unless optimising for
-    // size.
-    unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
-    if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
+  bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
       return false;
-    Shift = AArch64_AM::getShiftValue(Shift);
-    if (!OptSize) {
-      if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
-        return false;
-      if (avoidSlowSTRQ(MemI))
-        return false;
+    case AArch64::LDRWpre:
+    case AArch64::LDRXpre:
+    case AArch64::LDRSWpre:
+    case AArch64::LDRSpre:
+    case AArch64::LDRDpre:
+    case AArch64::LDRQpre:
+      return true;
     }
-    return canFoldAddRegIntoAddrMode(1ULL << Shift);
   }
 
-  case AArch64::ADDXrr:
-    // add Xa, Xn, Xm
-    // ldr Xd, [Xa]
-    // ->
-    // ldr Xd, [Xn, Xm, lsl #0]
-
-    // Don't fold the add if the result would be slower, unless optimising for
-    // size.
-    if (!OptSize && avoidSlowSTRQ(MemI))
+  bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
       return false;
-    return canFoldAddRegIntoAddrMode(1);
+    case AArch64::STRWpre:
+    case AArch64::STRXpre:
+    case AArch64::STRSpre:
+    case AArch64::STRDpre:
+    case AArch64::STRQpre:
+      return true;
+    }
+  }
 
-  case AArch64::ADDXrx:
-    // add Xa, Xn, Wm, {s,u}xtw #N
-    // ldr Xd, [Xa]
-    // ->
-    // ldr Xd, [Xn, Wm, {s,u}xtw #N]
-
-    // Don't fold the add if the result would be slower, unless optimising for
-    // size.
-    if (!OptSize && avoidSlowSTRQ(MemI))
-      return false;
+  bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
+    return isPreLd(MI) || isPreSt(MI);
+  }
 
-    // Can fold only sign-/zero-extend of a word.
-    unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
-    AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
-    if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
+  bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
       return false;
-
-    return canFoldAddRegIntoAddrMode(
-        1ULL << AArch64_AM::getArithShiftValue(Imm),
-        (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
-                                     : ExtAddrMode::Formula::ZExtScaledReg);
+    case AArch64::LDPSi:
+    case AArch64::LDPSWi:
+    case AArch64::LDPDi:
+    case AArch64::LDPQi:
+    case AArch64::LDPWi:
+    case AArch64::LDPXi:
+    case AArch64::STPSi:
+    case AArch64::STPDi:
+    case AArch64::STPQi:
+    case AArch64::STPWi:
+    case AArch64::STPXi:
+    case AArch64::STGPi:
+      return true;
+    }
   }
-}
-
-// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
-// return the opcode of an instruction performing the same operation, but using
-// the [Reg, Reg] addressing mode.
-static unsigned regOffsetOpcode(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("Address folding not implemented for instruction");
-
-  case AArch64::LDURQi:
-  case AArch64::LDRQui:
-    return AArch64::LDRQroX;
-  case AArch64::STURQi:
-  case AArch64::STRQui:
-    return AArch64::STRQroX;
-  case AArch64::LDURDi:
-  case AArch64::LDRDui:
-    return AArch64::LDRDroX;
-  case AArch64::STURDi:
-  case AArch64::STRDui:
-    return AArch64::STRDroX;
-  case AArch64::LDURXi:
-  case AArch64::LDRXui:
-    return AArch64::LDRXroX;
-  case AArch64::STURXi:
-  case AArch64::STRXui:
-    return AArch64::STRXroX;
-  case AArch64::LDURWi:
-  case AArch64::LDRWui:
-    return AArch64::LDRWroX;
-  case AArch64::LDURSWi:
-  case AArch64::LDRSWui:
-    return AArch64::LDRSWroX;
-  case AArch64::STURWi:
-  case AArch64::STRWui:
-    return AArch64::STRWroX;
-  case AArch64::LDURHi:
-  case AArch64::LDRHui:
-    return AArch64::LDRHroX;
-  case AArch64::STURHi:
-  case AArch64::STRHui:
-    return AArch64::STRHroX;
-  case AArch64::LDURHHi:
-  case AArch64::LDRHHui:
-    return AArch64::LDRHHroX;
-  case AArch64::STURHHi:
-  case AArch64::STRHHui:
-    return AArch64::STRHHroX;
-  case AArch64::LDURSHXi:
-  case AArch64::LDRSHXui:
-    return AArch64::LDRSHXroX;
-  case AArch64::LDURSHWi:
-  case AArch64::LDRSHWui:
-    return AArch64::LDRSHWroX;
-  case AArch64::LDURBi:
-  case AArch64::LDRBui:
-    return AArch64::LDRBroX;
-  case AArch64::LDURBBi:
-  case AArch64::LDRBBui:
-    return AArch64::LDRBBroX;
-  case AArch64::LDURSBXi:
-  case AArch64::LDRSBXui:
-    return AArch64::LDRSBXroX;
-  case AArch64::LDURSBWi:
-  case AArch64::LDRSBWui:
-    return AArch64::LDRSBWroX;
-  case AArch64::STURBi:
-  case AArch64::STRBui:
-    return AArch64::STRBroX;
-  case AArch64::STURBBi:
-  case AArch64::STRBBui:
-    return AArch64::STRBBroX;
-  }
-}
-
-// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
-// the opcode of an instruction performing the same operation, but using the
-// [Reg, #Imm] addressing mode with scaled offset.
-unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("Address folding not implemented for instruction");
-
-  case AArch64::LDURQi:
-    Scale = 16;
-    return AArch64::LDRQui;
-  case AArch64::STURQi:
-    Scale = 16;
-    return AArch64::STRQui;
-  case AArch64::LDURDi:
-    Scale = 8;
-    return AArch64::LDRDui;
-  case AArch64::STURDi:
-    Scale = 8;
-    return AArch64::STRDui;
-  case AArch64::LDURXi:
-    Scale = 8;
-    return AArch64::LDRXui;
-  case AArch64::STURXi:
-    Scale = 8;
-    return AArch64::STRXui;
-  case AArch64::LDURWi:
-    Scale = 4;
-    return AArch64::LDRWui;
-  case AArch64::LDURSWi:
-    Scale = 4;
-    return AArch64::LDRSWui;
-  case AArch64::STURWi:
-    Scale = 4;
-    return AArch64::STRWui;
-  case AArch64::LDURHi:
-    Scale = 2;
-    return AArch64::LDRHui;
-  case AArch64::STURHi:
-    Scale = 2;
-    return AArch64::STRHui;
-  case AArch64::LDURHHi:
-    Scale = 2;
-    return AArch64::LDRHHui;
-  case AArch64::STURHHi:
-    Scale = 2;
-    return AArch64::STRHHui;
-  case AArch64::LDURSHXi:
-    Scale = 2;
-    return AArch64::LDRSHXui;
-  case AArch64::LDURSHWi:
-    Scale = 2;
-    return AArch64::LDRSHWui;
-  case AArch64::LDURBi:
-    Scale = 1;
-    return AArch64::LDRBui;
-  case AArch64::LDURBBi:
-    Scale = 1;
-    return AArch64::LDRBBui;
-  case AArch64::LDURSBXi:
-    Scale = 1;
-    return AArch64::LDRSBXui;
-  case AArch64::LDURSBWi:
-    Scale = 1;
-    return AArch64::LDRSBWui;
-  case AArch64::STURBi:
-    Scale = 1;
-    return AArch64::STRBui;
-  case AArch64::STURBBi:
-    Scale = 1;
-    return AArch64::STRBBui;
-  case AArch64::LDRQui:
-  case AArch64::STRQui:
-    Scale = 16;
-    return Opcode;
-  case AArch64::LDRDui:
-  case AArch64::STRDui:
-  case AArch64::LDRXui:
-  case AArch64::STRXui:
-    Scale = 8;
-    return Opcode;
-  case AArch64::LDRWui:
-  case AArch64::LDRSWui:
-  case AArch64::STRWui:
-    Scale = 4;
-    return Opcode;
-  case AArch64::LDRHui:
-  case AArch64::STRHui:
-  case AArch64::LDRHHui:
-  case AArch64::STRHHui:
-  case AArch64::LDRSHXui:
-  case AArch64::LDRSHWui:
-    Scale = 2;
-    return Opcode;
-  case AArch64::LDRBui:
-  case AArch64::LDRBBui:
-  case AArch64::LDRSBXui:
-  case AArch64::LDRSBWui:
-  case AArch64::STRBui:
-  case AArch64::STRBBui:
-    Scale = 1;
-    return Opcode;
-  }
-}
-
-// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
-// the opcode of an instruction performing the same operation, but using the
-// [Reg, #Imm] addressing mode with unscaled offset.
-unsigned unscaledOffsetOpcode(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("Address folding not implemented for instruction");
-
-  case AArch64::LDURQi:
-  case AArch64::STURQi:
-  case AArch64::LDURDi:
-  case AArch64::STURDi:
-  case AArch64::LDURXi:
-  case AArch64::STURXi:
-  case AArch64::LDURWi:
-  case AArch64::LDURSWi:
-  case AArch64::STURWi:
-  case AArch64::LDURHi:
-  case AArch64::STURHi:
-  case AArch64::LDURHHi:
-  case AArch64::STURHHi:
-  case AArch64::LDURSHXi:
-  case AArch64::LDURSHWi:
-  case AArch64::LDURBi:
-  case AArch64::STURBi:
-  case AArch64::LDURBBi:
-  case AArch64::STURBBi:
-  case AArch64::LDURSBWi:
-  case AArch64::LDURSBXi:
-    return Opcode;
-  case AArch64::LDRQui:
-    return AArch64::LDURQi;
-  case AArch64::STRQui:
-    return AArch64::STURQi;
-  case AArch64::LDRDui:
-    return AArch64::LDURDi;
-  case AArch64::STRDui:
-    return AArch64::STURDi;
-  case AArch64::LDRXui:
-    return AArch64::LDURXi;
-  case AArch64::STRXui:
-    return AArch64::STURXi;
-  case AArch64::LDRWui:
-    return AArch64::LDURWi;
-  case AArch64::LDRSWui:
-    return AArch64::LDURSWi;
-  case AArch64::STRWui:
-    return AArch64::STURWi;
-  case AArch64::LDRHui:
-    return AArch64::LDURHi;
-  case AArch64::STRHui:
-    return AArch64::STURHi;
-  case AArch64::LDRHHui:
-    return AArch64::LDURHHi;
-  case AArch64::STRHHui:
-    return AArch64::STURHHi;
-  case AArch64::LDRSHXui:
-    return AArch64::LDURSHXi;
-  case AArch64::LDRSHWui:
-    return AArch64::LDURSHWi;
-  case AArch64::LDRBBui:
-    return AArch64::LDURBBi;
-  case AArch64::LDRBui:
-    return AArch64::LDURBi;
-  case AArch64::STRBBui:
-    return AArch64::STURBBi;
-  case AArch64::STRBui:
-    return AArch64::STURBi;
-  case AArch64::LDRSBWui:
-    return AArch64::LDURSBWi;
-  case AArch64::LDRSBXui:
-    return AArch64::LDURSBXi;
-  }
-}
-
-// Given the opcode of a memory load/store instruction, return the opcode of an
-// instruction performing the same operation, but using
-// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
-// offset register.
-static unsigned offsetExtendOpcode(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("Address folding not implemented for instruction");
 
-  case AArch64::LDRQroX:
-  case AArch64::LDURQi:
-  case AArch64::LDRQui:
-    return AArch64::LDRQroW;
-  case AArch64::STRQroX:
-  case AArch64::STURQi:
-  case AArch64::STRQui:
-    return AArch64::STRQroW;
-  case AArch64::LDRDroX:
-  case AArch64::LDURDi:
-  case AArch64::LDRDui:
-    return AArch64::LDRDroW;
-  case AArch64::STRDroX:
-  case AArch64::STURDi:
-  case AArch64::STRDui:
-    return AArch64::STRDroW;
-  case AArch64::LDRXroX:
-  case AArch64::LDURXi:
-  case AArch64::LDRXui:
-    return AArch64::LDRXroW;
-  case AArch64::STRXroX:
-  case AArch64::STURXi:
-  case AArch64::STRXui:
-    return AArch64::STRXroW;
-  case AArch64::LDRWroX:
-  case AArch64::LDURWi:
-  case AArch64::LDRWui:
-    return AArch64::LDRWroW;
-  case AArch64::LDRSWroX:
-  case AArch64::LDURSWi:
-  case AArch64::LDRSWui:
-    return AArch64::LDRSWroW;
-  case AArch64::STRWroX:
-  case AArch64::STURWi:
-  case AArch64::STRWui:
-    return AArch64::STRWroW;
-  case AArch64::LDRHroX:
-  case AArch64::LDURHi:
-  case AArch64::LDRHui:
-    return AArch64::LDRHroW;
-  case AArch64::STRHroX:
-  case AArch64::STURHi:
-  case AArch64::STRHui:
-    return AArch64::STRHroW;
-  case AArch64::LDRHHroX:
-  case AArch64::LDURHHi:
-  case AArch64::LDRHHui:
-    return AArch64::LDRHHroW;
-  case AArch64::STRHHroX:
-  case AArch64::STURHHi:
-  case AArch64::STRHHui:
-    return AArch64::STRHHroW;
-  case AArch64::LDRSHXroX:
-  case AArch64::LDURSHXi:
-  case AArch64::LDRSHXui:
-    return AArch64::LDRSHXroW;
-  case AArch64::LDRSHWroX:
-  case AArch64::LDURSHWi:
-  case AArch64::LDRSHWui:
-    return AArch64::LDRSHWroW;
-  case AArch64::LDRBroX:
-  case AArch64::LDURBi:
-  case AArch64::LDRBui:
-    return AArch64::LDRBroW;
-  case AArch64::LDRBBroX:
-  case AArch64::LDURBBi:
-  case AArch64::LDRBBui:
-    return AArch64::LDRBBroW;
-  case AArch64::LDRSBXroX:
-  case AArch64::LDURSBXi:
-  case AArch64::LDRSBXui:
-    return AArch64::LDRSBXroW;
-  case AArch64::LDRSBWroX:
-  case AArch64::LDURSBWi:
-  case AArch64::LDRSBWui:
-    return AArch64::LDRSBWroW;
-  case AArch64::STRBroX:
-  case AArch64::STURBi:
-  case AArch64::STRBui:
-    return AArch64::STRBroW;
-  case AArch64::STRBBroX:
-  case AArch64::STURBBi:
-  case AArch64::STRBBui:
-    return AArch64::STRBBroW;
+  const MachineOperand &AArch64InstrInfo::getLdStBaseOp(
+      const MachineInstr &MI) {
+    assert(MI.mayLoadOrStore() && "Load or store instruction expected");
+    unsigned Idx =
+        AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI)
+            ? 2
+            : 1;
+    return MI.getOperand(Idx);
   }
-}
-
-MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
-                                                 const ExtAddrMode &AM) const {
 
-  const DebugLoc &DL = MemI.getDebugLoc();
-  MachineBasicBlock &MBB = *MemI.getParent();
-  MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
+  const MachineOperand &AArch64InstrInfo::getLdStOffsetOp(
+      const MachineInstr &MI) {
+    assert(MI.mayLoadOrStore() && "Load or store instruction expected");
+    unsigned Idx =
+        AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI)
+            ? 3
+            : 2;
+    return MI.getOperand(Idx);
+  }
 
-  if (AM.Form == ExtAddrMode::Formula::Basic) {
-    if (AM.ScaledReg) {
-      // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
-      unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
-      MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
-      auto B = BuildMI(MBB, MemI, DL, get(Opcode))
-                   .addReg(MemI.getOperand(0).getReg(),
-                           MemI.mayLoad() ? RegState::Define : 0)
-                   .addReg(AM.BaseReg)
-                   .addReg(AM.ScaledReg)
-                   .addImm(0)
-                   .addImm(AM.Scale > 1)
-                   .setMemRefs(MemI.memoperands())
-                   .setMIFlags(MemI.getFlags());
-      return B.getInstr();
+  const MachineOperand &AArch64InstrInfo::getLdStAmountOp(
+      const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      llvm_unreachable("Unexpected opcode");
+    case AArch64::LDRBroX:
+    case AArch64::LDRBBroX:
+    case AArch64::LDRSBXroX:
+    case AArch64::LDRSBWroX:
+    case AArch64::LDRHroX:
+    case AArch64::LDRHHroX:
+    case AArch64::LDRSHXroX:
+    case AArch64::LDRSHWroX:
+    case AArch64::LDRWroX:
+    case AArch64::LDRSroX:
+    case AArch64::LDRSWroX:
+    case AArch64::LDRDroX:
+    case AArch64::LDRXroX:
+    case AArch64::LDRQroX:
+      return MI.getOperand(4);
     }
-
-    assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
-           "Addressing mode not supported for folding");
-
-    // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
-    unsigned Scale = 1;
-    unsigned Opcode = MemI.getOpcode();
-    if (isInt<9>(AM.Displacement))
-      Opcode = unscaledOffsetOpcode(Opcode);
-    else
-      Opcode = scaledOffsetOpcode(Opcode, Scale);
-
-    auto B = BuildMI(MBB, MemI, DL, get(Opcode))
-                 .addReg(MemI.getOperand(0).getReg(),
-                         MemI.mayLoad() ? RegState::Define : 0)
-                 .addReg(AM.BaseReg)
-                 .addImm(AM.Displacement / Scale)
-                 .setMemRefs(MemI.memoperands())
-                 .setMIFlags(MemI.getFlags());
-    return B.getInstr();
-  }
-
-  if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
-      AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
-    // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
-    assert(AM.ScaledReg && !AM.Displacement &&
-           "Address offset can be a register or an immediate, but not both");
-    unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
-    MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
-    // Make sure the offset register is in the correct register class.
-    Register OffsetReg = AM.ScaledReg;
-    const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
-    if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
-      OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
-      BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
-          .addReg(AM.ScaledReg, 0, AArch64::sub_32);
-    }
-    auto B = BuildMI(MBB, MemI, DL, get(Opcode))
-                 .addReg(MemI.getOperand(0).getReg(),
-                         MemI.mayLoad() ? RegState::Define : 0)
-                 .addReg(AM.BaseReg)
-                 .addReg(OffsetReg)
-                 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
-                 .addImm(AM.Scale != 1)
-                 .setMemRefs(MemI.memoperands())
-                 .setMIFlags(MemI.getFlags());
-
-    return B.getInstr();
-  }
-
-  llvm_unreachable(
-      "Function must not be called with an addressing mode it can't handle");
-}
-
-/// Return true if the opcode is a post-index ld/st instruction, which really
-/// loads from base+0.
-static bool isPostIndexLdStOpcode(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return false;
-  case AArch64::LD1Fourv16b_POST:
-  case AArch64::LD1Fourv1d_POST:
-  case AArch64::LD1Fourv2d_POST:
-  case AArch64::LD1Fourv2s_POST:
-  case AArch64::LD1Fourv4h_POST:
-  case AArch64::LD1Fourv4s_POST:
-  case AArch64::LD1Fourv8b_POST:
-  case AArch64::LD1Fourv8h_POST:
-  case AArch64::LD1Onev16b_POST:
-  case AArch64::LD1Onev1d_POST:
-  case AArch64::LD1Onev2d_POST:
-  case AArch64::LD1Onev2s_POST:
-  case AArch64::LD1Onev4h_POST:
-  case AArch64::LD1Onev4s_POST:
-  case AArch64::LD1Onev8b_POST:
-  case AArch64::LD1Onev8h_POST:
-  case AArch64::LD1Rv16b_POST:
-  case AArch64::LD1Rv1d_POST:
-  case AArch64::LD1Rv2d_POST:
-  case AArch64::LD1Rv2s_POST:
-  case AArch64::LD1Rv4h_POST:
-  case AArch64::LD1Rv4s_POST:
-  case AArch64::LD1Rv8b_POST:
-  case AArch64::LD1Rv8h_POST:
-  case AArch64::LD1Threev16b_POST:
-  case AArch64::LD1Threev1d_POST:
-  case AArch64::LD1Threev2d_POST:
-  case AArch64::LD1Threev2s_POST:
-  case AArch64::LD1Threev4h_POST:
-  case AArch64::LD1Threev4s_POST:
-  case AArch64::LD1Threev8b_POST:
-  case AArch64::LD1Threev8h_POST:
-  case AArch64::LD1Twov16b_POST:
-  case AArch64::LD1Twov1d_POST:
-  case AArch64::LD1Twov2d_POST:
-  case AArch64::LD1Twov2s_POST:
-  case AArch64::LD1Twov4h_POST:
-  case AArch64::LD1Twov4s_POST:
-  case AArch64::LD1Twov8b_POST:
-  case AArch64::LD1Twov8h_POST:
-  case AArch64::LD1i16_POST:
-  case AArch64::LD1i32_POST:
-  case AArch64::LD1i64_POST:
-  case AArch64::LD1i8_POST:
-  case AArch64::LD2Rv16b_POST:
-  case AArch64::LD2Rv1d_POST:
-  case AArch64::LD2Rv2d_POST:
-  case AArch64::LD2Rv2s_POST:
-  case AArch64::LD2Rv4h_POST:
-  case AArch64::LD2Rv4s_POST:
-  case AArch64::LD2Rv8b_POST:
-  case AArch64::LD2Rv8h_POST:
-  case AArch64::LD2Twov16b_POST:
-  case AArch64::LD2Twov2d_POST:
-  case AArch64::LD2Twov2s_POST:
-  case AArch64::LD2Twov4h_POST:
-  case AArch64::LD2Twov4s_POST:
-  case AArch64::LD2Twov8b_POST:
-  case AArch64::LD2Twov8h_POST:
-  case AArch64::LD2i16_POST:
-  case AArch64::LD2i32_POST:
-  case AArch64::LD2i64_POST:
-  case AArch64::LD2i8_POST:
-  case AArch64::LD3Rv16b_POST:
-  case AArch64::LD3Rv1d_POST:
-  case AArch64::LD3Rv2d_POST:
-  case AArch64::LD3Rv2s_POST:
-  case AArch64::LD3Rv4h_POST:
-  case AArch64::LD3Rv4s_POST:
-  case AArch64::LD3Rv8b_POST:
-  case AArch64::LD3Rv8h_POST:
-  case AArch64::LD3Threev16b_POST:
-  case AArch64::LD3Threev2d_POST:
-  case AArch64::LD3Threev2s_POST:
-  case AArch64::LD3Threev4h_POST:
-  case AArch64::LD3Threev4s_POST:
-  case AArch64::LD3Threev8b_POST:
-  case AArch64::LD3Threev8h_POST:
-  case AArch64::LD3i16_POST:
-  case AArch64::LD3i32_POST:
-  case AArch64::LD3i64_POST:
-  case AArch64::LD3i8_POST:
-  case AArch64::LD4Fourv16b_POST:
-  case AArch64::LD4Fourv2d_POST:
-  case AArch64::LD4Fourv2s_POST:
-  case AArch64::LD4Fourv4h_POST:
-  case AArch64::LD4Fourv4s_POST:
-  case AArch64::LD4Fourv8b_POST:
-  case AArch64::LD4Fourv8h_POST:
-  case AArch64::LD4Rv16b_POST:
-  case AArch64::LD4Rv1d_POST:
-  case AArch64::LD4Rv2d_POST:
-  case AArch64::LD4Rv2s_POST:
-  case AArch64::LD4Rv4h_POST:
-  case AArch64::LD4Rv4s_POST:
-  case AArch64::LD4Rv8b_POST:
-  case AArch64::LD4Rv8h_POST:
-  case AArch64::LD4i16_POST:
-  case AArch64::LD4i32_POST:
-  case AArch64::LD4i64_POST:
-  case AArch64::LD4i8_POST:
-  case AArch64::LDAPRWpost:
-  case AArch64::LDAPRXpost:
-  case AArch64::LDIAPPWpost:
-  case AArch64::LDIAPPXpost:
-  case AArch64::LDPDpost:
-  case AArch64::LDPQpost:
-  case AArch64::LDPSWpost:
-  case AArch64::LDPSpost:
-  case AArch64::LDPWpost:
-  case AArch64::LDPXpost:
-  case AArch64::LDRBBpost:
-  case AArch64::LDRBpost:
-  case AArch64::LDRDpost:
-  case AArch64::LDRHHpost:
-  case AArch64::LDRHpost:
-  case AArch64::LDRQpost:
-  case AArch64::LDRSBWpost:
-  case AArch64::LDRSBXpost:
-  case AArch64::LDRSHWpost:
-  case AArch64::LDRSHXpost:
-  case AArch64::LDRSWpost:
-  case AArch64::LDRSpost:
-  case AArch64::LDRWpost:
-  case AArch64::LDRXpost:
-  case AArch64::ST1Fourv16b_POST:
-  case AArch64::ST1Fourv1d_POST:
-  case AArch64::ST1Fourv2d_POST:
-  case AArch64::ST1Fourv2s_POST:
-  case AArch64::ST1Fourv4h_POST:
-  case AArch64::ST1Fourv4s_POST:
-  case AArch64::ST1Fourv8b_POST:
-  case AArch64::ST1Fourv8h_POST:
-  case AArch64::ST1Onev16b_POST:
-  case AArch64::ST1Onev1d_POST:
-  case AArch64::ST1Onev2d_POST:
-  case AArch64::ST1Onev2s_POST:
-  case AArch64::ST1Onev4h_POST:
-  case AArch64::ST1Onev4s_POST:
-  case AArch64::ST1Onev8b_POST:
-  case AArch64::ST1Onev8h_POST:
-  case AArch64::ST1Threev16b_POST:
-  case AArch64::ST1Threev1d_POST:
-  case AArch64::ST1Threev2d_POST:
-  case AArch64::ST1Threev2s_POST:
-  case AArch64::ST1Threev4h_POST:
-  case AArch64::ST1Threev4s_POST:
-  case AArch64::ST1Threev8b_POST:
-  case AArch64::ST1Threev8h_POST:
-  case AArch64::ST1Twov16b_POST:
-  case AArch64::ST1Twov1d_POST:
-  case AArch64::ST1Twov2d_POST:
-  case AArch64::ST1Twov2s_POST:
-  case AArch64::ST1Twov4h_POST:
-  case AArch64::ST1Twov4s_POST:
-  case AArch64::ST1Twov8b_POST:
-  case AArch64::ST1Twov8h_POST:
-  case AArch64::ST1i16_POST:
-  case AArch64::ST1i32_POST:
-  case AArch64::ST1i64_POST:
-  case AArch64::ST1i8_POST:
-  case AArch64::ST2GPostIndex:
-  case AArch64::ST2Twov16b_POST:
-  case AArch64::ST2Twov2d_POST:
-  case AArch64::ST2Twov2s_POST:
-  case AArch64::ST2Twov4h_POST:
-  case AArch64::ST2Twov4s_POST:
-  case AArch64::ST2Twov8b_POST:
-  case AArch64::ST2Twov8h_POST:
-  case AArch64::ST2i16_POST:
-  case AArch64::ST2i32_POST:
-  case AArch64::ST2i64_POST:
-  case AArch64::ST2i8_POST:
-  case AArch64::ST3Threev16b_POST:
-  case AArch64::ST3Threev2d_POST:
-  case AArch64::ST3Threev2s_POST:
-  case AArch64::ST3Threev4h_POST:
-  case AArch64::ST3Threev4s_POST:
-  case AArch64::ST3Threev8b_POST:
-  case AArch64::ST3Threev8h_POST:
-  case AArch64::ST3i16_POST:
-  case AArch64::ST3i32_POST:
-  case AArch64::ST3i64_POST:
-  case AArch64::ST3i8_POST:
-  case AArch64::ST4Fourv16b_POST:
-  case AArch64::ST4Fourv2d_POST:
-  case AArch64::ST4Fourv2s_POST:
-  case AArch64::ST4Fourv4h_POST:
-  case AArch64::ST4Fourv4s_POST:
-  case AArch64::ST4Fourv8b_POST:
-  case AArch64::ST4Fourv8h_POST:
-  case AArch64::ST4i16_POST:
-  case AArch64::ST4i32_POST:
-  case AArch64::ST4i64_POST:
-  case AArch64::ST4i8_POST:
-  case AArch64::STGPostIndex:
-  case AArch64::STGPpost:
-  case AArch64::STPDpost:
-  case AArch64::STPQpost:
-  case AArch64::STPSpost:
-  case AArch64::STPWpost:
-  case AArch64::STPXpost:
-  case AArch64::STRBBpost:
-  case AArch64::STRBpost:
-  case AArch64::STRDpost:
-  case AArch64::STRHHpost:
-  case AArch64::STRHpost:
-  case AArch64::STRQpost:
-  case AArch64::STRSpost:
-  case AArch64::STRWpost:
-  case AArch64::STRXpost:
-  case AArch64::STZ2GPostIndex:
-  case AArch64::STZGPostIndex:
-    return true;
   }
-}
-
-bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
-    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
-    bool &OffsetIsScalable, TypeSize &Width,
-    const TargetRegisterInfo *TRI) const {
-  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
-  // Handle only loads/stores with base register followed by immediate offset.
-  if (LdSt.getNumExplicitOperands() == 3) {
-    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
-    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
-        !LdSt.getOperand(2).isImm())
-      return false;
-  } else if (LdSt.getNumExplicitOperands() == 4) {
-    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
-    if (!LdSt.getOperand(1).isReg() ||
-        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
-        !LdSt.getOperand(3).isImm())
-      return false;
-  } else
-    return false;
-
-  // Get the scaling factor for the instruction and set the width for the
-  // instruction.
-  TypeSize Scale(0U, false);
-  int64_t Dummy1, Dummy2;
 
-  // If this returns false, then it's an instruction we don't want to handle.
-  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
-    return false;
-
-  // Compute the offset. Offset is calculated as the immediate operand
-  // multiplied by the scaling factor. Unscaled instructions have scaling factor
-  // set to 1. Postindex are a special case which have an offset of 0.
-  if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
-    BaseOp = &LdSt.getOperand(2);
-    Offset = 0;
-  } else if (LdSt.getNumExplicitOperands() == 3) {
-    BaseOp = &LdSt.getOperand(1);
-    Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
-  } else {
-    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
-    BaseOp = &LdSt.getOperand(2);
-    Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
+  static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
+                                                Register Reg) {
+    if (MI.getParent() == nullptr)
+      return nullptr;
+    const MachineFunction *MF = MI.getParent()->getParent();
+    return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
   }
-  OffsetIsScalable = Scale.isScalable();
-
-  return BaseOp->isReg() || BaseOp->isFI();
-}
-
-MachineOperand &
-AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
-  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
-  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
-  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
-  return OfsOp;
-}
 
-bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
-                                    TypeSize &Width, int64_t &MinOffset,
-                                    int64_t &MaxOffset) {
-  switch (Opcode) {
-  // Not a memory operation or something we want to handle.
-  default:
-    Scale = TypeSize::getFixed(0);
-    Width = TypeSize::getFixed(0);
-    MinOffset = MaxOffset = 0;
-    return false;
-  // LDR / STR
-  case AArch64::LDRQui:
-  case AArch64::STRQui:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(16);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  case AArch64::LDRXui:
-  case AArch64::LDRDui:
-  case AArch64::STRXui:
-  case AArch64::STRDui:
-  case AArch64::PRFMui:
-    Scale = TypeSize::getFixed(8);
-    Width = TypeSize::getFixed(8);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  case AArch64::LDRWui:
-  case AArch64::LDRSui:
-  case AArch64::LDRSWui:
-  case AArch64::STRWui:
-  case AArch64::STRSui:
-    Scale = TypeSize::getFixed(4);
-    Width = TypeSize::getFixed(4);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  case AArch64::LDRHui:
-  case AArch64::LDRHHui:
-  case AArch64::LDRSHWui:
-  case AArch64::LDRSHXui:
-  case AArch64::STRHui:
-  case AArch64::STRHHui:
-    Scale = TypeSize::getFixed(2);
-    Width = TypeSize::getFixed(2);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  case AArch64::LDRBui:
-  case AArch64::LDRBBui:
-  case AArch64::LDRSBWui:
-  case AArch64::LDRSBXui:
-  case AArch64::STRBui:
-  case AArch64::STRBBui:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(1);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  // post/pre inc
-  case AArch64::STRQpre:
-  case AArch64::LDRQpost:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(16);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDRDpost:
-  case AArch64::LDRDpre:
-  case AArch64::LDRXpost:
-  case AArch64::LDRXpre:
-  case AArch64::STRDpost:
-  case AArch64::STRDpre:
-  case AArch64::STRXpost:
-  case AArch64::STRXpre:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(8);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::STRWpost:
-  case AArch64::STRWpre:
-  case AArch64::LDRWpost:
-  case AArch64::LDRWpre:
-  case AArch64::STRSpost:
-  case AArch64::STRSpre:
-  case AArch64::LDRSpost:
-  case AArch64::LDRSpre:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(4);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDRHpost:
-  case AArch64::LDRHpre:
-  case AArch64::STRHpost:
-  case AArch64::STRHpre:
-  case AArch64::LDRHHpost:
-  case AArch64::LDRHHpre:
-  case AArch64::STRHHpost:
-  case AArch64::STRHHpre:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(2);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDRBpost:
-  case AArch64::LDRBpre:
-  case AArch64::STRBpost:
-  case AArch64::STRBpre:
-  case AArch64::LDRBBpost:
-  case AArch64::LDRBBpre:
-  case AArch64::STRBBpost:
-  case AArch64::STRBBpre:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(1);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  // Unscaled
-  case AArch64::LDURQi:
-  case AArch64::STURQi:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(16);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDURXi:
-  case AArch64::LDURDi:
-  case AArch64::LDAPURXi:
-  case AArch64::STURXi:
-  case AArch64::STURDi:
-  case AArch64::STLURXi:
-  case AArch64::PRFUMi:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(8);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDURWi:
-  case AArch64::LDURSi:
-  case AArch64::LDURSWi:
-  case AArch64::LDAPURi:
-  case AArch64::LDAPURSWi:
-  case AArch64::STURWi:
-  case AArch64::STURSi:
-  case AArch64::STLURWi:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(4);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDURHi:
-  case AArch64::LDURHHi:
-  case AArch64::LDURSHXi:
-  case AArch64::LDURSHWi:
-  case AArch64::LDAPURHi:
-  case AArch64::LDAPURSHWi:
-  case AArch64::LDAPURSHXi:
-  case AArch64::STURHi:
-  case AArch64::STURHHi:
-  case AArch64::STLURHi:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(2);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDURBi:
-  case AArch64::LDURBBi:
-  case AArch64::LDURSBXi:
-  case AArch64::LDURSBWi:
-  case AArch64::LDAPURBi:
-  case AArch64::LDAPURSBWi:
-  case AArch64::LDAPURSBXi:
-  case AArch64::STURBi:
-  case AArch64::STURBBi:
-  case AArch64::STLURBi:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(1);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  // LDP / STP (including pre/post inc)
-  case AArch64::LDPQi:
-  case AArch64::LDNPQi:
-  case AArch64::STPQi:
-  case AArch64::STNPQi:
-  case AArch64::LDPQpost:
-  case AArch64::LDPQpre:
-  case AArch64::STPQpost:
-  case AArch64::STPQpre:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(16 * 2);
-    MinOffset = -64;
-    MaxOffset = 63;
-    break;
-  case AArch64::LDPXi:
-  case AArch64::LDPDi:
-  case AArch64::LDNPXi:
-  case AArch64::LDNPDi:
-  case AArch64::STPXi:
-  case AArch64::STPDi:
-  case AArch64::STNPXi:
-  case AArch64::STNPDi:
-  case AArch64::LDPDpost:
-  case AArch64::LDPDpre:
-  case AArch64::LDPXpost:
-  case AArch64::LDPXpre:
-  case AArch64::STPDpost:
-  case AArch64::STPDpre:
-  case AArch64::STPXpost:
-  case AArch64::STPXpre:
-    Scale = TypeSize::getFixed(8);
-    Width = TypeSize::getFixed(8 * 2);
-    MinOffset = -64;
-    MaxOffset = 63;
-    break;
-  case AArch64::LDPWi:
-  case AArch64::LDPSi:
-  case AArch64::LDNPWi:
-  case AArch64::LDNPSi:
-  case AArch64::STPWi:
-  case AArch64::STPSi:
-  case AArch64::STNPWi:
-  case AArch64::STNPSi:
-  case AArch64::LDPSpost:
-  case AArch64::LDPSpre:
-  case AArch64::LDPWpost:
-  case AArch64::LDPWpre:
-  case AArch64::STPSpost:
-  case AArch64::STPSpre:
-  case AArch64::STPWpost:
-  case AArch64::STPWpre:
-    Scale = TypeSize::getFixed(4);
-    Width = TypeSize::getFixed(4 * 2);
-    MinOffset = -64;
-    MaxOffset = 63;
-    break;
-  case AArch64::StoreSwiftAsyncContext:
-    // Store is an STRXui, but there might be an ADDXri in the expansion too.
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(8);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  case AArch64::ADDG:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(0);
-    MinOffset = 0;
-    MaxOffset = 63;
-    break;
-  case AArch64::TAGPstack:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(0);
-    // TAGP with a negative offset turns into SUBP, which has a maximum offset
-    // of 63 (not 64!).
-    MinOffset = -63;
-    MaxOffset = 63;
-    break;
-  case AArch64::LDG:
-  case AArch64::STGi:
-  case AArch64::STGPreIndex:
-  case AArch64::STGPostIndex:
-  case AArch64::STZGi:
-  case AArch64::STZGPreIndex:
-  case AArch64::STZGPostIndex:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(16);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  // SVE
-  case AArch64::STR_ZZZZXI:
-  case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::LDR_ZZZZXI:
-  case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
-    Scale = TypeSize::getScalable(16);
-    Width = TypeSize::getScalable(16 * 4);
-    MinOffset = -256;
-    MaxOffset = 252;
-    break;
-  case AArch64::STR_ZZZXI:
-  case AArch64::LDR_ZZZXI:
-    Scale = TypeSize::getScalable(16);
-    Width = TypeSize::getScalable(16 * 3);
-    MinOffset = -256;
-    MaxOffset = 253;
-    break;
-  case AArch64::STR_ZZXI:
-  case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::LDR_ZZXI:
-  case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
-    Scale = TypeSize::getScalable(16);
-    Width = TypeSize::getScalable(16 * 2);
-    MinOffset = -256;
-    MaxOffset = 254;
-    break;
-  case AArch64::LDR_PXI:
-  case AArch64::STR_PXI:
-    Scale = TypeSize::getScalable(2);
-    Width = TypeSize::getScalable(2);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDR_PPXI:
-  case AArch64::STR_PPXI:
-    Scale = TypeSize::getScalable(2);
-    Width = TypeSize::getScalable(2 * 2);
-    MinOffset = -256;
-    MaxOffset = 254;
-    break;
-  case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
-  case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
-  case AArch64::LDR_ZXI:
-  case AArch64::STR_ZXI:
-    Scale = TypeSize::getScalable(16);
-    Width = TypeSize::getScalable(16);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LD1B_IMM:
-  case AArch64::LD1H_IMM:
-  case AArch64::LD1W_IMM:
-  case AArch64::LD1D_IMM:
-  case AArch64::LDNT1B_ZRI:
-  case AArch64::LDNT1H_ZRI:
-  case AArch64::LDNT1W_ZRI:
-  case AArch64::LDNT1D_ZRI:
-  case AArch64::ST1B_IMM:
-  case AArch64::ST1H_IMM:
-  case AArch64::ST1W_IMM:
-  case AArch64::ST1D_IMM:
-  case AArch64::STNT1B_ZRI:
-  case AArch64::STNT1H_ZRI:
-  case AArch64::STNT1W_ZRI:
-  case AArch64::STNT1D_ZRI:
-  case AArch64::LDNF1B_IMM:
-  case AArch64::LDNF1H_IMM:
-  case AArch64::LDNF1W_IMM:
-  case AArch64::LDNF1D_IMM:
-    // A full vectors worth of data
-    // Width = mbytes * elements
-    Scale = TypeSize::getScalable(16);
-    Width = TypeSize::getScalable(16);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD2B_IMM:
-  case AArch64::LD2H_IMM:
-  case AArch64::LD2W_IMM:
-  case AArch64::LD2D_IMM:
-  case AArch64::ST2B_IMM:
-  case AArch64::ST2H_IMM:
-  case AArch64::ST2W_IMM:
-  case AArch64::ST2D_IMM:
-    Scale = TypeSize::getScalable(32);
-    Width = TypeSize::getScalable(16 * 2);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD3B_IMM:
-  case AArch64::LD3H_IMM:
-  case AArch64::LD3W_IMM:
-  case AArch64::LD3D_IMM:
-  case AArch64::ST3B_IMM:
-  case AArch64::ST3H_IMM:
-  case AArch64::ST3W_IMM:
-  case AArch64::ST3D_IMM:
-    Scale = TypeSize::getScalable(48);
-    Width = TypeSize::getScalable(16 * 3);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD4B_IMM:
-  case AArch64::LD4H_IMM:
-  case AArch64::LD4W_IMM:
-  case AArch64::LD4D_IMM:
-  case AArch64::ST4B_IMM:
-  case AArch64::ST4H_IMM:
-  case AArch64::ST4W_IMM:
-  case AArch64::ST4D_IMM:
-    Scale = TypeSize::getScalable(64);
-    Width = TypeSize::getScalable(16 * 4);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD1B_H_IMM:
-  case AArch64::LD1SB_H_IMM:
-  case AArch64::LD1H_S_IMM:
-  case AArch64::LD1SH_S_IMM:
-  case AArch64::LD1W_D_IMM:
-  case AArch64::LD1SW_D_IMM:
-  case AArch64::ST1B_H_IMM:
-  case AArch64::ST1H_S_IMM:
-  case AArch64::ST1W_D_IMM:
-  case AArch64::LDNF1B_H_IMM:
-  case AArch64::LDNF1SB_H_IMM:
-  case AArch64::LDNF1H_S_IMM:
-  case AArch64::LDNF1SH_S_IMM:
-  case AArch64::LDNF1W_D_IMM:
-  case AArch64::LDNF1SW_D_IMM:
-    // A half vector worth of data
-    // Width = mbytes * elements
-    Scale = TypeSize::getScalable(8);
-    Width = TypeSize::getScalable(8);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD1B_S_IMM:
-  case AArch64::LD1SB_S_IMM:
-  case AArch64::LD1H_D_IMM:
-  case AArch64::LD1SH_D_IMM:
-  case AArch64::ST1B_S_IMM:
-  case AArch64::ST1H_D_IMM:
-  case AArch64::LDNF1B_S_IMM:
-  case AArch64::LDNF1SB_S_IMM:
-  case AArch64::LDNF1H_D_IMM:
-  case AArch64::LDNF1SH_D_IMM:
-    // A quarter vector worth of data
-    // Width = mbytes * elements
-    Scale = TypeSize::getScalable(4);
-    Width = TypeSize::getScalable(4);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD1B_D_IMM:
-  case AArch64::LD1SB_D_IMM:
-  case AArch64::ST1B_D_IMM:
-  case AArch64::LDNF1B_D_IMM:
-  case AArch64::LDNF1SB_D_IMM:
-    // A eighth vector worth of data
-    // Width = mbytes * elements
-    Scale = TypeSize::getScalable(2);
-    Width = TypeSize::getScalable(2);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::ST2Gi:
-  case AArch64::ST2GPreIndex:
-  case AArch64::ST2GPostIndex:
-  case AArch64::STZ2Gi:
-  case AArch64::STZ2GPreIndex:
-  case AArch64::STZ2GPostIndex:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(32);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::STGPi:
-  case AArch64::STGPpost:
-  case AArch64::STGPpre:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(16);
-    MinOffset = -64;
-    MaxOffset = 63;
-    break;
-  case AArch64::LD1RB_IMM:
-  case AArch64::LD1RB_H_IMM:
-  case AArch64::LD1RB_S_IMM:
-  case AArch64::LD1RB_D_IMM:
-  case AArch64::LD1RSB_H_IMM:
-  case AArch64::LD1RSB_S_IMM:
-  case AArch64::LD1RSB_D_IMM:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(1);
-    MinOffset = 0;
-    MaxOffset = 63;
-    break;
-  case AArch64::LD1RH_IMM:
-  case AArch64::LD1RH_S_IMM:
-  case AArch64::LD1RH_D_IMM:
-  case AArch64::LD1RSH_S_IMM:
-  case AArch64::LD1RSH_D_IMM:
-    Scale = TypeSize::getFixed(2);
-    Width = TypeSize::getFixed(2);
-    MinOffset = 0;
-    MaxOffset = 63;
-    break;
-  case AArch64::LD1RW_IMM:
-  case AArch64::LD1RW_D_IMM:
-  case AArch64::LD1RSW_IMM:
-    Scale = TypeSize::getFixed(4);
-    Width = TypeSize::getFixed(4);
-    MinOffset = 0;
-    MaxOffset = 63;
-    break;
-  case AArch64::LD1RD_IMM:
-    Scale = TypeSize::getFixed(8);
-    Width = TypeSize::getFixed(8);
-    MinOffset = 0;
-    MaxOffset = 63;
-    break;
+  bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
+    auto IsHFPR = [&](const MachineOperand &Op) {
+      if (!Op.isReg())
+        return false;
+      auto Reg = Op.getReg();
+      if (Reg.isPhysical())
+        return AArch64::FPR16RegClass.contains(Reg);
+      const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+      return TRC == &AArch64::FPR16RegClass ||
+             TRC == &AArch64::FPR16_loRegClass;
+    };
+    return llvm::any_of(MI.operands(), IsHFPR);
   }
 
-  return true;
-}
-
-// Scaling factor for unscaled load or store.
-int AArch64InstrInfo::getMemScale(unsigned Opc) {
-  switch (Opc) {
-  default:
-    llvm_unreachable("Opcode has unknown scale!");
-  case AArch64::LDRBBui:
-  case AArch64::LDURBBi:
-  case AArch64::LDRSBWui:
-  case AArch64::LDURSBWi:
-  case AArch64::STRBBui:
-  case AArch64::STURBBi:
-    return 1;
-  case AArch64::LDRHHui:
-  case AArch64::LDURHHi:
-  case AArch64::LDRSHWui:
-  case AArch64::LDURSHWi:
-  case AArch64::STRHHui:
-  case AArch64::STURHHi:
-    return 2;
-  case AArch64::LDRSui:
-  case AArch64::LDURSi:
-  case AArch64::LDRSpre:
-  case AArch64::LDRSWui:
-  case AArch64::LDURSWi:
-  case AArch64::LDRSWpre:
-  case AArch64::LDRWpre:
-  case AArch64::LDRWui:
-  case AArch64::LDURWi:
-  case AArch64::STRSui:
-  case AArch64::STURSi:
-  case AArch64::STRSpre:
-  case AArch64::STRWui:
-  case AArch64::STURWi:
-  case AArch64::STRWpre:
-  case AArch64::LDPSi:
-  case AArch64::LDPSWi:
-  case AArch64::LDPWi:
-  case AArch64::STPSi:
-  case AArch64::STPWi:
-    return 4;
-  case AArch64::LDRDui:
-  case AArch64::LDURDi:
-  case AArch64::LDRDpre:
-  case AArch64::LDRXui:
-  case AArch64::LDURXi:
-  case AArch64::LDRXpre:
-  case AArch64::STRDui:
-  case AArch64::STURDi:
-  case AArch64::STRDpre:
-  case AArch64::STRXui:
-  case AArch64::STURXi:
-  case AArch64::STRXpre:
-  case AArch64::LDPDi:
-  case AArch64::LDPXi:
-  case AArch64::STPDi:
-  case AArch64::STPXi:
-    return 8;
-  case AArch64::LDRQui:
-  case AArch64::LDURQi:
-  case AArch64::STRQui:
-  case AArch64::STURQi:
-  case AArch64::STRQpre:
-  case AArch64::LDPQi:
-  case AArch64::LDRQpre:
-  case AArch64::STPQi:
-  case AArch64::STGi:
-  case AArch64::STZGi:
-  case AArch64::ST2Gi:
-  case AArch64::STZ2Gi:
-  case AArch64::STGPi:
-    return 16;
-  }
-}
-
-bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    return false;
-  case AArch64::LDRWpre:
-  case AArch64::LDRXpre:
-  case AArch64::LDRSWpre:
-  case AArch64::LDRSpre:
-  case AArch64::LDRDpre:
-  case AArch64::LDRQpre:
-    return true;
+  bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
+    auto IsQFPR = [&](const MachineOperand &Op) {
+      if (!Op.isReg())
+        return false;
+      auto Reg = Op.getReg();
+      if (Reg.isPhysical())
+        return AArch64::FPR128RegClass.contains(Reg);
+      const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+      return TRC == &AArch64::FPR128RegClass ||
+             TRC == &AArch64::FPR128_loRegClass;
+    };
+    return llvm::any_of(MI.operands(), IsQFPR);
   }
-}
 
-bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    return false;
-  case AArch64::STRWpre:
-  case AArch64::STRXpre:
-  case AArch64::STRSpre:
-  case AArch64::STRDpre:
-  case AArch64::STRQpre:
-    return true;
+  bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    case AArch64::BRK:
+    case AArch64::HLT:
+    case AArch64::PACIASP:
+    case AArch64::PACIBSP:
+      // Implicit BTI behavior.
+      return true;
+    case AArch64::PAUTH_PROLOGUE:
+      // PAUTH_PROLOGUE expands to PACI(A|B)SP.
+      return true;
+    case AArch64::HINT: {
+      unsigned Imm = MI.getOperand(0).getImm();
+      // Explicit BTI instruction.
+      if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
+        return true;
+      // PACI(A|B)SP instructions.
+      if (Imm == 25 || Imm == 27)
+        return true;
+      return false;
+    }
+    default:
+      return false;
+    }
   }
-}
-
-bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
-  return isPreLd(MI) || isPreSt(MI);
-}
 
-bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    return false;
-  case AArch64::LDPSi:
-  case AArch64::LDPSWi:
-  case AArch64::LDPDi:
-  case AArch64::LDPQi:
-  case AArch64::LDPWi:
-  case AArch64::LDPXi:
-  case AArch64::STPSi:
-  case AArch64::STPDi:
-  case AArch64::STPQi:
-  case AArch64::STPWi:
-  case AArch64::STPXi:
-  case AArch64::STGPi:
-    return true;
+  bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
+    if (Reg == 0)
+      return false;
+    assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
+    return AArch64::FPR128RegClass.contains(Reg) ||
+           AArch64::FPR64RegClass.contains(Reg) ||
+           AArch64::FPR32RegClass.contains(Reg) ||
+           AArch64::FPR16RegClass.contains(Reg) ||
+           AArch64::FPR8RegClass.contains(Reg);
   }
-}
-
-const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
-  assert(MI.mayLoadOrStore() && "Load or store instruction expected");
-  unsigned Idx =
-      AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
-                                                                            : 1;
-  return MI.getOperand(Idx);
-}
 
-const MachineOperand &
-AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
-  assert(MI.mayLoadOrStore() && "Load or store instruction expected");
-  unsigned Idx =
-      AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
-                                                                            : 2;
-  return MI.getOperand(Idx);
-}
-
-const MachineOperand &
-AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    llvm_unreachable("Unexpected opcode");
-  case AArch64::LDRBroX:
-  case AArch64::LDRBBroX:
-  case AArch64::LDRSBXroX:
-  case AArch64::LDRSBWroX:
-  case AArch64::LDRHroX:
-  case AArch64::LDRHHroX:
-  case AArch64::LDRSHXroX:
-  case AArch64::LDRSHWroX:
-  case AArch64::LDRWroX:
-  case AArch64::LDRSroX:
-  case AArch64::LDRSWroX:
-  case AArch64::LDRDroX:
-  case AArch64::LDRXroX:
-  case AArch64::LDRQroX:
-    return MI.getOperand(4);
+  bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
+    auto IsFPR = [&](const MachineOperand &Op) {
+      if (!Op.isReg())
+        return false;
+      auto Reg = Op.getReg();
+      if (Reg.isPhysical())
+        return isFpOrNEON(Reg);
+
+      const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+      return TRC == &AArch64::FPR128RegClass ||
+             TRC == &AArch64::FPR128_loRegClass ||
+             TRC == &AArch64::FPR64RegClass ||
+             TRC == &AArch64::FPR64_loRegClass ||
+             TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
+             TRC == &AArch64::FPR8RegClass;
+    };
+    return llvm::any_of(MI.operands(), IsFPR);
   }
-}
-
-static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
-                                              Register Reg) {
-  if (MI.getParent() == nullptr)
-    return nullptr;
-  const MachineFunction *MF = MI.getParent()->getParent();
-  return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
-}
 
-bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
-  auto IsHFPR = [&](const MachineOperand &Op) {
-    if (!Op.isReg())
-      return false;
-    auto Reg = Op.getReg();
-    if (Reg.isPhysical())
-      return AArch64::FPR16RegClass.contains(Reg);
-    const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
-    return TRC == &AArch64::FPR16RegClass ||
-           TRC == &AArch64::FPR16_loRegClass;
-  };
-  return llvm::any_of(MI.operands(), IsHFPR);
-}
+  // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
+  // scaled.
+  static bool scaleOffset(unsigned Opc, int64_t &Offset) {
+    int Scale = AArch64InstrInfo::getMemScale(Opc);
 
-bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
-  auto IsQFPR = [&](const MachineOperand &Op) {
-    if (!Op.isReg())
+    // If the byte-offset isn't a multiple of the stride, we can't scale this
+    // offset.
+    if (Offset % Scale != 0)
       return false;
-    auto Reg = Op.getReg();
-    if (Reg.isPhysical())
-      return AArch64::FPR128RegClass.contains(Reg);
-    const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
-    return TRC == &AArch64::FPR128RegClass ||
-           TRC == &AArch64::FPR128_loRegClass;
-  };
-  return llvm::any_of(MI.operands(), IsQFPR);
-}
 
-bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case AArch64::BRK:
-  case AArch64::HLT:
-  case AArch64::PACIASP:
-  case AArch64::PACIBSP:
-    // Implicit BTI behavior.
-    return true;
-  case AArch64::PAUTH_PROLOGUE:
-    // PAUTH_PROLOGUE expands to PACI(A|B)SP.
+    // Convert the byte-offset used by unscaled into an "element" offset used
+    // by the scaled pair load/store instructions.
+    Offset /= Scale;
     return true;
-  case AArch64::HINT: {
-    unsigned Imm = MI.getOperand(0).getImm();
-    // Explicit BTI instruction.
-    if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
-      return true;
-    // PACI(A|B)SP instructions.
-    if (Imm == 25 || Imm == 27)
-      return true;
-    return false;
-  }
-  default:
-    return false;
   }
-}
-
-bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
-  if (Reg == 0)
-    return false;
-  assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
-  return AArch64::FPR128RegClass.contains(Reg) ||
-         AArch64::FPR64RegClass.contains(Reg) ||
-         AArch64::FPR32RegClass.contains(Reg) ||
-         AArch64::FPR16RegClass.contains(Reg) ||
-         AArch64::FPR8RegClass.contains(Reg);
-}
 
-bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
-  auto IsFPR = [&](const MachineOperand &Op) {
-    if (!Op.isReg())
+  static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
+    if (FirstOpc == SecondOpc)
+      return true;
+    // We can also pair sign-ext and zero-ext instructions.
+    switch (FirstOpc) {
+    default:
       return false;
-    auto Reg = Op.getReg();
-    if (Reg.isPhysical())
-      return isFpOrNEON(Reg);
-
-    const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
-    return TRC == &AArch64::FPR128RegClass ||
-           TRC == &AArch64::FPR128_loRegClass ||
-           TRC == &AArch64::FPR64RegClass ||
-           TRC == &AArch64::FPR64_loRegClass ||
-           TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
-           TRC == &AArch64::FPR8RegClass;
-  };
-  return llvm::any_of(MI.operands(), IsFPR);
-}
-
-// Scale the unscaled offsets.  Returns false if the unscaled offset can't be
-// scaled.
-static bool scaleOffset(unsigned Opc, int64_t &Offset) {
-  int Scale = AArch64InstrInfo::getMemScale(Opc);
-
-  // If the byte-offset isn't a multiple of the stride, we can't scale this
-  // offset.
-  if (Offset % Scale != 0)
+    case AArch64::STRSui:
+    case AArch64::STURSi:
+      return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
+    case AArch64::STRDui:
+    case AArch64::STURDi:
+      return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
+    case AArch64::STRQui:
+    case AArch64::STURQi:
+      return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
+    case AArch64::STRWui:
+    case AArch64::STURWi:
+      return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
+    case AArch64::STRXui:
+    case AArch64::STURXi:
+      return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
+    case AArch64::LDRSui:
+    case AArch64::LDURSi:
+      return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
+    case AArch64::LDRDui:
+    case AArch64::LDURDi:
+      return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
+    case AArch64::LDRQui:
+    case AArch64::LDURQi:
+      return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
+    case AArch64::LDRWui:
+    case AArch64::LDURWi:
+      return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
+    case AArch64::LDRSWui:
+    case AArch64::LDURSWi:
+      return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
+    case AArch64::LDRXui:
+    case AArch64::LDURXi:
+      return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
+    }
+    // These instructions can't be paired based on their opcodes.
     return false;
+  }
 
-  // Convert the byte-offset used by unscaled into an "element" offset used
-  // by the scaled pair load/store instructions.
-  Offset /= Scale;
-  return true;
-}
-
-static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
-  if (FirstOpc == SecondOpc)
-    return true;
-  // We can also pair sign-ext and zero-ext instructions.
-  switch (FirstOpc) {
-  default:
-    return false;
-  case AArch64::STRSui:
-  case AArch64::STURSi:
-    return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
-  case AArch64::STRDui:
-  case AArch64::STURDi:
-    return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
-  case AArch64::STRQui:
-  case AArch64::STURQi:
-    return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
-  case AArch64::STRWui:
-  case AArch64::STURWi:
-    return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
-  case AArch64::STRXui:
-  case AArch64::STURXi:
-    return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
-  case AArch64::LDRSui:
-  case AArch64::LDURSi:
-    return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
-  case AArch64::LDRDui:
-  case AArch64::LDURDi:
-    return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
-  case AArch64::LDRQui:
-  case AArch64::LDURQi:
-    return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
-  case AArch64::LDRWui:
-  case AArch64::LDURWi:
-    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
-  case AArch64::LDRSWui:
-  case AArch64::LDURSWi:
-    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
-  case AArch64::LDRXui:
-  case AArch64::LDURXi:
-    return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
-  }
-  // These instructions can't be paired based on their opcodes.
-  return false;
-}
+  static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
+                              int64_t Offset1, unsigned Opcode1, int FI2,
+                              int64_t Offset2, unsigned Opcode2) {
+    // Accesses through fixed stack object frame indices may access a different
+    // fixed stack slot. Check that the object offsets + offsets match.
+    if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
+      int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
+      int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
+      assert(ObjectOffset1 <= ObjectOffset2 &&
+             "Object offsets are not ordered.");
+      // Convert to scaled object offsets.
+      int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
+      if (ObjectOffset1 % Scale1 != 0)
+        return false;
+      ObjectOffset1 /= Scale1;
+      int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
+      if (ObjectOffset2 % Scale2 != 0)
+        return false;
+      ObjectOffset2 /= Scale2;
+      ObjectOffset1 += Offset1;
+      ObjectOffset2 += Offset2;
+      return ObjectOffset1 + 1 == ObjectOffset2;
+    }
 
-static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
-                            int64_t Offset1, unsigned Opcode1, int FI2,
-                            int64_t Offset2, unsigned Opcode2) {
-  // Accesses through fixed stack object frame indices may access a different
-  // fixed stack slot. Check that the object offsets + offsets match.
-  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
-    int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
-    int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
-    assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
-    // Convert to scaled object offsets.
-    int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
-    if (ObjectOffset1 % Scale1 != 0)
+    return FI1 == FI2;
+  }
+
+  /// Detect opportunities for ldp/stp formation.
+  ///
+  /// Only called for LdSt for which getMemOperandWithOffset returns true.
+  bool AArch64InstrInfo::shouldClusterMemOps(
+      ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
+      bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+      int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
+      unsigned NumBytes) const {
+    assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
+    const MachineOperand &BaseOp1 = *BaseOps1.front();
+    const MachineOperand &BaseOp2 = *BaseOps2.front();
+    const MachineInstr &FirstLdSt = *BaseOp1.getParent();
+    const MachineInstr &SecondLdSt = *BaseOp2.getParent();
+    if (BaseOp1.getType() != BaseOp2.getType())
       return false;
-    ObjectOffset1 /= Scale1;
-    int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
-    if (ObjectOffset2 % Scale2 != 0)
-      return false;
-    ObjectOffset2 /= Scale2;
-    ObjectOffset1 += Offset1;
-    ObjectOffset2 += Offset2;
-    return ObjectOffset1 + 1 == ObjectOffset2;
-  }
 
-  return FI1 == FI2;
-}
+    assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+           "Only base registers and frame indices are supported.");
 
-/// Detect opportunities for ldp/stp formation.
-///
-/// Only called for LdSt for which getMemOperandWithOffset returns true.
-bool AArch64InstrInfo::shouldClusterMemOps(
-    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
-    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
-    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
-    unsigned NumBytes) const {
-  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
-  const MachineOperand &BaseOp1 = *BaseOps1.front();
-  const MachineOperand &BaseOp2 = *BaseOps2.front();
-  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
-  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
-  if (BaseOp1.getType() != BaseOp2.getType())
-    return false;
+    // Check for both base regs and base FI.
+    if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
+      return false;
 
-  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
-         "Only base registers and frame indices are supported.");
+    // Only cluster up to a single pair.
+    if (ClusterSize > 2)
+      return false;
 
-  // Check for both base regs and base FI.
-  if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
-    return false;
+    if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
+      return false;
 
-  // Only cluster up to a single pair.
-  if (ClusterSize > 2)
-    return false;
+    // Can we pair these instructions based on their opcodes?
+    unsigned FirstOpc = FirstLdSt.getOpcode();
+    unsigned SecondOpc = SecondLdSt.getOpcode();
+    if (!canPairLdStOpc(FirstOpc, SecondOpc))
+      return false;
 
-  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
-    return false;
+    // Can't merge volatiles or load/stores that have a hint to avoid pair
+    // formation, for example.
+    if (!isCandidateToMergeOrPair(FirstLdSt) ||
+        !isCandidateToMergeOrPair(SecondLdSt))
+      return false;
 
-  // Can we pair these instructions based on their opcodes?
-  unsigned FirstOpc = FirstLdSt.getOpcode();
-  unsigned SecondOpc = SecondLdSt.getOpcode();
-  if (!canPairLdStOpc(FirstOpc, SecondOpc))
-    return false;
+    // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
+    int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
+    if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
+      return false;
 
-  // Can't merge volatiles or load/stores that have a hint to avoid pair
-  // formation, for example.
-  if (!isCandidateToMergeOrPair(FirstLdSt) ||
-      !isCandidateToMergeOrPair(SecondLdSt))
-    return false;
+    int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
+    if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
+      return false;
 
-  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
-  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
-  if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
-    return false;
+    // Pairwise instructions have a 7-bit signed offset field.
+    if (Offset1 > 63 || Offset1 < -64)
+      return false;
 
-  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
-  if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
-    return false;
+    // The caller should already have ordered First/SecondLdSt by offset.
+    // Note: except for non-equal frame index bases
+    if (BaseOp1.isFI()) {
+      assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
+             "Caller should have ordered offsets.");
 
-  // Pairwise instructions have a 7-bit signed offset field.
-  if (Offset1 > 63 || Offset1 < -64)
-    return false;
+      const MachineFrameInfo &MFI =
+          FirstLdSt.getParent()->getParent()->getFrameInfo();
+      return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
+                             BaseOp2.getIndex(), Offset2, SecondOpc);
+    }
 
-  // The caller should already have ordered First/SecondLdSt by offset.
-  // Note: except for non-equal frame index bases
-  if (BaseOp1.isFI()) {
-    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
-           "Caller should have ordered offsets.");
+    assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
 
-    const MachineFrameInfo &MFI =
-        FirstLdSt.getParent()->getParent()->getFrameInfo();
-    return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
-                           BaseOp2.getIndex(), Offset2, SecondOpc);
+    return Offset1 + 1 == Offset2;
   }
 
-  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
+  static const MachineInstrBuilder &AddSubReg(
+      const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx,
+      unsigned State, const TargetRegisterInfo *TRI) {
+    if (!SubIdx)
+      return MIB.addReg(Reg, State);
 
-  return Offset1 + 1 == Offset2;
-}
-
-static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
-                                            MCRegister Reg, unsigned SubIdx,
-                                            unsigned State,
-                                            const TargetRegisterInfo *TRI) {
-  if (!SubIdx)
-    return MIB.addReg(Reg, State);
-
-  if (Reg.isPhysical())
-    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
-  return MIB.addReg(Reg, State, SubIdx);
-}
-
-static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
-                                        unsigned NumRegs) {
-  // We really want the positive remainder mod 32 here, that happens to be
-  // easily obtainable with a mask.
-  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
-}
-
-void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator I,
-                                        const DebugLoc &DL, MCRegister DestReg,
-                                        MCRegister SrcReg, bool KillSrc,
-                                        unsigned Opcode,
-                                        ArrayRef<unsigned> Indices) const {
-  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
-  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
-  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
-  unsigned NumRegs = Indices.size();
+    if (Reg.isPhysical())
+      return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+    return MIB.addReg(Reg, State, SubIdx);
+  }
 
-  int SubReg = 0, End = NumRegs, Incr = 1;
-  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
-    SubReg = NumRegs - 1;
-    End = -1;
-    Incr = -1;
+  static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
+                                          unsigned NumRegs) {
+    // We really want the positive remainder mod 32 here, that happens to be
+    // easily obtainable with a mask.
+    return ((DestReg - SrcReg) & 0x1f) < NumRegs;
   }
 
-  for (; SubReg != End; SubReg += Incr) {
-    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
-    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
-    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
-    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+  void AArch64InstrInfo::copyPhysRegTuple(
+      MachineBasicBlock & MBB, MachineBasicBlock::iterator I,
+      const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
+      unsigned Opcode, ArrayRef<unsigned> Indices) const {
+    assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+    uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
+    uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
+    unsigned NumRegs = Indices.size();
+
+    int SubReg = 0, End = NumRegs, Incr = 1;
+    if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
+      SubReg = NumRegs - 1;
+      End = -1;
+      Incr = -1;
+    }
+
+    for (; SubReg != End; SubReg += Incr) {
+      const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
+      AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
+      AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
+      AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+    }
   }
-}
 
-void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
-                                       MachineBasicBlock::iterator I,
-                                       const DebugLoc &DL, MCRegister DestReg,
-                                       MCRegister SrcReg, bool KillSrc,
-                                       unsigned Opcode, unsigned ZeroReg,
-                                       llvm::ArrayRef<unsigned> Indices) const {
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
-  unsigned NumRegs = Indices.size();
+  void AArch64InstrInfo::copyGPRRegTuple(
+      MachineBasicBlock & MBB, MachineBasicBlock::iterator I,
+      const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
+      unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef<unsigned> Indices)
+      const {
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+    unsigned NumRegs = Indices.size();
 
 #ifndef NDEBUG
   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
@@ -5032,7 +5106,7 @@ void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
     MIB.addImm(0);
   }
-}
+  }
 
 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 50217c3a047df..e48703b1285e8 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -3089,17 +3089,41 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
           break;
         case ARMCC::HS: // C
         case ARMCC::LO: // C
-        case ARMCC::VS: // V
-        case ARMCC::VC: // V
         case ARMCC::HI: // C Z
         case ARMCC::LS: // C Z
+          // The instruction uses the C bit which is not safe.
+          return false;
+        case ARMCC::VS: // V
+        case ARMCC::VC: // V
         case ARMCC::GE: // N V
         case ARMCC::LT: // N V
         case ARMCC::GT: // Z N V
         case ARMCC::LE: // Z N V
-          // The instruction uses the V bit or C bit which is not safe.
+        {
+          // These condition codes read the V bit. We may still be able to
+          // substitute the compare if signed overflow is poison.
+
+          if (I->getFlag(MachineInstr::NoSWrap)) {
+            // Only adds and subs can set the V bit.
+            unsigned Opc = I->getOpcode();
+            bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
+                         Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
+                         Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
+                         Opc == ARM::tSUBi8;
+
+            bool IsAdd = Opc == ARM::ADDrr || Opc == ARM::t2ADDrr ||
+                         Opc == ARM::ADDri || Opc == ARM::t2ADDri ||
+                         Opc == ARM::tADDrr || Opc == ARM::tADDi3 ||
+                         Opc == ARM::tADDi8;
+
+            if (IsSub || IsAdd)
+              break;
+          }
+
+          // The instruction uses the V bit which is not safe.
           return false;
         }
+        }
       }
     }
   }
diff --git a/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
index c24ef372a5907..57ef72959a21a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
@@ -110,7 +110,111 @@ define i32 @add_i32(i32 %0, i32 %1) {
   ret i32 %10
 }
 
+define i64 @and_i64(i64 %0, i64 %1) {
+; CHECK-LABEL: and_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ands x0, x1, x0
+; CHECK-NEXT:    b.le .LBB4_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    b _Z2f4l
+; CHECK-NEXT:  .LBB4_2:
+; CHECK-NEXT:    b _Z2f3l
+  %3 = and i64 %1, %0
+  %4 = icmp slt i64 %3, 1
+  br i1 %4, label %5, label %7
+
+5:
+  %6 = tail call i64 @_Z2f3l(i64 %3)
+  br label %9
+
+7:
+  %8 = tail call i64 @_Z2f4l(i64 %3)
+  br label %9
+
+9:
+  %10 = phi i64 [ %6, %5 ], [ %8, %7 ]
+  ret i64 %10
+}
 
+define i32 @and_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: and_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ands w0, w1, w0
+; CHECK-NEXT:    b.le .LBB5_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    b _Z2f4l
+; CHECK-NEXT:  .LBB5_2:
+; CHECK-NEXT:    b _Z2f3l
+  %3 = and i32 %1, %0
+  %4 = icmp slt i32 %3, 1
+  br i1 %4, label %5, label %7
+
+5:
+  %6 = tail call i32 @_Z2f3l(i32 %3)
+  br label %9
+
+7:
+  %8 = tail call i32 @_Z2f4l(i32 %3)
+  br label %9
+
+9:
+  %10 = phi i32 [ %6, %5 ], [ %8, %7 ]
+  ret i32 %10
+}
+
+define i64 @and_i64_freeze(i64 %0, i64 %1) {
+; CHECK-LABEL: and_i64_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ands x0, x1, x0
+; CHECK-NEXT:    b.le .LBB6_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    b _Z2f4l
+; CHECK-NEXT:  .LBB6_2:
+; CHECK-NEXT:    b _Z2f3l
+  %3 = and i64 %1, %0
+  %freeze = freeze i64 %3
+  %4 = icmp slt i64 %3, 1
+  br i1 %4, label %5, label %7
+
+5:
+  %6 = tail call i64 @_Z2f3l(i64 %freeze)
+  br label %9
+
+7:
+  %8 = tail call i64 @_Z2f4l(i64 %freeze)
+  br label %9
+
+9:
+  %10 = phi i64 [ %6, %5 ], [ %8, %7 ]
+  ret i64 %10
+}
+
+define i32 @and_i32_freeze(i32 %0, i32 %1) {
+; CHECK-LABEL: and_i32_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ands w0, w1, w0
+; CHECK-NEXT:    b.le .LBB7_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    b _Z2f4l
+; CHECK-NEXT:  .LBB7_2:
+; CHECK-NEXT:    b _Z2f3l
+  %3 = and i32 %1, %0
+  %freeze = freeze i32 %3
+  %4 = icmp slt i32 %freeze, 1
+  br i1 %4, label %5, label %7
+
+5:
+  %6 = tail call i32 @_Z2f3l(i32 %freeze)
+  br label %9
+
+7:
+  %8 = tail call i32 @_Z2f4l(i32 %freeze)
+  br label %9
+
+9:
+  %10 = phi i32 [ %6, %5 ], [ %8, %7 ]
+  ret i32 %10
+}
 
 declare i32 @_Z2f1i(i32)
 declare i32 @_Z2f2i(i32)
