[llvm] [ARM][AArch64] Allow the CSE to take into consideration uses of the carry and overflow flags in ARM and AArch64 (PR #150803)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 26 18:06:18 PDT 2025


github-actions[bot] wrote:

:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
``````````

</details>
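
If the suggested changes look right, the formatting can also be applied locally rather than only previewed. A minimal sketch (not part of the bot's message; it assumes clang-format is installed and the unformatted code sits in the tip commit of the branch):

``````````bash
# Reformat only the lines that changed relative to HEAD~1, editing the files
# in place (dropping --diff makes git-clang-format apply the edits).
git-clang-format --extensions cpp HEAD~1
# Fold the formatting fixes back into the tip commit.
git add llvm/lib/Target/AArch64/AArch64InstrInfo.cpp llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
git commit --amend --no-edit
``````````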

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index feb7d9695..6529aa277 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1915,3152 +1915,3182 @@ static bool isANDSOpcode(MachineInstr &MI) {
     return true;
   default:
     return false;
-}
-
-/// Check if CmpInstr can be substituted by MI.
-///
-/// CmpInstr can be substituted:
-/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
-/// - and, MI and CmpInstr are from the same MachineBB
-/// - and, condition flags are not alive in successors of the CmpInstr parent
-/// - and, if MI opcode is the S form there must be no defs of flags between
-///        MI and CmpInstr
-///        or if MI opcode is not the S form there must be neither defs of flags
-///        nor uses of flags between MI and CmpInstr.
-/// - and, if C/V flags are not used after CmpInstr
-///        or if N flag is used but MI produces poison value if signed overflow
-///        occurs.
-static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
-                                       const TargetRegisterInfo &TRI) {
-  // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
-  // that may or may not set flags.
-  assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
-
-  const unsigned CmpOpcode = CmpInstr.getOpcode();
-  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
-    return false;
+  }
 
-  assert((CmpInstr.getOperand(2).isImm() &&
-          CmpInstr.getOperand(2).getImm() == 0) &&
-         "Caller guarantees that CmpInstr compares with constant 0");
+  /// Check if CmpInstr can be substituted by MI.
+  ///
+  /// CmpInstr can be substituted:
+  /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
+  /// - and, MI and CmpInstr are from the same MachineBB
+  /// - and, condition flags are not alive in successors of the CmpInstr parent
+  /// - and, if MI opcode is the S form there must be no defs of flags between
+  ///        MI and CmpInstr
+  ///        or if MI opcode is not the S form there must be neither defs of
+  ///        flags nor uses of flags between MI and CmpInstr.
+  /// - and, if C/V flags are not used after CmpInstr
+  ///        or if N flag is used but MI produces poison value if signed
+  ///        overflow occurs.
+  static bool canInstrSubstituteCmpInstr(MachineInstr & MI,
+                                         MachineInstr & CmpInstr,
+                                         const TargetRegisterInfo &TRI) {
+    // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
+    // that may or may not set flags.
+    assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
+
+    const unsigned CmpOpcode = CmpInstr.getOpcode();
+    if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
+      return false;
 
-  std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
-  if (!NZVCUsed)
-    return false;
+    assert((CmpInstr.getOperand(2).isImm() &&
+            CmpInstr.getOperand(2).getImm() == 0) &&
+           "Caller guarantees that CmpInstr compares with constant 0");
 
-  // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
-  // '%vreg = add ...' or '%vreg = sub ...'.
-  // Condition flag C is used to indicate unsigned overflow.
-  // 1) MI and CmpInstr set N and C to the same value if Cmp is an adds
-  // 2) ADDS x, 0, always sets C to 0.
-  // In practice we should not really get here, as an unsigned comparison with 0
-  // should have been optimized out anyway, but just in case.
-  if (NZVCUsed->C && !isADDSRegImm(CmpOpcode))
-    return false;
+    std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
+    if (!NZVCUsed)
+      return false;
 
-  // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
-  // '%vreg = add ...' or '%vreg = sub ...'.
-  // Condition flag V is used to indicate signed overflow.
-  // 1) MI and CmpInstr set N and V to the same value.
-  // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
-  //    signed overflow occurs, so CmpInstr could still be simplified away.
-  // 3) ANDS also always sets V to 0.
-  if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDSOpcode(MI))
-    return false;
+    // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
+    // '%vreg = add ...' or '%vreg = sub ...'.
+    // Condition flag C is used to indicate unsigned overflow.
+    // 1) MI and CmpInstr set N and C to the same value if Cmp is an adds
+    // 2) ADDS x, 0, always sets C to 0.
+    // In practice we should not really get here, as an unsigned comparison with
+    // 0 should have been optimized out anyway, but just in case.
+    if (NZVCUsed->C && !isADDSRegImm(CmpOpcode))
+      return false;
 
-  AccessKind AccessToCheck = AK_Write;
-  if (sForm(MI) != MI.getOpcode())
-    AccessToCheck = AK_All;
-  return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
-}
+    // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
+    // '%vreg = add ...' or '%vreg = sub ...'.
+    // Condition flag V is used to indicate signed overflow.
+    // 1) MI and CmpInstr set N and V to the same value.
+    // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
+    //    signed overflow occurs, so CmpInstr could still be simplified away.
+    // 3) ANDS also always sets V to 0.
+    if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDSOpcode(MI))
+      return false;
 
-/// Substitute an instruction comparing to zero with another instruction
-/// which produces needed condition flags.
-///
-/// Return true on success.
-bool AArch64InstrInfo::substituteCmpToZero(
-    MachineInstr &CmpInstr, unsigned SrcReg,
-    const MachineRegisterInfo &MRI) const {
-  // Get the unique definition of SrcReg.
-  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
-  if (!MI)
-    return false;
+    AccessKind AccessToCheck = AK_Write;
+    if (sForm(MI) != MI.getOpcode())
+      AccessToCheck = AK_All;
+    return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
+  }
+
+  /// Substitute an instruction comparing to zero with another instruction
+  /// which produces needed condition flags.
+  ///
+  /// Return true on success.
+  bool AArch64InstrInfo::substituteCmpToZero(
+      MachineInstr & CmpInstr, unsigned SrcReg, const MachineRegisterInfo &MRI)
+      const {
+    // Get the unique definition of SrcReg.
+    MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
+    if (!MI)
+      return false;
 
-  const TargetRegisterInfo &TRI = getRegisterInfo();
+    const TargetRegisterInfo &TRI = getRegisterInfo();
 
-  unsigned NewOpc = sForm(*MI);
-  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
-    return false;
+    unsigned NewOpc = sForm(*MI);
+    if (NewOpc == AArch64::INSTRUCTION_LIST_END)
+      return false;
 
-  if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
-    return false;
+    if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
+      return false;
 
-  // Update the instruction to set NZCV.
-  MI->setDesc(get(NewOpc));
-  CmpInstr.eraseFromParent();
-  bool succeeded = UpdateOperandRegClass(*MI);
-  (void)succeeded;
-  assert(succeeded && "Some operands reg class are incompatible!");
-  MI->addRegisterDefined(AArch64::NZCV, &TRI);
-  return true;
-}
+    // Update the instruction to set NZCV.
+    MI->setDesc(get(NewOpc));
+    CmpInstr.eraseFromParent();
+    bool succeeded = UpdateOperandRegClass(*MI);
+    (void)succeeded;
+    assert(succeeded && "Some operands reg class are incompatible!");
+    MI->addRegisterDefined(AArch64::NZCV, &TRI);
+    return true;
+  }
 
-/// \returns True if \p CmpInstr can be removed.
-///
-/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
-/// codes used in \p CCUseInstrs must be inverted.
-static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
-                                 int CmpValue, const TargetRegisterInfo &TRI,
-                                 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
-                                 bool &IsInvertCC) {
-  assert((CmpValue == 0 || CmpValue == 1) &&
-         "Only comparisons to 0 or 1 considered for removal!");
-
-  // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
-  unsigned MIOpc = MI.getOpcode();
-  if (MIOpc == AArch64::CSINCWr) {
-    if (MI.getOperand(1).getReg() != AArch64::WZR ||
-        MI.getOperand(2).getReg() != AArch64::WZR)
+  /// \returns True if \p CmpInstr can be removed.
+  ///
+  /// \p IsInvertCC is true if, after removing \p CmpInstr, condition
+  /// codes used in \p CCUseInstrs must be inverted.
+  static bool canCmpInstrBeRemoved(MachineInstr & MI, MachineInstr & CmpInstr,
+                                   int CmpValue, const TargetRegisterInfo &TRI,
+                                   SmallVectorImpl<MachineInstr *> &CCUseInstrs,
+                                   bool &IsInvertCC) {
+    assert((CmpValue == 0 || CmpValue == 1) &&
+           "Only comparisons to 0 or 1 considered for removal!");
+
+    // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
+    unsigned MIOpc = MI.getOpcode();
+    if (MIOpc == AArch64::CSINCWr) {
+      if (MI.getOperand(1).getReg() != AArch64::WZR ||
+          MI.getOperand(2).getReg() != AArch64::WZR)
+        return false;
+    } else if (MIOpc == AArch64::CSINCXr) {
+      if (MI.getOperand(1).getReg() != AArch64::XZR ||
+          MI.getOperand(2).getReg() != AArch64::XZR)
+        return false;
+    } else {
       return false;
-  } else if (MIOpc == AArch64::CSINCXr) {
-    if (MI.getOperand(1).getReg() != AArch64::XZR ||
-        MI.getOperand(2).getReg() != AArch64::XZR)
+    }
+    AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
+    if (MICC == AArch64CC::Invalid)
       return false;
-  } else {
-    return false;
-  }
-  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
-  if (MICC == AArch64CC::Invalid)
-    return false;
 
-  // NZCV needs to be defined
-  if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
-    return false;
+    // NZCV needs to be defined
+    if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) !=
+        -1)
+      return false;
 
-  // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
-  const unsigned CmpOpcode = CmpInstr.getOpcode();
-  bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
-  if (CmpValue && !IsSubsRegImm)
-    return false;
-  if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
-    return false;
+    // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
+    const unsigned CmpOpcode = CmpInstr.getOpcode();
+    bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
+    if (CmpValue && !IsSubsRegImm)
+      return false;
+    if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
+      return false;
 
-  // MI conditions allowed: eq, ne, mi, pl
-  UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
-  if (MIUsedNZCV.C || MIUsedNZCV.V)
-    return false;
+    // MI conditions allowed: eq, ne, mi, pl
+    UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
+    if (MIUsedNZCV.C || MIUsedNZCV.V)
+      return false;
 
-  std::optional<UsedNZCV> NZCVUsedAfterCmp =
-      examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
-  // Condition flags are not used in CmpInstr basic block successors and only
-  // Z or N flags allowed to be used after CmpInstr within its basic block
-  if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
-    return false;
-  // Z or N flag used after CmpInstr must correspond to the flag used in MI
-  if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
-      (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
-    return false;
-  // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
-  if (MIUsedNZCV.N && !CmpValue)
-    return false;
+    std::optional<UsedNZCV> NZCVUsedAfterCmp =
+        examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
+    // Condition flags are not used in CmpInstr basic block successors and only
+    // Z or N flags allowed to be used after CmpInstr within its basic block
+    if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
+      return false;
+    // Z or N flag used after CmpInstr must correspond to the flag used in MI
+    if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
+        (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
+      return false;
+    // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
+    if (MIUsedNZCV.N && !CmpValue)
+      return false;
 
-  // There must be no defs of flags between MI and CmpInstr
-  if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
-    return false;
+    // There must be no defs of flags between MI and CmpInstr
+    if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
+      return false;
 
-  // Condition code is inverted in the following cases:
-  // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
-  // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
-  IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
-               (!CmpValue && MICC == AArch64CC::NE);
-  return true;
-}
+    // Condition code is inverted in the following cases:
+    // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
+    // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
+    IsInvertCC =
+        (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
+        (!CmpValue && MICC == AArch64CC::NE);
+    return true;
+  }
 
-/// Remove comparison in csinc-cmp sequence
-///
-/// Examples:
-/// 1. \code
-///   csinc w9, wzr, wzr, ne
-///   cmp   w9, #0
-///   b.eq
-///    \endcode
-/// to
-///    \code
-///   csinc w9, wzr, wzr, ne
-///   b.ne
-///    \endcode
-///
-/// 2. \code
-///   csinc x2, xzr, xzr, mi
-///   cmp   x2, #1
-///   b.pl
-///    \endcode
-/// to
-///    \code
-///   csinc x2, xzr, xzr, mi
-///   b.pl
-///    \endcode
-///
-/// \param  CmpInstr comparison instruction
-/// \return True when comparison removed
-bool AArch64InstrInfo::removeCmpToZeroOrOne(
-    MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
-    const MachineRegisterInfo &MRI) const {
-  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
-  if (!MI)
-    return false;
-  const TargetRegisterInfo &TRI = getRegisterInfo();
-  SmallVector<MachineInstr *, 4> CCUseInstrs;
-  bool IsInvertCC = false;
-  if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
-                            IsInvertCC))
-    return false;
-  // Make transformation
-  CmpInstr.eraseFromParent();
-  if (IsInvertCC) {
-    // Invert condition codes in CmpInstr CC users
-    for (MachineInstr *CCUseInstr : CCUseInstrs) {
-      int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
-      assert(Idx >= 0 && "Unexpected instruction using CC.");
-      MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
-      AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
-          static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
-      CCOperand.setImm(CCUse);
+  /// Remove comparison in csinc-cmp sequence
+  ///
+  /// Examples:
+  /// 1. \code
+  ///   csinc w9, wzr, wzr, ne
+  ///   cmp   w9, #0
+  ///   b.eq
+  ///    \endcode
+  /// to
+  ///    \code
+  ///   csinc w9, wzr, wzr, ne
+  ///   b.ne
+  ///    \endcode
+  ///
+  /// 2. \code
+  ///   csinc x2, xzr, xzr, mi
+  ///   cmp   x2, #1
+  ///   b.pl
+  ///    \endcode
+  /// to
+  ///    \code
+  ///   csinc x2, xzr, xzr, mi
+  ///   b.pl
+  ///    \endcode
+  ///
+  /// \param  CmpInstr comparison instruction
+  /// \return True when comparison removed
+  bool AArch64InstrInfo::removeCmpToZeroOrOne(
+      MachineInstr & CmpInstr, unsigned SrcReg, int CmpValue,
+      const MachineRegisterInfo &MRI) const {
+    MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
+    if (!MI)
+      return false;
+    const TargetRegisterInfo &TRI = getRegisterInfo();
+    SmallVector<MachineInstr *, 4> CCUseInstrs;
+    bool IsInvertCC = false;
+    if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
+                              IsInvertCC))
+      return false;
+    // Make transformation
+    CmpInstr.eraseFromParent();
+    if (IsInvertCC) {
+      // Invert condition codes in CmpInstr CC users
+      for (MachineInstr *CCUseInstr : CCUseInstrs) {
+        int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
+        assert(Idx >= 0 && "Unexpected instruction using CC.");
+        MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
+        AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
+            static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
+        CCOperand.setImm(CCUse);
+      }
     }
+    return true;
   }
-  return true;
-}
 
-bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
-  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
-      MI.getOpcode() != AArch64::CATCHRET)
-    return false;
+  bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr & MI) const {
+    if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
+        MI.getOpcode() != AArch64::CATCHRET)
+      return false;
 
-  MachineBasicBlock &MBB = *MI.getParent();
-  auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
-  auto TRI = Subtarget.getRegisterInfo();
-  DebugLoc DL = MI.getDebugLoc();
-
-  if (MI.getOpcode() == AArch64::CATCHRET) {
-    // Skip to the first instruction before the epilog.
-    const TargetInstrInfo *TII =
-      MBB.getParent()->getSubtarget().getInstrInfo();
-    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
-    auto MBBI = MachineBasicBlock::iterator(MI);
-    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
-    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
-           FirstEpilogSEH != MBB.begin())
-      FirstEpilogSEH = std::prev(FirstEpilogSEH);
-    if (FirstEpilogSEH != MBB.begin())
-      FirstEpilogSEH = std::next(FirstEpilogSEH);
-    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
-        .addReg(AArch64::X0, RegState::Define)
-        .addMBB(TargetMBB);
-    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
-        .addReg(AArch64::X0, RegState::Define)
-        .addReg(AArch64::X0)
-        .addMBB(TargetMBB)
-        .addImm(0);
-    TargetMBB->setMachineBlockAddressTaken();
-    return true;
-  }
+    MachineBasicBlock &MBB = *MI.getParent();
+    auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
+    auto TRI = Subtarget.getRegisterInfo();
+    DebugLoc DL = MI.getDebugLoc();
+
+    if (MI.getOpcode() == AArch64::CATCHRET) {
+      // Skip to the first instruction before the epilog.
+      const TargetInstrInfo *TII =
+          MBB.getParent()->getSubtarget().getInstrInfo();
+      MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
+      auto MBBI = MachineBasicBlock::iterator(MI);
+      MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
+      while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
+             FirstEpilogSEH != MBB.begin())
+        FirstEpilogSEH = std::prev(FirstEpilogSEH);
+      if (FirstEpilogSEH != MBB.begin())
+        FirstEpilogSEH = std::next(FirstEpilogSEH);
+      BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
+          .addReg(AArch64::X0, RegState::Define)
+          .addMBB(TargetMBB);
+      BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
+          .addReg(AArch64::X0, RegState::Define)
+          .addReg(AArch64::X0)
+          .addMBB(TargetMBB)
+          .addImm(0);
+      TargetMBB->setMachineBlockAddressTaken();
+      return true;
+    }
 
-  Register Reg = MI.getOperand(0).getReg();
-  Module &M = *MBB.getParent()->getFunction().getParent();
-  if (M.getStackProtectorGuard() == "sysreg") {
-    const AArch64SysReg::SysReg *SrcReg =
-        AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
-    if (!SrcReg)
-      report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
-
-    // mrs xN, sysreg
-    BuildMI(MBB, MI, DL, get(AArch64::MRS))
-        .addDef(Reg, RegState::Renamable)
-        .addImm(SrcReg->Encoding);
-    int Offset = M.getStackProtectorGuardOffset();
-    if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
-      // ldr xN, [xN, #offset]
-      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
-          .addDef(Reg)
-          .addUse(Reg, RegState::Kill)
-          .addImm(Offset / 8);
-    } else if (Offset >= -256 && Offset <= 255) {
-      // ldur xN, [xN, #offset]
-      BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
-          .addDef(Reg)
-          .addUse(Reg, RegState::Kill)
-          .addImm(Offset);
-    } else if (Offset >= -4095 && Offset <= 4095) {
-      if (Offset > 0) {
-        // add xN, xN, #offset
-        BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
+    Register Reg = MI.getOperand(0).getReg();
+    Module &M = *MBB.getParent()->getFunction().getParent();
+    if (M.getStackProtectorGuard() == "sysreg") {
+      const AArch64SysReg::SysReg *SrcReg =
+          AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
+      if (!SrcReg)
+        report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
+
+      // mrs xN, sysreg
+      BuildMI(MBB, MI, DL, get(AArch64::MRS))
+          .addDef(Reg, RegState::Renamable)
+          .addImm(SrcReg->Encoding);
+      int Offset = M.getStackProtectorGuardOffset();
+      if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
+        // ldr xN, [xN, #offset]
+        BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
             .addDef(Reg)
             .addUse(Reg, RegState::Kill)
-            .addImm(Offset)
-            .addImm(0);
-      } else {
-        // sub xN, xN, #offset
-        BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
+            .addImm(Offset / 8);
+      } else if (Offset >= -256 && Offset <= 255) {
+        // ldur xN, [xN, #offset]
+        BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
+            .addDef(Reg)
+            .addUse(Reg, RegState::Kill)
+            .addImm(Offset);
+      } else if (Offset >= -4095 && Offset <= 4095) {
+        if (Offset > 0) {
+          // add xN, xN, #offset
+          BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
+              .addDef(Reg)
+              .addUse(Reg, RegState::Kill)
+              .addImm(Offset)
+              .addImm(0);
+        } else {
+          // sub xN, xN, #offset
+          BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
+              .addDef(Reg)
+              .addUse(Reg, RegState::Kill)
+              .addImm(-Offset)
+              .addImm(0);
+        }
+        // ldr xN, [xN]
+        BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
             .addDef(Reg)
             .addUse(Reg, RegState::Kill)
-            .addImm(-Offset)
             .addImm(0);
+      } else {
+        // Cases that are larger than +/- 4095 and not a multiple of 8, or
+        // larger than 23760. It might be nice to use AArch64::MOVi32imm here,
+        // which would get expanded in PreSched2 after PostRA, but our lone
+        // scratch Reg already contains the MRS result.
+        // findScratchNonCalleeSaveRegister() in AArch64FrameLowering might help
+        // us find such a scratch register though. If we failed to find a
+        // scratch register, we could emit a stream of add instructions to build
+        // up the immediate. Or, we could try to insert a AArch64::MOVi32imm
+        // before register allocation so that we didn't need to scavenge for a
+        // scratch register.
+        report_fatal_error("Unable to encode Stack Protector Guard Offset");
       }
-      // ldr xN, [xN]
-      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
-          .addDef(Reg)
-          .addUse(Reg, RegState::Kill)
-          .addImm(0);
-    } else {
-      // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
-      // than 23760.
-      // It might be nice to use AArch64::MOVi32imm here, which would get
-      // expanded in PreSched2 after PostRA, but our lone scratch Reg already
-      // contains the MRS result. findScratchNonCalleeSaveRegister() in
-      // AArch64FrameLowering might help us find such a scratch register
-      // though. If we failed to find a scratch register, we could emit a
-      // stream of add instructions to build up the immediate. Or, we could try
-      // to insert a AArch64::MOVi32imm before register allocation so that we
-      // didn't need to scavenge for a scratch register.
-      report_fatal_error("Unable to encode Stack Protector Guard Offset");
+      MBB.erase(MI);
+      return true;
     }
-    MBB.erase(MI);
-    return true;
-  }
 
-  const GlobalValue *GV =
-      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
-  const TargetMachine &TM = MBB.getParent()->getTarget();
-  unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
-  const unsigned char MO_NC = AArch64II::MO_NC;
-
-  if ((OpFlags & AArch64II::MO_GOT) != 0) {
-    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
-        .addGlobalAddress(GV, 0, OpFlags);
-    if (Subtarget.isTargetILP32()) {
-      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
-      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
-          .addDef(Reg32, RegState::Dead)
-          .addUse(Reg, RegState::Kill)
-          .addImm(0)
-          .addMemOperand(*MI.memoperands_begin())
-          .addDef(Reg, RegState::Implicit);
-    } else {
+    const GlobalValue *GV =
+        cast<GlobalValue>((*MI.memoperands_begin())->getValue());
+    const TargetMachine &TM = MBB.getParent()->getTarget();
+    unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
+    const unsigned char MO_NC = AArch64II::MO_NC;
+
+    if ((OpFlags & AArch64II::MO_GOT) != 0) {
+      BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
+          .addGlobalAddress(GV, 0, OpFlags);
+      if (Subtarget.isTargetILP32()) {
+        unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
+        BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
+            .addDef(Reg32, RegState::Dead)
+            .addUse(Reg, RegState::Kill)
+            .addImm(0)
+            .addMemOperand(*MI.memoperands_begin())
+            .addDef(Reg, RegState::Implicit);
+      } else {
+        BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+            .addReg(Reg, RegState::Kill)
+            .addImm(0)
+            .addMemOperand(*MI.memoperands_begin());
+      }
+    } else if (TM.getCodeModel() == CodeModel::Large) {
+      assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
+      BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
+          .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
+          .addImm(0);
+      BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+          .addReg(Reg, RegState::Kill)
+          .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
+          .addImm(16);
+      BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+          .addReg(Reg, RegState::Kill)
+          .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
+          .addImm(32);
+      BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+          .addReg(Reg, RegState::Kill)
+          .addGlobalAddress(GV, 0, AArch64II::MO_G3)
+          .addImm(48);
       BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
           .addReg(Reg, RegState::Kill)
           .addImm(0)
           .addMemOperand(*MI.memoperands_begin());
-    }
-  } else if (TM.getCodeModel() == CodeModel::Large) {
-    assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
-    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
-        .addImm(0);
-    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
-        .addReg(Reg, RegState::Kill)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
-        .addImm(16);
-    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
-        .addReg(Reg, RegState::Kill)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
-        .addImm(32);
-    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
-        .addReg(Reg, RegState::Kill)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
-        .addImm(48);
-    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
-        .addReg(Reg, RegState::Kill)
-        .addImm(0)
-        .addMemOperand(*MI.memoperands_begin());
-  } else if (TM.getCodeModel() == CodeModel::Tiny) {
-    BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
-        .addGlobalAddress(GV, 0, OpFlags);
-  } else {
-    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
-        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
-    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
-    if (Subtarget.isTargetILP32()) {
-      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
-      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
-          .addDef(Reg32, RegState::Dead)
-          .addUse(Reg, RegState::Kill)
-          .addGlobalAddress(GV, 0, LoFlags)
-          .addMemOperand(*MI.memoperands_begin())
-          .addDef(Reg, RegState::Implicit);
+    } else if (TM.getCodeModel() == CodeModel::Tiny) {
+      BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
+          .addGlobalAddress(GV, 0, OpFlags);
     } else {
-      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
-          .addReg(Reg, RegState::Kill)
-          .addGlobalAddress(GV, 0, LoFlags)
-          .addMemOperand(*MI.memoperands_begin());
+      BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
+          .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
+      unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
+      if (Subtarget.isTargetILP32()) {
+        unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
+        BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
+            .addDef(Reg32, RegState::Dead)
+            .addUse(Reg, RegState::Kill)
+            .addGlobalAddress(GV, 0, LoFlags)
+            .addMemOperand(*MI.memoperands_begin())
+            .addDef(Reg, RegState::Implicit);
+      } else {
+        BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+            .addReg(Reg, RegState::Kill)
+            .addGlobalAddress(GV, 0, LoFlags)
+            .addMemOperand(*MI.memoperands_begin());
+      }
     }
-  }
 
-  MBB.erase(MI);
+    MBB.erase(MI);
 
-  return true;
-}
+    return true;
+  }
 
-// Return true if this instruction simply sets its single destination register
-// to zero. This is equivalent to a register rename of the zero-register.
-bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case AArch64::MOVZWi:
-  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
-    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
-      assert(MI.getDesc().getNumOperands() == 3 &&
-             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
-      return true;
+  // Return true if this instruction simply sets its single destination register
+  // to zero. This is equivalent to a register rename of the zero-register.
+  bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case AArch64::MOVZWi:
+    case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
+      if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
+        assert(MI.getDesc().getNumOperands() == 3 &&
+               MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
+        return true;
+      }
+      break;
+    case AArch64::ANDWri: // and Rd, Rzr, #imm
+      return MI.getOperand(1).getReg() == AArch64::WZR;
+    case AArch64::ANDXri:
+      return MI.getOperand(1).getReg() == AArch64::XZR;
+    case TargetOpcode::COPY:
+      return MI.getOperand(1).getReg() == AArch64::WZR;
     }
-    break;
-  case AArch64::ANDWri: // and Rd, Rzr, #imm
-    return MI.getOperand(1).getReg() == AArch64::WZR;
-  case AArch64::ANDXri:
-    return MI.getOperand(1).getReg() == AArch64::XZR;
-  case TargetOpcode::COPY:
-    return MI.getOperand(1).getReg() == AArch64::WZR;
+    return false;
   }
-  return false;
-}
 
-// Return true if this instruction simply renames a general register without
-// modifying bits.
-bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case TargetOpcode::COPY: {
-    // GPR32 copies will by lowered to ORRXrs
-    Register DstReg = MI.getOperand(0).getReg();
-    return (AArch64::GPR32RegClass.contains(DstReg) ||
-            AArch64::GPR64RegClass.contains(DstReg));
-  }
-  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
-    if (MI.getOperand(1).getReg() == AArch64::XZR) {
-      assert(MI.getDesc().getNumOperands() == 4 &&
-             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
-      return true;
+  // Return true if this instruction simply renames a general register without
+  // modifying bits.
+  bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case TargetOpcode::COPY: {
+      // GPR32 copies will by lowered to ORRXrs
+      Register DstReg = MI.getOperand(0).getReg();
+      return (AArch64::GPR32RegClass.contains(DstReg) ||
+              AArch64::GPR64RegClass.contains(DstReg));
     }
-    break;
-  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
-    if (MI.getOperand(2).getImm() == 0) {
-      assert(MI.getDesc().getNumOperands() == 4 &&
-             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
-      return true;
+    case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
+      if (MI.getOperand(1).getReg() == AArch64::XZR) {
+        assert(MI.getDesc().getNumOperands() == 4 &&
+               MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
+        return true;
+      }
+      break;
+    case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
+      if (MI.getOperand(2).getImm() == 0) {
+        assert(MI.getDesc().getNumOperands() == 4 &&
+               MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
+        return true;
+      }
+      break;
     }
-    break;
+    return false;
   }
-  return false;
-}
 
-// Return true if this instruction simply renames a general register without
-// modifying bits.
-bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case TargetOpcode::COPY: {
-    Register DstReg = MI.getOperand(0).getReg();
-    return AArch64::FPR128RegClass.contains(DstReg);
-  }
-  case AArch64::ORRv16i8:
-    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
-      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
-             "invalid ORRv16i8 operands");
-      return true;
+  // Return true if this instruction simply renames a general register without
+  // modifying bits.
+  bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case TargetOpcode::COPY: {
+      Register DstReg = MI.getOperand(0).getReg();
+      return AArch64::FPR128RegClass.contains(DstReg);
     }
-    break;
+    case AArch64::ORRv16i8:
+      if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+        assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
+               "invalid ORRv16i8 operands");
+        return true;
+      }
+      break;
+    }
+    return false;
   }
-  return false;
-}
 
-Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
-                                               int &FrameIndex) const {
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case AArch64::LDRWui:
-  case AArch64::LDRXui:
-  case AArch64::LDRBui:
-  case AArch64::LDRHui:
-  case AArch64::LDRSui:
-  case AArch64::LDRDui:
-  case AArch64::LDRQui:
-  case AArch64::LDR_PXI:
-    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
-        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
-      FrameIndex = MI.getOperand(1).getIndex();
-      return MI.getOperand(0).getReg();
+  Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+                                                 int &FrameIndex) const {
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case AArch64::LDRWui:
+    case AArch64::LDRXui:
+    case AArch64::LDRBui:
+    case AArch64::LDRHui:
+    case AArch64::LDRSui:
+    case AArch64::LDRDui:
+    case AArch64::LDRQui:
+    case AArch64::LDR_PXI:
+      if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
+          MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
+        FrameIndex = MI.getOperand(1).getIndex();
+        return MI.getOperand(0).getReg();
+      }
+      break;
     }
-    break;
-  }
 
-  return 0;
-}
+    return 0;
+  }
 
-Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
-                                              int &FrameIndex) const {
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case AArch64::STRWui:
-  case AArch64::STRXui:
-  case AArch64::STRBui:
-  case AArch64::STRHui:
-  case AArch64::STRSui:
-  case AArch64::STRDui:
-  case AArch64::STRQui:
-  case AArch64::STR_PXI:
-    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
-        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
-      FrameIndex = MI.getOperand(1).getIndex();
-      return MI.getOperand(0).getReg();
+  Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                                int &FrameIndex) const {
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case AArch64::STRWui:
+    case AArch64::STRXui:
+    case AArch64::STRBui:
+    case AArch64::STRHui:
+    case AArch64::STRSui:
+    case AArch64::STRDui:
+    case AArch64::STRQui:
+    case AArch64::STR_PXI:
+      if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
+          MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
+        FrameIndex = MI.getOperand(1).getIndex();
+        return MI.getOperand(0).getReg();
+      }
+      break;
     }
-    break;
+    return 0;
   }
-  return 0;
-}
 
-/// Check all MachineMemOperands for a hint to suppress pairing.
-bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
-  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
-    return MMO->getFlags() & MOSuppressPair;
-  });
-}
+  /// Check all MachineMemOperands for a hint to suppress pairing.
+  bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
+    return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+      return MMO->getFlags() & MOSuppressPair;
+    });
+  }
 
-/// Set a flag on the first MachineMemOperand to suppress pairing.
-void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
-  if (MI.memoperands_empty())
-    return;
-  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
-}
+  /// Set a flag on the first MachineMemOperand to suppress pairing.
+  void AArch64InstrInfo::suppressLdStPair(MachineInstr & MI) {
+    if (MI.memoperands_empty())
+      return;
+    (*MI.memoperands_begin())->setFlags(MOSuppressPair);
+  }
 
-/// Check all MachineMemOperands for a hint that the load/store is strided.
-bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
-  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
-    return MMO->getFlags() & MOStridedAccess;
-  });
-}
+  /// Check all MachineMemOperands for a hint that the load/store is strided.
+  bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
+    return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+      return MMO->getFlags() & MOStridedAccess;
+    });
+  }
 
-bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
-  switch (Opc) {
-  default:
-    return false;
-  case AArch64::STURSi:
-  case AArch64::STRSpre:
-  case AArch64::STURDi:
-  case AArch64::STRDpre:
-  case AArch64::STURQi:
-  case AArch64::STRQpre:
-  case AArch64::STURBBi:
-  case AArch64::STURHHi:
-  case AArch64::STURWi:
-  case AArch64::STRWpre:
-  case AArch64::STURXi:
-  case AArch64::STRXpre:
-  case AArch64::LDURSi:
-  case AArch64::LDRSpre:
-  case AArch64::LDURDi:
-  case AArch64::LDRDpre:
-  case AArch64::LDURQi:
-  case AArch64::LDRQpre:
-  case AArch64::LDURWi:
-  case AArch64::LDRWpre:
-  case AArch64::LDURXi:
-  case AArch64::LDRXpre:
-  case AArch64::LDRSWpre:
-  case AArch64::LDURSWi:
-  case AArch64::LDURHHi:
-  case AArch64::LDURBBi:
-  case AArch64::LDURSBWi:
-  case AArch64::LDURSHWi:
-    return true;
+  bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
+    switch (Opc) {
+    default:
+      return false;
+    case AArch64::STURSi:
+    case AArch64::STRSpre:
+    case AArch64::STURDi:
+    case AArch64::STRDpre:
+    case AArch64::STURQi:
+    case AArch64::STRQpre:
+    case AArch64::STURBBi:
+    case AArch64::STURHHi:
+    case AArch64::STURWi:
+    case AArch64::STRWpre:
+    case AArch64::STURXi:
+    case AArch64::STRXpre:
+    case AArch64::LDURSi:
+    case AArch64::LDRSpre:
+    case AArch64::LDURDi:
+    case AArch64::LDRDpre:
+    case AArch64::LDURQi:
+    case AArch64::LDRQpre:
+    case AArch64::LDURWi:
+    case AArch64::LDRWpre:
+    case AArch64::LDURXi:
+    case AArch64::LDRXpre:
+    case AArch64::LDRSWpre:
+    case AArch64::LDURSWi:
+    case AArch64::LDURHHi:
+    case AArch64::LDURBBi:
+    case AArch64::LDURSBWi:
+    case AArch64::LDURSHWi:
+      return true;
+    }
   }
-}
 
-std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
-  switch (Opc) {
-  default: return {};
-  case AArch64::PRFMui: return AArch64::PRFUMi;
-  case AArch64::LDRXui: return AArch64::LDURXi;
-  case AArch64::LDRWui: return AArch64::LDURWi;
-  case AArch64::LDRBui: return AArch64::LDURBi;
-  case AArch64::LDRHui: return AArch64::LDURHi;
-  case AArch64::LDRSui: return AArch64::LDURSi;
-  case AArch64::LDRDui: return AArch64::LDURDi;
-  case AArch64::LDRQui: return AArch64::LDURQi;
-  case AArch64::LDRBBui: return AArch64::LDURBBi;
-  case AArch64::LDRHHui: return AArch64::LDURHHi;
-  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
-  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
-  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
-  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
-  case AArch64::LDRSWui: return AArch64::LDURSWi;
-  case AArch64::STRXui: return AArch64::STURXi;
-  case AArch64::STRWui: return AArch64::STURWi;
-  case AArch64::STRBui: return AArch64::STURBi;
-  case AArch64::STRHui: return AArch64::STURHi;
-  case AArch64::STRSui: return AArch64::STURSi;
-  case AArch64::STRDui: return AArch64::STURDi;
-  case AArch64::STRQui: return AArch64::STURQi;
-  case AArch64::STRBBui: return AArch64::STURBBi;
-  case AArch64::STRHHui: return AArch64::STURHHi;
-  }
-}
-
-unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
-  switch (Opc) {
-  default:
-    llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
-  case AArch64::ADDG:
-  case AArch64::LDAPURBi:
-  case AArch64::LDAPURHi:
-  case AArch64::LDAPURi:
-  case AArch64::LDAPURSBWi:
-  case AArch64::LDAPURSBXi:
-  case AArch64::LDAPURSHWi:
-  case AArch64::LDAPURSHXi:
-  case AArch64::LDAPURSWi:
-  case AArch64::LDAPURXi:
-  case AArch64::LDR_PPXI:
-  case AArch64::LDR_PXI:
-  case AArch64::LDR_ZXI:
-  case AArch64::LDR_ZZXI:
-  case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::LDR_ZZZXI:
-  case AArch64::LDR_ZZZZXI:
-  case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::LDRBBui:
-  case AArch64::LDRBui:
-  case AArch64::LDRDui:
-  case AArch64::LDRHHui:
-  case AArch64::LDRHui:
-  case AArch64::LDRQui:
-  case AArch64::LDRSBWui:
-  case AArch64::LDRSBXui:
-  case AArch64::LDRSHWui:
-  case AArch64::LDRSHXui:
-  case AArch64::LDRSui:
-  case AArch64::LDRSWui:
-  case AArch64::LDRWui:
-  case AArch64::LDRXui:
-  case AArch64::LDURBBi:
-  case AArch64::LDURBi:
-  case AArch64::LDURDi:
-  case AArch64::LDURHHi:
-  case AArch64::LDURHi:
-  case AArch64::LDURQi:
-  case AArch64::LDURSBWi:
-  case AArch64::LDURSBXi:
-  case AArch64::LDURSHWi:
-  case AArch64::LDURSHXi:
-  case AArch64::LDURSi:
-  case AArch64::LDURSWi:
-  case AArch64::LDURWi:
-  case AArch64::LDURXi:
-  case AArch64::PRFMui:
-  case AArch64::PRFUMi:
-  case AArch64::ST2Gi:
-  case AArch64::STGi:
-  case AArch64::STLURBi:
-  case AArch64::STLURHi:
-  case AArch64::STLURWi:
-  case AArch64::STLURXi:
-  case AArch64::StoreSwiftAsyncContext:
-  case AArch64::STR_PPXI:
-  case AArch64::STR_PXI:
-  case AArch64::STR_ZXI:
-  case AArch64::STR_ZZXI:
-  case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::STR_ZZZXI:
-  case AArch64::STR_ZZZZXI:
-  case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::STRBBui:
-  case AArch64::STRBui:
-  case AArch64::STRDui:
-  case AArch64::STRHHui:
-  case AArch64::STRHui:
-  case AArch64::STRQui:
-  case AArch64::STRSui:
-  case AArch64::STRWui:
-  case AArch64::STRXui:
-  case AArch64::STURBBi:
-  case AArch64::STURBi:
-  case AArch64::STURDi:
-  case AArch64::STURHHi:
-  case AArch64::STURHi:
-  case AArch64::STURQi:
-  case AArch64::STURSi:
-  case AArch64::STURWi:
-  case AArch64::STURXi:
-  case AArch64::STZ2Gi:
-  case AArch64::STZGi:
-  case AArch64::TAGPstack:
-  case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
-  case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
-    return 2;
-  case AArch64::LD1B_D_IMM:
-  case AArch64::LD1B_H_IMM:
-  case AArch64::LD1B_IMM:
-  case AArch64::LD1B_S_IMM:
-  case AArch64::LD1D_IMM:
-  case AArch64::LD1H_D_IMM:
-  case AArch64::LD1H_IMM:
-  case AArch64::LD1H_S_IMM:
-  case AArch64::LD1RB_D_IMM:
-  case AArch64::LD1RB_H_IMM:
-  case AArch64::LD1RB_IMM:
-  case AArch64::LD1RB_S_IMM:
-  case AArch64::LD1RD_IMM:
-  case AArch64::LD1RH_D_IMM:
-  case AArch64::LD1RH_IMM:
-  case AArch64::LD1RH_S_IMM:
-  case AArch64::LD1RSB_D_IMM:
-  case AArch64::LD1RSB_H_IMM:
-  case AArch64::LD1RSB_S_IMM:
-  case AArch64::LD1RSH_D_IMM:
-  case AArch64::LD1RSH_S_IMM:
-  case AArch64::LD1RSW_IMM:
-  case AArch64::LD1RW_D_IMM:
-  case AArch64::LD1RW_IMM:
-  case AArch64::LD1SB_D_IMM:
-  case AArch64::LD1SB_H_IMM:
-  case AArch64::LD1SB_S_IMM:
-  case AArch64::LD1SH_D_IMM:
-  case AArch64::LD1SH_S_IMM:
-  case AArch64::LD1SW_D_IMM:
-  case AArch64::LD1W_D_IMM:
-  case AArch64::LD1W_IMM:
-  case AArch64::LD2B_IMM:
-  case AArch64::LD2D_IMM:
-  case AArch64::LD2H_IMM:
-  case AArch64::LD2W_IMM:
-  case AArch64::LD3B_IMM:
-  case AArch64::LD3D_IMM:
-  case AArch64::LD3H_IMM:
-  case AArch64::LD3W_IMM:
-  case AArch64::LD4B_IMM:
-  case AArch64::LD4D_IMM:
-  case AArch64::LD4H_IMM:
-  case AArch64::LD4W_IMM:
-  case AArch64::LDG:
-  case AArch64::LDNF1B_D_IMM:
-  case AArch64::LDNF1B_H_IMM:
-  case AArch64::LDNF1B_IMM:
-  case AArch64::LDNF1B_S_IMM:
-  case AArch64::LDNF1D_IMM:
-  case AArch64::LDNF1H_D_IMM:
-  case AArch64::LDNF1H_IMM:
-  case AArch64::LDNF1H_S_IMM:
-  case AArch64::LDNF1SB_D_IMM:
-  case AArch64::LDNF1SB_H_IMM:
-  case AArch64::LDNF1SB_S_IMM:
-  case AArch64::LDNF1SH_D_IMM:
-  case AArch64::LDNF1SH_S_IMM:
-  case AArch64::LDNF1SW_D_IMM:
-  case AArch64::LDNF1W_D_IMM:
-  case AArch64::LDNF1W_IMM:
-  case AArch64::LDNPDi:
-  case AArch64::LDNPQi:
-  case AArch64::LDNPSi:
-  case AArch64::LDNPWi:
-  case AArch64::LDNPXi:
-  case AArch64::LDNT1B_ZRI:
-  case AArch64::LDNT1D_ZRI:
-  case AArch64::LDNT1H_ZRI:
-  case AArch64::LDNT1W_ZRI:
-  case AArch64::LDPDi:
-  case AArch64::LDPQi:
-  case AArch64::LDPSi:
-  case AArch64::LDPWi:
-  case AArch64::LDPXi:
-  case AArch64::LDRBBpost:
-  case AArch64::LDRBBpre:
-  case AArch64::LDRBpost:
-  case AArch64::LDRBpre:
-  case AArch64::LDRDpost:
-  case AArch64::LDRDpre:
-  case AArch64::LDRHHpost:
-  case AArch64::LDRHHpre:
-  case AArch64::LDRHpost:
-  case AArch64::LDRHpre:
-  case AArch64::LDRQpost:
-  case AArch64::LDRQpre:
-  case AArch64::LDRSpost:
-  case AArch64::LDRSpre:
-  case AArch64::LDRWpost:
-  case AArch64::LDRWpre:
-  case AArch64::LDRXpost:
-  case AArch64::LDRXpre:
-  case AArch64::ST1B_D_IMM:
-  case AArch64::ST1B_H_IMM:
-  case AArch64::ST1B_IMM:
-  case AArch64::ST1B_S_IMM:
-  case AArch64::ST1D_IMM:
-  case AArch64::ST1H_D_IMM:
-  case AArch64::ST1H_IMM:
-  case AArch64::ST1H_S_IMM:
-  case AArch64::ST1W_D_IMM:
-  case AArch64::ST1W_IMM:
-  case AArch64::ST2B_IMM:
-  case AArch64::ST2D_IMM:
-  case AArch64::ST2H_IMM:
-  case AArch64::ST2W_IMM:
-  case AArch64::ST3B_IMM:
-  case AArch64::ST3D_IMM:
-  case AArch64::ST3H_IMM:
-  case AArch64::ST3W_IMM:
-  case AArch64::ST4B_IMM:
-  case AArch64::ST4D_IMM:
-  case AArch64::ST4H_IMM:
-  case AArch64::ST4W_IMM:
-  case AArch64::STGPi:
-  case AArch64::STGPreIndex:
-  case AArch64::STZGPreIndex:
-  case AArch64::ST2GPreIndex:
-  case AArch64::STZ2GPreIndex:
-  case AArch64::STGPostIndex:
-  case AArch64::STZGPostIndex:
-  case AArch64::ST2GPostIndex:
-  case AArch64::STZ2GPostIndex:
-  case AArch64::STNPDi:
-  case AArch64::STNPQi:
-  case AArch64::STNPSi:
-  case AArch64::STNPWi:
-  case AArch64::STNPXi:
-  case AArch64::STNT1B_ZRI:
-  case AArch64::STNT1D_ZRI:
-  case AArch64::STNT1H_ZRI:
-  case AArch64::STNT1W_ZRI:
-  case AArch64::STPDi:
-  case AArch64::STPQi:
-  case AArch64::STPSi:
-  case AArch64::STPWi:
-  case AArch64::STPXi:
-  case AArch64::STRBBpost:
-  case AArch64::STRBBpre:
-  case AArch64::STRBpost:
-  case AArch64::STRBpre:
-  case AArch64::STRDpost:
-  case AArch64::STRDpre:
-  case AArch64::STRHHpost:
-  case AArch64::STRHHpre:
-  case AArch64::STRHpost:
-  case AArch64::STRHpre:
-  case AArch64::STRQpost:
-  case AArch64::STRQpre:
-  case AArch64::STRSpost:
-  case AArch64::STRSpre:
-  case AArch64::STRWpost:
-  case AArch64::STRWpre:
-  case AArch64::STRXpost:
-  case AArch64::STRXpre:
-    return 3;
-  case AArch64::LDPDpost:
-  case AArch64::LDPDpre:
-  case AArch64::LDPQpost:
-  case AArch64::LDPQpre:
-  case AArch64::LDPSpost:
-  case AArch64::LDPSpre:
-  case AArch64::LDPWpost:
-  case AArch64::LDPWpre:
-  case AArch64::LDPXpost:
-  case AArch64::LDPXpre:
-  case AArch64::STGPpre:
-  case AArch64::STGPpost:
-  case AArch64::STPDpost:
-  case AArch64::STPDpre:
-  case AArch64::STPQpost:
-  case AArch64::STPQpre:
-  case AArch64::STPSpost:
-  case AArch64::STPSpre:
-  case AArch64::STPWpost:
-  case AArch64::STPWpre:
-  case AArch64::STPXpost:
-  case AArch64::STPXpre:
-    return 4;
-  }
-}
-
-bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    return false;
-  // Scaled instructions.
-  case AArch64::STRSui:
-  case AArch64::STRDui:
-  case AArch64::STRQui:
-  case AArch64::STRXui:
-  case AArch64::STRWui:
-  case AArch64::LDRSui:
-  case AArch64::LDRDui:
-  case AArch64::LDRQui:
-  case AArch64::LDRXui:
-  case AArch64::LDRWui:
-  case AArch64::LDRSWui:
-  // Unscaled instructions.
-  case AArch64::STURSi:
-  case AArch64::STRSpre:
-  case AArch64::STURDi:
-  case AArch64::STRDpre:
-  case AArch64::STURQi:
-  case AArch64::STRQpre:
-  case AArch64::STURWi:
-  case AArch64::STRWpre:
-  case AArch64::STURXi:
-  case AArch64::STRXpre:
-  case AArch64::LDURSi:
-  case AArch64::LDRSpre:
-  case AArch64::LDURDi:
-  case AArch64::LDRDpre:
-  case AArch64::LDURQi:
-  case AArch64::LDRQpre:
-  case AArch64::LDURWi:
-  case AArch64::LDRWpre:
-  case AArch64::LDURXi:
-  case AArch64::LDRXpre:
-  case AArch64::LDURSWi:
-  case AArch64::LDRSWpre:
-  // SVE instructions.
-  case AArch64::LDR_ZXI:
-  case AArch64::STR_ZXI:
-    return true;
+  std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
+    switch (Opc) {
+    default:
+      return {};
+    case AArch64::PRFMui:
+      return AArch64::PRFUMi;
+    case AArch64::LDRXui:
+      return AArch64::LDURXi;
+    case AArch64::LDRWui:
+      return AArch64::LDURWi;
+    case AArch64::LDRBui:
+      return AArch64::LDURBi;
+    case AArch64::LDRHui:
+      return AArch64::LDURHi;
+    case AArch64::LDRSui:
+      return AArch64::LDURSi;
+    case AArch64::LDRDui:
+      return AArch64::LDURDi;
+    case AArch64::LDRQui:
+      return AArch64::LDURQi;
+    case AArch64::LDRBBui:
+      return AArch64::LDURBBi;
+    case AArch64::LDRHHui:
+      return AArch64::LDURHHi;
+    case AArch64::LDRSBXui:
+      return AArch64::LDURSBXi;
+    case AArch64::LDRSBWui:
+      return AArch64::LDURSBWi;
+    case AArch64::LDRSHXui:
+      return AArch64::LDURSHXi;
+    case AArch64::LDRSHWui:
+      return AArch64::LDURSHWi;
+    case AArch64::LDRSWui:
+      return AArch64::LDURSWi;
+    case AArch64::STRXui:
+      return AArch64::STURXi;
+    case AArch64::STRWui:
+      return AArch64::STURWi;
+    case AArch64::STRBui:
+      return AArch64::STURBi;
+    case AArch64::STRHui:
+      return AArch64::STURHi;
+    case AArch64::STRSui:
+      return AArch64::STURSi;
+    case AArch64::STRDui:
+      return AArch64::STURDi;
+    case AArch64::STRQui:
+      return AArch64::STURQi;
+    case AArch64::STRBBui:
+      return AArch64::STURBBi;
+    case AArch64::STRHHui:
+      return AArch64::STURHHi;
+    }
   }
-}
 
-bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    assert((!MI.isCall() || !MI.isReturn()) &&
-           "Unexpected instruction - was a new tail call opcode introduced?");
-    return false;
-  case AArch64::TCRETURNdi:
-  case AArch64::TCRETURNri:
-  case AArch64::TCRETURNrix16x17:
-  case AArch64::TCRETURNrix17:
-  case AArch64::TCRETURNrinotx16:
-  case AArch64::TCRETURNriALL:
-  case AArch64::AUTH_TCRETURN:
-  case AArch64::AUTH_TCRETURN_BTI:
-    return true;
+  unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
+    switch (Opc) {
+    default:
+      llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
+    case AArch64::ADDG:
+    case AArch64::LDAPURBi:
+    case AArch64::LDAPURHi:
+    case AArch64::LDAPURi:
+    case AArch64::LDAPURSBWi:
+    case AArch64::LDAPURSBXi:
+    case AArch64::LDAPURSHWi:
+    case AArch64::LDAPURSHXi:
+    case AArch64::LDAPURSWi:
+    case AArch64::LDAPURXi:
+    case AArch64::LDR_PPXI:
+    case AArch64::LDR_PXI:
+    case AArch64::LDR_ZXI:
+    case AArch64::LDR_ZZXI:
+    case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::LDR_ZZZXI:
+    case AArch64::LDR_ZZZZXI:
+    case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::LDRBBui:
+    case AArch64::LDRBui:
+    case AArch64::LDRDui:
+    case AArch64::LDRHHui:
+    case AArch64::LDRHui:
+    case AArch64::LDRQui:
+    case AArch64::LDRSBWui:
+    case AArch64::LDRSBXui:
+    case AArch64::LDRSHWui:
+    case AArch64::LDRSHXui:
+    case AArch64::LDRSui:
+    case AArch64::LDRSWui:
+    case AArch64::LDRWui:
+    case AArch64::LDRXui:
+    case AArch64::LDURBBi:
+    case AArch64::LDURBi:
+    case AArch64::LDURDi:
+    case AArch64::LDURHHi:
+    case AArch64::LDURHi:
+    case AArch64::LDURQi:
+    case AArch64::LDURSBWi:
+    case AArch64::LDURSBXi:
+    case AArch64::LDURSHWi:
+    case AArch64::LDURSHXi:
+    case AArch64::LDURSi:
+    case AArch64::LDURSWi:
+    case AArch64::LDURWi:
+    case AArch64::LDURXi:
+    case AArch64::PRFMui:
+    case AArch64::PRFUMi:
+    case AArch64::ST2Gi:
+    case AArch64::STGi:
+    case AArch64::STLURBi:
+    case AArch64::STLURHi:
+    case AArch64::STLURWi:
+    case AArch64::STLURXi:
+    case AArch64::StoreSwiftAsyncContext:
+    case AArch64::STR_PPXI:
+    case AArch64::STR_PXI:
+    case AArch64::STR_ZXI:
+    case AArch64::STR_ZZXI:
+    case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::STR_ZZZXI:
+    case AArch64::STR_ZZZZXI:
+    case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::STRBBui:
+    case AArch64::STRBui:
+    case AArch64::STRDui:
+    case AArch64::STRHHui:
+    case AArch64::STRHui:
+    case AArch64::STRQui:
+    case AArch64::STRSui:
+    case AArch64::STRWui:
+    case AArch64::STRXui:
+    case AArch64::STURBBi:
+    case AArch64::STURBi:
+    case AArch64::STURDi:
+    case AArch64::STURHHi:
+    case AArch64::STURHi:
+    case AArch64::STURQi:
+    case AArch64::STURSi:
+    case AArch64::STURWi:
+    case AArch64::STURXi:
+    case AArch64::STZ2Gi:
+    case AArch64::STZGi:
+    case AArch64::TAGPstack:
+    case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+    case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
+      return 2;
+    case AArch64::LD1B_D_IMM:
+    case AArch64::LD1B_H_IMM:
+    case AArch64::LD1B_IMM:
+    case AArch64::LD1B_S_IMM:
+    case AArch64::LD1D_IMM:
+    case AArch64::LD1H_D_IMM:
+    case AArch64::LD1H_IMM:
+    case AArch64::LD1H_S_IMM:
+    case AArch64::LD1RB_D_IMM:
+    case AArch64::LD1RB_H_IMM:
+    case AArch64::LD1RB_IMM:
+    case AArch64::LD1RB_S_IMM:
+    case AArch64::LD1RD_IMM:
+    case AArch64::LD1RH_D_IMM:
+    case AArch64::LD1RH_IMM:
+    case AArch64::LD1RH_S_IMM:
+    case AArch64::LD1RSB_D_IMM:
+    case AArch64::LD1RSB_H_IMM:
+    case AArch64::LD1RSB_S_IMM:
+    case AArch64::LD1RSH_D_IMM:
+    case AArch64::LD1RSH_S_IMM:
+    case AArch64::LD1RSW_IMM:
+    case AArch64::LD1RW_D_IMM:
+    case AArch64::LD1RW_IMM:
+    case AArch64::LD1SB_D_IMM:
+    case AArch64::LD1SB_H_IMM:
+    case AArch64::LD1SB_S_IMM:
+    case AArch64::LD1SH_D_IMM:
+    case AArch64::LD1SH_S_IMM:
+    case AArch64::LD1SW_D_IMM:
+    case AArch64::LD1W_D_IMM:
+    case AArch64::LD1W_IMM:
+    case AArch64::LD2B_IMM:
+    case AArch64::LD2D_IMM:
+    case AArch64::LD2H_IMM:
+    case AArch64::LD2W_IMM:
+    case AArch64::LD3B_IMM:
+    case AArch64::LD3D_IMM:
+    case AArch64::LD3H_IMM:
+    case AArch64::LD3W_IMM:
+    case AArch64::LD4B_IMM:
+    case AArch64::LD4D_IMM:
+    case AArch64::LD4H_IMM:
+    case AArch64::LD4W_IMM:
+    case AArch64::LDG:
+    case AArch64::LDNF1B_D_IMM:
+    case AArch64::LDNF1B_H_IMM:
+    case AArch64::LDNF1B_IMM:
+    case AArch64::LDNF1B_S_IMM:
+    case AArch64::LDNF1D_IMM:
+    case AArch64::LDNF1H_D_IMM:
+    case AArch64::LDNF1H_IMM:
+    case AArch64::LDNF1H_S_IMM:
+    case AArch64::LDNF1SB_D_IMM:
+    case AArch64::LDNF1SB_H_IMM:
+    case AArch64::LDNF1SB_S_IMM:
+    case AArch64::LDNF1SH_D_IMM:
+    case AArch64::LDNF1SH_S_IMM:
+    case AArch64::LDNF1SW_D_IMM:
+    case AArch64::LDNF1W_D_IMM:
+    case AArch64::LDNF1W_IMM:
+    case AArch64::LDNPDi:
+    case AArch64::LDNPQi:
+    case AArch64::LDNPSi:
+    case AArch64::LDNPWi:
+    case AArch64::LDNPXi:
+    case AArch64::LDNT1B_ZRI:
+    case AArch64::LDNT1D_ZRI:
+    case AArch64::LDNT1H_ZRI:
+    case AArch64::LDNT1W_ZRI:
+    case AArch64::LDPDi:
+    case AArch64::LDPQi:
+    case AArch64::LDPSi:
+    case AArch64::LDPWi:
+    case AArch64::LDPXi:
+    case AArch64::LDRBBpost:
+    case AArch64::LDRBBpre:
+    case AArch64::LDRBpost:
+    case AArch64::LDRBpre:
+    case AArch64::LDRDpost:
+    case AArch64::LDRDpre:
+    case AArch64::LDRHHpost:
+    case AArch64::LDRHHpre:
+    case AArch64::LDRHpost:
+    case AArch64::LDRHpre:
+    case AArch64::LDRQpost:
+    case AArch64::LDRQpre:
+    case AArch64::LDRSpost:
+    case AArch64::LDRSpre:
+    case AArch64::LDRWpost:
+    case AArch64::LDRWpre:
+    case AArch64::LDRXpost:
+    case AArch64::LDRXpre:
+    case AArch64::ST1B_D_IMM:
+    case AArch64::ST1B_H_IMM:
+    case AArch64::ST1B_IMM:
+    case AArch64::ST1B_S_IMM:
+    case AArch64::ST1D_IMM:
+    case AArch64::ST1H_D_IMM:
+    case AArch64::ST1H_IMM:
+    case AArch64::ST1H_S_IMM:
+    case AArch64::ST1W_D_IMM:
+    case AArch64::ST1W_IMM:
+    case AArch64::ST2B_IMM:
+    case AArch64::ST2D_IMM:
+    case AArch64::ST2H_IMM:
+    case AArch64::ST2W_IMM:
+    case AArch64::ST3B_IMM:
+    case AArch64::ST3D_IMM:
+    case AArch64::ST3H_IMM:
+    case AArch64::ST3W_IMM:
+    case AArch64::ST4B_IMM:
+    case AArch64::ST4D_IMM:
+    case AArch64::ST4H_IMM:
+    case AArch64::ST4W_IMM:
+    case AArch64::STGPi:
+    case AArch64::STGPreIndex:
+    case AArch64::STZGPreIndex:
+    case AArch64::ST2GPreIndex:
+    case AArch64::STZ2GPreIndex:
+    case AArch64::STGPostIndex:
+    case AArch64::STZGPostIndex:
+    case AArch64::ST2GPostIndex:
+    case AArch64::STZ2GPostIndex:
+    case AArch64::STNPDi:
+    case AArch64::STNPQi:
+    case AArch64::STNPSi:
+    case AArch64::STNPWi:
+    case AArch64::STNPXi:
+    case AArch64::STNT1B_ZRI:
+    case AArch64::STNT1D_ZRI:
+    case AArch64::STNT1H_ZRI:
+    case AArch64::STNT1W_ZRI:
+    case AArch64::STPDi:
+    case AArch64::STPQi:
+    case AArch64::STPSi:
+    case AArch64::STPWi:
+    case AArch64::STPXi:
+    case AArch64::STRBBpost:
+    case AArch64::STRBBpre:
+    case AArch64::STRBpost:
+    case AArch64::STRBpre:
+    case AArch64::STRDpost:
+    case AArch64::STRDpre:
+    case AArch64::STRHHpost:
+    case AArch64::STRHHpre:
+    case AArch64::STRHpost:
+    case AArch64::STRHpre:
+    case AArch64::STRQpost:
+    case AArch64::STRQpre:
+    case AArch64::STRSpost:
+    case AArch64::STRSpre:
+    case AArch64::STRWpost:
+    case AArch64::STRWpre:
+    case AArch64::STRXpost:
+    case AArch64::STRXpre:
+      return 3;
+    case AArch64::LDPDpost:
+    case AArch64::LDPDpre:
+    case AArch64::LDPQpost:
+    case AArch64::LDPQpre:
+    case AArch64::LDPSpost:
+    case AArch64::LDPSpre:
+    case AArch64::LDPWpost:
+    case AArch64::LDPWpre:
+    case AArch64::LDPXpost:
+    case AArch64::LDPXpre:
+    case AArch64::STGPpre:
+    case AArch64::STGPpost:
+    case AArch64::STPDpost:
+    case AArch64::STPDpre:
+    case AArch64::STPQpost:
+    case AArch64::STPQpre:
+    case AArch64::STPSpost:
+    case AArch64::STPSpre:
+    case AArch64::STPWpost:
+    case AArch64::STPWpre:
+    case AArch64::STPXpost:
+    case AArch64::STPXpre:
+      return 4;
+    }
   }
-}
 
-unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
-  switch (Opc) {
-  default:
-    llvm_unreachable("Opcode has no flag setting equivalent!");
-  // 32-bit cases:
-  case AArch64::ADDWri:
-    return AArch64::ADDSWri;
-  case AArch64::ADDWrr:
-    return AArch64::ADDSWrr;
-  case AArch64::ADDWrs:
-    return AArch64::ADDSWrs;
-  case AArch64::ADDWrx:
-    return AArch64::ADDSWrx;
-  case AArch64::ANDWri:
-    return AArch64::ANDSWri;
-  case AArch64::ANDWrr:
-    return AArch64::ANDSWrr;
-  case AArch64::ANDWrs:
-    return AArch64::ANDSWrs;
-  case AArch64::BICWrr:
-    return AArch64::BICSWrr;
-  case AArch64::BICWrs:
-    return AArch64::BICSWrs;
-  case AArch64::SUBWri:
-    return AArch64::SUBSWri;
-  case AArch64::SUBWrr:
-    return AArch64::SUBSWrr;
-  case AArch64::SUBWrs:
-    return AArch64::SUBSWrs;
-  case AArch64::SUBWrx:
-    return AArch64::SUBSWrx;
-  // 64-bit cases:
-  case AArch64::ADDXri:
-    return AArch64::ADDSXri;
-  case AArch64::ADDXrr:
-    return AArch64::ADDSXrr;
-  case AArch64::ADDXrs:
-    return AArch64::ADDSXrs;
-  case AArch64::ADDXrx:
-    return AArch64::ADDSXrx;
-  case AArch64::ANDXri:
-    return AArch64::ANDSXri;
-  case AArch64::ANDXrr:
-    return AArch64::ANDSXrr;
-  case AArch64::ANDXrs:
-    return AArch64::ANDSXrs;
-  case AArch64::BICXrr:
-    return AArch64::BICSXrr;
-  case AArch64::BICXrs:
-    return AArch64::BICSXrs;
-  case AArch64::SUBXri:
-    return AArch64::SUBSXri;
-  case AArch64::SUBXrr:
-    return AArch64::SUBSXrr;
-  case AArch64::SUBXrs:
-    return AArch64::SUBSXrs;
-  case AArch64::SUBXrx:
-    return AArch64::SUBSXrx;
-  // SVE instructions:
-  case AArch64::AND_PPzPP:
-    return AArch64::ANDS_PPzPP;
-  case AArch64::BIC_PPzPP:
-    return AArch64::BICS_PPzPP;
-  case AArch64::EOR_PPzPP:
-    return AArch64::EORS_PPzPP;
-  case AArch64::NAND_PPzPP:
-    return AArch64::NANDS_PPzPP;
-  case AArch64::NOR_PPzPP:
-    return AArch64::NORS_PPzPP;
-  case AArch64::ORN_PPzPP:
-    return AArch64::ORNS_PPzPP;
-  case AArch64::ORR_PPzPP:
-    return AArch64::ORRS_PPzPP;
-  case AArch64::BRKA_PPzP:
-    return AArch64::BRKAS_PPzP;
-  case AArch64::BRKPA_PPzPP:
-    return AArch64::BRKPAS_PPzPP;
-  case AArch64::BRKB_PPzP:
-    return AArch64::BRKBS_PPzP;
-  case AArch64::BRKPB_PPzPP:
-    return AArch64::BRKPBS_PPzPP;
-  case AArch64::BRKN_PPzP:
-    return AArch64::BRKNS_PPzP;
-  case AArch64::RDFFR_PPz:
-    return AArch64::RDFFRS_PPz;
-  case AArch64::PTRUE_B:
-    return AArch64::PTRUES_B;
+  bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      return false;
+    // Scaled instructions.
+    case AArch64::STRSui:
+    case AArch64::STRDui:
+    case AArch64::STRQui:
+    case AArch64::STRXui:
+    case AArch64::STRWui:
+    case AArch64::LDRSui:
+    case AArch64::LDRDui:
+    case AArch64::LDRQui:
+    case AArch64::LDRXui:
+    case AArch64::LDRWui:
+    case AArch64::LDRSWui:
+    // Unscaled instructions.
+    case AArch64::STURSi:
+    case AArch64::STRSpre:
+    case AArch64::STURDi:
+    case AArch64::STRDpre:
+    case AArch64::STURQi:
+    case AArch64::STRQpre:
+    case AArch64::STURWi:
+    case AArch64::STRWpre:
+    case AArch64::STURXi:
+    case AArch64::STRXpre:
+    case AArch64::LDURSi:
+    case AArch64::LDRSpre:
+    case AArch64::LDURDi:
+    case AArch64::LDRDpre:
+    case AArch64::LDURQi:
+    case AArch64::LDRQpre:
+    case AArch64::LDURWi:
+    case AArch64::LDRWpre:
+    case AArch64::LDURXi:
+    case AArch64::LDRXpre:
+    case AArch64::LDURSWi:
+    case AArch64::LDRSWpre:
+    // SVE instructions.
+    case AArch64::LDR_ZXI:
+    case AArch64::STR_ZXI:
+      return true;
+    }
   }
-}
 
-// Is this a candidate for ld/st merging or pairing?  For example, we don't
-// touch volatiles or load/stores that have a hint to avoid pair formation.
-bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
+  bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      assert((!MI.isCall() || !MI.isReturn()) &&
+             "Unexpected instruction - was a new tail call opcode introduced?");
+      return false;
+    case AArch64::TCRETURNdi:
+    case AArch64::TCRETURNri:
+    case AArch64::TCRETURNrix16x17:
+    case AArch64::TCRETURNrix17:
+    case AArch64::TCRETURNrinotx16:
+    case AArch64::TCRETURNriALL:
+    case AArch64::AUTH_TCRETURN:
+    case AArch64::AUTH_TCRETURN_BTI:
+      return true;
+    }
+  }
 
-  bool IsPreLdSt = isPreLdSt(MI);
+  unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
+    switch (Opc) {
+    default:
+      llvm_unreachable("Opcode has no flag setting equivalent!");
+    // 32-bit cases:
+    case AArch64::ADDWri:
+      return AArch64::ADDSWri;
+    case AArch64::ADDWrr:
+      return AArch64::ADDSWrr;
+    case AArch64::ADDWrs:
+      return AArch64::ADDSWrs;
+    case AArch64::ADDWrx:
+      return AArch64::ADDSWrx;
+    case AArch64::ANDWri:
+      return AArch64::ANDSWri;
+    case AArch64::ANDWrr:
+      return AArch64::ANDSWrr;
+    case AArch64::ANDWrs:
+      return AArch64::ANDSWrs;
+    case AArch64::BICWrr:
+      return AArch64::BICSWrr;
+    case AArch64::BICWrs:
+      return AArch64::BICSWrs;
+    case AArch64::SUBWri:
+      return AArch64::SUBSWri;
+    case AArch64::SUBWrr:
+      return AArch64::SUBSWrr;
+    case AArch64::SUBWrs:
+      return AArch64::SUBSWrs;
+    case AArch64::SUBWrx:
+      return AArch64::SUBSWrx;
+    // 64-bit cases:
+    case AArch64::ADDXri:
+      return AArch64::ADDSXri;
+    case AArch64::ADDXrr:
+      return AArch64::ADDSXrr;
+    case AArch64::ADDXrs:
+      return AArch64::ADDSXrs;
+    case AArch64::ADDXrx:
+      return AArch64::ADDSXrx;
+    case AArch64::ANDXri:
+      return AArch64::ANDSXri;
+    case AArch64::ANDXrr:
+      return AArch64::ANDSXrr;
+    case AArch64::ANDXrs:
+      return AArch64::ANDSXrs;
+    case AArch64::BICXrr:
+      return AArch64::BICSXrr;
+    case AArch64::BICXrs:
+      return AArch64::BICSXrs;
+    case AArch64::SUBXri:
+      return AArch64::SUBSXri;
+    case AArch64::SUBXrr:
+      return AArch64::SUBSXrr;
+    case AArch64::SUBXrs:
+      return AArch64::SUBSXrs;
+    case AArch64::SUBXrx:
+      return AArch64::SUBSXrx;
+    // SVE instructions:
+    case AArch64::AND_PPzPP:
+      return AArch64::ANDS_PPzPP;
+    case AArch64::BIC_PPzPP:
+      return AArch64::BICS_PPzPP;
+    case AArch64::EOR_PPzPP:
+      return AArch64::EORS_PPzPP;
+    case AArch64::NAND_PPzPP:
+      return AArch64::NANDS_PPzPP;
+    case AArch64::NOR_PPzPP:
+      return AArch64::NORS_PPzPP;
+    case AArch64::ORN_PPzPP:
+      return AArch64::ORNS_PPzPP;
+    case AArch64::ORR_PPzPP:
+      return AArch64::ORRS_PPzPP;
+    case AArch64::BRKA_PPzP:
+      return AArch64::BRKAS_PPzP;
+    case AArch64::BRKPA_PPzPP:
+      return AArch64::BRKPAS_PPzPP;
+    case AArch64::BRKB_PPzP:
+      return AArch64::BRKBS_PPzP;
+    case AArch64::BRKPB_PPzPP:
+      return AArch64::BRKPBS_PPzPP;
+    case AArch64::BRKN_PPzP:
+      return AArch64::BRKNS_PPzP;
+    case AArch64::RDFFR_PPz:
+      return AArch64::RDFFRS_PPz;
+    case AArch64::PTRUE_B:
+      return AArch64::PTRUES_B;
+    }
+  }
 
-  // If this is a volatile load/store, don't mess with it.
-  if (MI.hasOrderedMemoryRef())
-    return false;
+  // Is this a candidate for ld/st merging or pairing?  For example, we don't
+  // touch volatiles or load/stores that have a hint to avoid pair formation.
+  bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI)
+      const {
+
+    bool IsPreLdSt = isPreLdSt(MI);
 
-  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
-  // For Pre-inc LD/ST, the operand is shifted by one.
-  assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
-          MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
-         "Expected a reg or frame index operand.");
+    // If this is a volatile load/store, don't mess with it.
+    if (MI.hasOrderedMemoryRef())
+      return false;
 
-  // For Pre-indexed addressing quadword instructions, the third operand is the
-  // immediate value.
-  bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
+    // Make sure this is a reg/fi+imm (as opposed to an address reloc).
+    // For Pre-inc LD/ST, the operand is shifted by one.
+    assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
+            MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
+           "Expected a reg or frame index operand.");
 
-  if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
-    return false;
+    // For Pre-indexed addressing quadword instructions, the third operand is
+    // the immediate value.
+    bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
 
-  // Can't merge/pair if the instruction modifies the base register.
-  // e.g., ldr x0, [x0]
-  // This case will never occur with an FI base.
-  // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
-  // STR<S,D,Q,W,X>pre, it can be merged.
-  // For example:
-  //   ldr q0, [x11, #32]!
-  //   ldr q1, [x11, #16]
-  //   to
-  //   ldp q0, q1, [x11, #32]!
-  if (MI.getOperand(1).isReg() && !IsPreLdSt) {
-    Register BaseReg = MI.getOperand(1).getReg();
-    const TargetRegisterInfo *TRI = &getRegisterInfo();
-    if (MI.modifiesRegister(BaseReg, TRI))
+    if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
+      return false;
+
+    // Can't merge/pair if the instruction modifies the base register.
+    // e.g., ldr x0, [x0]
+    // This case will never occur with an FI base.
+    // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
+    // STR<S,D,Q,W,X>pre, it can be merged.
+    // For example:
+    //   ldr q0, [x11, #32]!
+    //   ldr q1, [x11, #16]
+    //   to
+    //   ldp q0, q1, [x11, #32]!
+    if (MI.getOperand(1).isReg() && !IsPreLdSt) {
+      Register BaseReg = MI.getOperand(1).getReg();
+      const TargetRegisterInfo *TRI = &getRegisterInfo();
+      if (MI.modifiesRegister(BaseReg, TRI))
+        return false;
+    }
+
+    // Pairing SVE fills/spills is only valid for little-endian targets that
+    // implement VLS 128.
+    switch (MI.getOpcode()) {
+    default:
+      break;
+    case AArch64::LDR_ZXI:
+    case AArch64::STR_ZXI:
+      if (!Subtarget.isLittleEndian() ||
+          Subtarget.getSVEVectorSizeInBits() != 128)
+        return false;
+    }
+
+    // Check if this load/store has a hint to avoid pair formation.
+    // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
+    if (isLdStPairSuppressed(MI))
       return false;
+
+    // Do not pair any callee-save store/reload instructions in the
+    // prologue/epilogue if the CFI information encoded the operations as
+    // separate instructions, as that will cause the size of the actual prologue
+    // to mismatch with the prologue size recorded in the Windows CFI.
+    const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
+    bool NeedsWinCFI = MAI->usesWindowsCFI() &&
+                       MI.getMF()->getFunction().needsUnwindTableEntry();
+    if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
+                        MI.getFlag(MachineInstr::FrameDestroy)))
+      return false;
+
+    // On some CPUs quad load/store pairs are slower than two single
+    // load/stores.
+    if (Subtarget.isPaired128Slow()) {
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case AArch64::LDURQi:
+      case AArch64::STURQi:
+      case AArch64::LDRQui:
+      case AArch64::STRQui:
+        return false;
+      }
+    }
+
+    return true;
   }
 
-  // Pairing SVE fills/spills is only valid for little-endian targets that
-  // implement VLS 128.
-  switch (MI.getOpcode()) {
-  default:
-    break;
-  case AArch64::LDR_ZXI:
-  case AArch64::STR_ZXI:
-    if (!Subtarget.isLittleEndian() ||
-        Subtarget.getSVEVectorSizeInBits() != 128)
+  bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
+      const MachineInstr &LdSt,
+      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+      bool &OffsetIsScalable, LocationSize &Width,
+      const TargetRegisterInfo *TRI) const {
+    if (!LdSt.mayLoadOrStore())
+      return false;
+
+    const MachineOperand *BaseOp;
+    TypeSize WidthN(0, false);
+    if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
+                                      WidthN, TRI))
       return false;
+    // The maximum vscale is 16 under AArch64, return the maximal extent for the
+    // vector.
+    Width = LocationSize::precise(WidthN);
+    BaseOps.push_back(BaseOp);
+    return true;
   }
 
-  // Check if this load/store has a hint to avoid pair formation.
-  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
-  if (isLdStPairSuppressed(MI))
-    return false;
+  std::optional<ExtAddrMode> AArch64InstrInfo::getAddrModeFromMemoryOp(
+      const MachineInstr &MemI, const TargetRegisterInfo *TRI) const {
+    const MachineOperand *Base; // Filled with the base operand of MI.
+    int64_t Offset;             // Filled with the offset of MI.
+    bool OffsetIsScalable;
+    if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
+      return std::nullopt;
 
-  // Do not pair any callee-save store/reload instructions in the
-  // prologue/epilogue if the CFI information encoded the operations as separate
-  // instructions, as that will cause the size of the actual prologue to mismatch
-  // with the prologue size recorded in the Windows CFI.
-  const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
-  bool NeedsWinCFI = MAI->usesWindowsCFI() &&
-                     MI.getMF()->getFunction().needsUnwindTableEntry();
-  if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
-                      MI.getFlag(MachineInstr::FrameDestroy)))
-    return false;
+    if (!Base->isReg())
+      return std::nullopt;
+    ExtAddrMode AM;
+    AM.BaseReg = Base->getReg();
+    AM.Displacement = Offset;
+    AM.ScaledReg = 0;
+    AM.Scale = 0;
+    return AM;
+  }
 
-  // On some CPUs quad load/store pairs are slower than two single load/stores.
-  if (Subtarget.isPaired128Slow()) {
-    switch (MI.getOpcode()) {
+  bool AArch64InstrInfo::canFoldIntoAddrMode(
+      const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI,
+      ExtAddrMode &AM) const {
+    // Filter out instructions into which we cannot fold.
+    unsigned NumBytes;
+    int64_t OffsetScale = 1;
+    switch (MemI.getOpcode()) {
     default:
-      break;
+      return false;
+
     case AArch64::LDURQi:
     case AArch64::STURQi:
+      NumBytes = 16;
+      break;
+
+    case AArch64::LDURDi:
+    case AArch64::STURDi:
+    case AArch64::LDURXi:
+    case AArch64::STURXi:
+      NumBytes = 8;
+      break;
+
+    case AArch64::LDURWi:
+    case AArch64::LDURSWi:
+    case AArch64::STURWi:
+      NumBytes = 4;
+      break;
+
+    case AArch64::LDURHi:
+    case AArch64::STURHi:
+    case AArch64::LDURHHi:
+    case AArch64::STURHHi:
+    case AArch64::LDURSHXi:
+    case AArch64::LDURSHWi:
+      NumBytes = 2;
+      break;
+
+    case AArch64::LDRBroX:
+    case AArch64::LDRBBroX:
+    case AArch64::LDRSBXroX:
+    case AArch64::LDRSBWroX:
+    case AArch64::STRBroX:
+    case AArch64::STRBBroX:
+    case AArch64::LDURBi:
+    case AArch64::LDURBBi:
+    case AArch64::LDURSBXi:
+    case AArch64::LDURSBWi:
+    case AArch64::STURBi:
+    case AArch64::STURBBi:
+    case AArch64::LDRBui:
+    case AArch64::LDRBBui:
+    case AArch64::LDRSBXui:
+    case AArch64::LDRSBWui:
+    case AArch64::STRBui:
+    case AArch64::STRBBui:
+      NumBytes = 1;
+      break;
+
+    case AArch64::LDRQroX:
+    case AArch64::STRQroX:
     case AArch64::LDRQui:
     case AArch64::STRQui:
-      return false;
-    }
-  }
+      NumBytes = 16;
+      OffsetScale = 16;
+      break;
 
-  return true;
-}
+    case AArch64::LDRDroX:
+    case AArch64::STRDroX:
+    case AArch64::LDRXroX:
+    case AArch64::STRXroX:
+    case AArch64::LDRDui:
+    case AArch64::STRDui:
+    case AArch64::LDRXui:
+    case AArch64::STRXui:
+      NumBytes = 8;
+      OffsetScale = 8;
+      break;
 
-bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
-    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
-    int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
-    const TargetRegisterInfo *TRI) const {
-  if (!LdSt.mayLoadOrStore())
-    return false;
+    case AArch64::LDRWroX:
+    case AArch64::LDRSWroX:
+    case AArch64::STRWroX:
+    case AArch64::LDRWui:
+    case AArch64::LDRSWui:
+    case AArch64::STRWui:
+      NumBytes = 4;
+      OffsetScale = 4;
+      break;
 
-  const MachineOperand *BaseOp;
-  TypeSize WidthN(0, false);
-  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
-                                    WidthN, TRI))
-    return false;
-  // The maximum vscale is 16 under AArch64, return the maximal extent for the
-  // vector.
-  Width = LocationSize::precise(WidthN);
-  BaseOps.push_back(BaseOp);
-  return true;
-}
+    case AArch64::LDRHroX:
+    case AArch64::STRHroX:
+    case AArch64::LDRHHroX:
+    case AArch64::STRHHroX:
+    case AArch64::LDRSHXroX:
+    case AArch64::LDRSHWroX:
+    case AArch64::LDRHui:
+    case AArch64::STRHui:
+    case AArch64::LDRHHui:
+    case AArch64::STRHHui:
+    case AArch64::LDRSHXui:
+    case AArch64::LDRSHWui:
+      NumBytes = 2;
+      OffsetScale = 2;
+      break;
+    }
 
-std::optional<ExtAddrMode>
-AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
-                                          const TargetRegisterInfo *TRI) const {
-  const MachineOperand *Base; // Filled with the base operand of MI.
-  int64_t Offset;             // Filled with the offset of MI.
-  bool OffsetIsScalable;
-  if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
-    return std::nullopt;
+    // Check the fold operand is not the loaded/stored value.
+    const MachineOperand &BaseRegOp = MemI.getOperand(0);
+    if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
+      return false;
 
-  if (!Base->isReg())
-    return std::nullopt;
-  ExtAddrMode AM;
-  AM.BaseReg = Base->getReg();
-  AM.Displacement = Offset;
-  AM.ScaledReg = 0;
-  AM.Scale = 0;
-  return AM;
-}
-
-bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
-                                           Register Reg,
-                                           const MachineInstr &AddrI,
-                                           ExtAddrMode &AM) const {
-  // Filter out instructions into which we cannot fold.
-  unsigned NumBytes;
-  int64_t OffsetScale = 1;
-  switch (MemI.getOpcode()) {
-  default:
-    return false;
+    // Handle memory instructions with a [Reg, Reg] addressing mode.
+    if (MemI.getOperand(2).isReg()) {
+      // Bail if the addressing mode already includes extension of the offset
+      // register.
+      if (MemI.getOperand(3).getImm())
+        return false;
 
-  case AArch64::LDURQi:
-  case AArch64::STURQi:
-    NumBytes = 16;
-    break;
+      // Check if we actually have a scaled offset.
+      if (MemI.getOperand(4).getImm() == 0)
+        OffsetScale = 1;
 
-  case AArch64::LDURDi:
-  case AArch64::STURDi:
-  case AArch64::LDURXi:
-  case AArch64::STURXi:
-    NumBytes = 8;
-    break;
+      // If the address instructions is folded into the base register, then the
+      // addressing mode must not have a scale. Then we can swap the base and
+      // the scaled registers.
+      if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
+        return false;
 
-  case AArch64::LDURWi:
-  case AArch64::LDURSWi:
-  case AArch64::STURWi:
-    NumBytes = 4;
-    break;
+      switch (AddrI.getOpcode()) {
+      default:
+        return false;
 
-  case AArch64::LDURHi:
-  case AArch64::STURHi:
-  case AArch64::LDURHHi:
-  case AArch64::STURHHi:
-  case AArch64::LDURSHXi:
-  case AArch64::LDURSHWi:
-    NumBytes = 2;
-    break;
+      case AArch64::SBFMXri:
+        // sxtw Xa, Wm
+        // ldr Xd, [Xn, Xa, lsl #N]
+        // ->
+        // ldr Xd, [Xn, Wm, sxtw #N]
+        if (AddrI.getOperand(2).getImm() != 0 ||
+            AddrI.getOperand(3).getImm() != 31)
+          return false;
 
-  case AArch64::LDRBroX:
-  case AArch64::LDRBBroX:
-  case AArch64::LDRSBXroX:
-  case AArch64::LDRSBWroX:
-  case AArch64::STRBroX:
-  case AArch64::STRBBroX:
-  case AArch64::LDURBi:
-  case AArch64::LDURBBi:
-  case AArch64::LDURSBXi:
-  case AArch64::LDURSBWi:
-  case AArch64::STURBi:
-  case AArch64::STURBBi:
-  case AArch64::LDRBui:
-  case AArch64::LDRBBui:
-  case AArch64::LDRSBXui:
-  case AArch64::LDRSBWui:
-  case AArch64::STRBui:
-  case AArch64::STRBBui:
-    NumBytes = 1;
-    break;
+        AM.BaseReg = MemI.getOperand(1).getReg();
+        if (AM.BaseReg == Reg)
+          AM.BaseReg = MemI.getOperand(2).getReg();
+        AM.ScaledReg = AddrI.getOperand(1).getReg();
+        AM.Scale = OffsetScale;
+        AM.Displacement = 0;
+        AM.Form = ExtAddrMode::Formula::SExtScaledReg;
+        return true;
 
-  case AArch64::LDRQroX:
-  case AArch64::STRQroX:
-  case AArch64::LDRQui:
-  case AArch64::STRQui:
-    NumBytes = 16;
-    OffsetScale = 16;
-    break;
+      case TargetOpcode::SUBREG_TO_REG: {
+        // mov Wa, Wm
+        // ldr Xd, [Xn, Xa, lsl #N]
+        // ->
+        // ldr Xd, [Xn, Wm, uxtw #N]
 
-  case AArch64::LDRDroX:
-  case AArch64::STRDroX:
-  case AArch64::LDRXroX:
-  case AArch64::STRXroX:
-  case AArch64::LDRDui:
-  case AArch64::STRDui:
-  case AArch64::LDRXui:
-  case AArch64::STRXui:
-    NumBytes = 8;
-    OffsetScale = 8;
-    break;
+        // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
+        if (AddrI.getOperand(1).getImm() != 0 ||
+            AddrI.getOperand(3).getImm() != AArch64::sub_32)
+          return false;
 
-  case AArch64::LDRWroX:
-  case AArch64::LDRSWroX:
-  case AArch64::STRWroX:
-  case AArch64::LDRWui:
-  case AArch64::LDRSWui:
-  case AArch64::STRWui:
-    NumBytes = 4;
-    OffsetScale = 4;
-    break;
+        const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
+        Register OffsetReg = AddrI.getOperand(2).getReg();
+        if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
+          return false;
 
-  case AArch64::LDRHroX:
-  case AArch64::STRHroX:
-  case AArch64::LDRHHroX:
-  case AArch64::STRHHroX:
-  case AArch64::LDRSHXroX:
-  case AArch64::LDRSHWroX:
-  case AArch64::LDRHui:
-  case AArch64::STRHui:
-  case AArch64::LDRHHui:
-  case AArch64::STRHHui:
-  case AArch64::LDRSHXui:
-  case AArch64::LDRSHWui:
-    NumBytes = 2;
-    OffsetScale = 2;
-    break;
-  }
+        const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
+        if (DefMI.getOpcode() != AArch64::ORRWrs ||
+            DefMI.getOperand(1).getReg() != AArch64::WZR ||
+            DefMI.getOperand(3).getImm() != 0)
+          return false;
 
-  // Check the fold operand is not the loaded/stored value.
-  const MachineOperand &BaseRegOp = MemI.getOperand(0);
-  if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
-    return false;
+        AM.BaseReg = MemI.getOperand(1).getReg();
+        if (AM.BaseReg == Reg)
+          AM.BaseReg = MemI.getOperand(2).getReg();
+        AM.ScaledReg = DefMI.getOperand(2).getReg();
+        AM.Scale = OffsetScale;
+        AM.Displacement = 0;
+        AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
+        return true;
+      }
+      }
+    }
 
-  // Handle memory instructions with a [Reg, Reg] addressing mode.
-  if (MemI.getOperand(2).isReg()) {
-    // Bail if the addressing mode already includes extension of the offset
-    // register.
-    if (MemI.getOperand(3).getImm())
-      return false;
+    // Handle memory instructions with a [Reg, #Imm] addressing mode.
 
-    // Check if we actually have a scaled offset.
-    if (MemI.getOperand(4).getImm() == 0)
-      OffsetScale = 1;
+    // Check we are not breaking a potential conversion to an LDP.
+    auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
+                                   int64_t NewOffset) -> bool {
+      int64_t MinOffset, MaxOffset;
+      switch (NumBytes) {
+      default:
+        return true;
+      case 4:
+        MinOffset = -256;
+        MaxOffset = 252;
+        break;
+      case 8:
+        MinOffset = -512;
+        MaxOffset = 504;
+        break;
+      case 16:
+        MinOffset = -1024;
+        MaxOffset = 1008;
+        break;
+      }
+      return OldOffset < MinOffset || OldOffset > MaxOffset ||
+             (NewOffset >= MinOffset && NewOffset <= MaxOffset);
+    };
+    auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
+      int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
+      int64_t NewOffset = OldOffset + Disp;
+      if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
+        return false;
+      // If the old offset would fit into an LDP, but the new offset wouldn't,
+      // bail out.
+      if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
+        return false;
+      AM.BaseReg = AddrI.getOperand(1).getReg();
+      AM.ScaledReg = 0;
+      AM.Scale = 0;
+      AM.Displacement = NewOffset;
+      AM.Form = ExtAddrMode::Formula::Basic;
+      return true;
+    };
 
-    // If the address instructions is folded into the base register, then the
-    // addressing mode must not have a scale. Then we can swap the base and the
-    // scaled registers.
-    if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
-      return false;
+    auto canFoldAddRegIntoAddrMode =
+        [&](int64_t Scale,
+            ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
+      if (MemI.getOperand(2).getImm() != 0)
+        return false;
+      if ((unsigned)Scale != Scale)
+        return false;
+      if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
+        return false;
+      AM.BaseReg = AddrI.getOperand(1).getReg();
+      AM.ScaledReg = AddrI.getOperand(2).getReg();
+      AM.Scale = Scale;
+      AM.Displacement = 0;
+      AM.Form = Form;
+      return true;
+    };
+
+    auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
+      unsigned Opcode = MemI.getOpcode();
+      return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
+             Subtarget.isSTRQroSlow();
+    };
 
+    int64_t Disp = 0;
+    const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
     switch (AddrI.getOpcode()) {
     default:
       return false;
 
-    case AArch64::SBFMXri:
-      // sxtw Xa, Wm
-      // ldr Xd, [Xn, Xa, lsl #N]
+    case AArch64::ADDXri:
+      // add Xa, Xn, #N
+      // ldr Xd, [Xa, #M]
       // ->
-      // ldr Xd, [Xn, Wm, sxtw #N]
-      if (AddrI.getOperand(2).getImm() != 0 ||
-          AddrI.getOperand(3).getImm() != 31)
-        return false;
+      // ldr Xd, [Xn, #N'+M]
+      Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
+      return canFoldAddSubImmIntoAddrMode(Disp);
 
-      AM.BaseReg = MemI.getOperand(1).getReg();
-      if (AM.BaseReg == Reg)
-        AM.BaseReg = MemI.getOperand(2).getReg();
-      AM.ScaledReg = AddrI.getOperand(1).getReg();
-      AM.Scale = OffsetScale;
-      AM.Displacement = 0;
-      AM.Form = ExtAddrMode::Formula::SExtScaledReg;
-      return true;
+    case AArch64::SUBXri:
+      // sub Xa, Xn, #N
+      // ldr Xd, [Xa, #M]
+      // ->
+      // ldr Xd, [Xn, #N'+M]
+      Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
+      return canFoldAddSubImmIntoAddrMode(-Disp);
 
-    case TargetOpcode::SUBREG_TO_REG: {
-      // mov Wa, Wm
-      // ldr Xd, [Xn, Xa, lsl #N]
+    case AArch64::ADDXrs: {
+      // add Xa, Xn, Xm, lsl #N
+      // ldr Xd, [Xa]
       // ->
-      // ldr Xd, [Xn, Wm, uxtw #N]
+      // ldr Xd, [Xn, Xm, lsl #N]
 
-      // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
-      if (AddrI.getOperand(1).getImm() != 0 ||
-          AddrI.getOperand(3).getImm() != AArch64::sub_32)
+      // Don't fold the add if the result would be slower, unless optimising for
+      // size.
+      unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
+      if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
         return false;
+      Shift = AArch64_AM::getShiftValue(Shift);
+      if (!OptSize) {
+        if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
+          return false;
+        if (avoidSlowSTRQ(MemI))
+          return false;
+      }
+      return canFoldAddRegIntoAddrMode(1ULL << Shift);
+    }
+
+    case AArch64::ADDXrr:
+      // add Xa, Xn, Xm
+      // ldr Xd, [Xa]
+      // ->
+      // ldr Xd, [Xn, Xm, lsl #0]
 
-      const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
-      Register OffsetReg = AddrI.getOperand(2).getReg();
-      if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
+      // Don't fold the add if the result would be slower, unless optimising for
+      // size.
+      if (!OptSize && avoidSlowSTRQ(MemI))
         return false;
+      return canFoldAddRegIntoAddrMode(1);
 
-      const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
-      if (DefMI.getOpcode() != AArch64::ORRWrs ||
-          DefMI.getOperand(1).getReg() != AArch64::WZR ||
-          DefMI.getOperand(3).getImm() != 0)
+    case AArch64::ADDXrx:
+      // add Xa, Xn, Wm, {s,u}xtw #N
+      // ldr Xd, [Xa]
+      // ->
+      // ldr Xd, [Xn, Wm, {s,u}xtw #N]
+
+      // Don't fold the add if the result would be slower, unless optimising for
+      // size.
+      if (!OptSize && avoidSlowSTRQ(MemI))
         return false;
 
-      AM.BaseReg = MemI.getOperand(1).getReg();
-      if (AM.BaseReg == Reg)
-        AM.BaseReg = MemI.getOperand(2).getReg();
-      AM.ScaledReg = DefMI.getOperand(2).getReg();
-      AM.Scale = OffsetScale;
-      AM.Displacement = 0;
-      AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
-      return true;
+      // Can fold only sign-/zero-extend of a word.
+      unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
+      AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
+      if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
+        return false;
+
+      return canFoldAddRegIntoAddrMode(
+          1ULL << AArch64_AM::getArithShiftValue(Imm),
+          (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
+                                       : ExtAddrMode::Formula::ZExtScaledReg);
     }
+  }
+
+  // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
+  // return the opcode of an instruction performing the same operation, but
+  // using the [Reg, Reg] addressing mode.
+  static unsigned regOffsetOpcode(unsigned Opcode) {
+    switch (Opcode) {
+    default:
+      llvm_unreachable("Address folding not implemented for instruction");
+
+    case AArch64::LDURQi:
+    case AArch64::LDRQui:
+      return AArch64::LDRQroX;
+    case AArch64::STURQi:
+    case AArch64::STRQui:
+      return AArch64::STRQroX;
+    case AArch64::LDURDi:
+    case AArch64::LDRDui:
+      return AArch64::LDRDroX;
+    case AArch64::STURDi:
+    case AArch64::STRDui:
+      return AArch64::STRDroX;
+    case AArch64::LDURXi:
+    case AArch64::LDRXui:
+      return AArch64::LDRXroX;
+    case AArch64::STURXi:
+    case AArch64::STRXui:
+      return AArch64::STRXroX;
+    case AArch64::LDURWi:
+    case AArch64::LDRWui:
+      return AArch64::LDRWroX;
+    case AArch64::LDURSWi:
+    case AArch64::LDRSWui:
+      return AArch64::LDRSWroX;
+    case AArch64::STURWi:
+    case AArch64::STRWui:
+      return AArch64::STRWroX;
+    case AArch64::LDURHi:
+    case AArch64::LDRHui:
+      return AArch64::LDRHroX;
+    case AArch64::STURHi:
+    case AArch64::STRHui:
+      return AArch64::STRHroX;
+    case AArch64::LDURHHi:
+    case AArch64::LDRHHui:
+      return AArch64::LDRHHroX;
+    case AArch64::STURHHi:
+    case AArch64::STRHHui:
+      return AArch64::STRHHroX;
+    case AArch64::LDURSHXi:
+    case AArch64::LDRSHXui:
+      return AArch64::LDRSHXroX;
+    case AArch64::LDURSHWi:
+    case AArch64::LDRSHWui:
+      return AArch64::LDRSHWroX;
+    case AArch64::LDURBi:
+    case AArch64::LDRBui:
+      return AArch64::LDRBroX;
+    case AArch64::LDURBBi:
+    case AArch64::LDRBBui:
+      return AArch64::LDRBBroX;
+    case AArch64::LDURSBXi:
+    case AArch64::LDRSBXui:
+      return AArch64::LDRSBXroX;
+    case AArch64::LDURSBWi:
+    case AArch64::LDRSBWui:
+      return AArch64::LDRSBWroX;
+    case AArch64::STURBi:
+    case AArch64::STRBui:
+      return AArch64::STRBroX;
+    case AArch64::STURBBi:
+    case AArch64::STRBBui:
+      return AArch64::STRBBroX;
     }
   }
 
-  // Handle memory instructions with a [Reg, #Imm] addressing mode.
+  // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
+  // return the opcode of an instruction performing the same operation, but
+  // using the [Reg, #Imm] addressing mode with scaled offset.
+  unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
+    switch (Opcode) {
+    default:
+      llvm_unreachable("Address folding not implemented for instruction");
 
-  // Check we are not breaking a potential conversion to an LDP.
-  auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
-                                 int64_t NewOffset) -> bool {
-    int64_t MinOffset, MaxOffset;
-    switch (NumBytes) {
+    case AArch64::LDURQi:
+      Scale = 16;
+      return AArch64::LDRQui;
+    case AArch64::STURQi:
+      Scale = 16;
+      return AArch64::STRQui;
+    case AArch64::LDURDi:
+      Scale = 8;
+      return AArch64::LDRDui;
+    case AArch64::STURDi:
+      Scale = 8;
+      return AArch64::STRDui;
+    case AArch64::LDURXi:
+      Scale = 8;
+      return AArch64::LDRXui;
+    case AArch64::STURXi:
+      Scale = 8;
+      return AArch64::STRXui;
+    case AArch64::LDURWi:
+      Scale = 4;
+      return AArch64::LDRWui;
+    case AArch64::LDURSWi:
+      Scale = 4;
+      return AArch64::LDRSWui;
+    case AArch64::STURWi:
+      Scale = 4;
+      return AArch64::STRWui;
+    case AArch64::LDURHi:
+      Scale = 2;
+      return AArch64::LDRHui;
+    case AArch64::STURHi:
+      Scale = 2;
+      return AArch64::STRHui;
+    case AArch64::LDURHHi:
+      Scale = 2;
+      return AArch64::LDRHHui;
+    case AArch64::STURHHi:
+      Scale = 2;
+      return AArch64::STRHHui;
+    case AArch64::LDURSHXi:
+      Scale = 2;
+      return AArch64::LDRSHXui;
+    case AArch64::LDURSHWi:
+      Scale = 2;
+      return AArch64::LDRSHWui;
+    case AArch64::LDURBi:
+      Scale = 1;
+      return AArch64::LDRBui;
+    case AArch64::LDURBBi:
+      Scale = 1;
+      return AArch64::LDRBBui;
+    case AArch64::LDURSBXi:
+      Scale = 1;
+      return AArch64::LDRSBXui;
+    case AArch64::LDURSBWi:
+      Scale = 1;
+      return AArch64::LDRSBWui;
+    case AArch64::STURBi:
+      Scale = 1;
+      return AArch64::STRBui;
+    case AArch64::STURBBi:
+      Scale = 1;
+      return AArch64::STRBBui;
+    case AArch64::LDRQui:
+    case AArch64::STRQui:
+      Scale = 16;
+      return Opcode;
+    case AArch64::LDRDui:
+    case AArch64::STRDui:
+    case AArch64::LDRXui:
+    case AArch64::STRXui:
+      Scale = 8;
+      return Opcode;
+    case AArch64::LDRWui:
+    case AArch64::LDRSWui:
+    case AArch64::STRWui:
+      Scale = 4;
+      return Opcode;
+    case AArch64::LDRHui:
+    case AArch64::STRHui:
+    case AArch64::LDRHHui:
+    case AArch64::STRHHui:
+    case AArch64::LDRSHXui:
+    case AArch64::LDRSHWui:
+      Scale = 2;
+      return Opcode;
+    case AArch64::LDRBui:
+    case AArch64::LDRBBui:
+    case AArch64::LDRSBXui:
+    case AArch64::LDRSBWui:
+    case AArch64::STRBui:
+    case AArch64::STRBBui:
+      Scale = 1;
+      return Opcode;
+    }
+  }
+
+  // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
+  // return the opcode of an instruction performing the same operation, but
+  // using the [Reg, #Imm] addressing mode with unscaled offset.
+  unsigned unscaledOffsetOpcode(unsigned Opcode) {
+    switch (Opcode) {
     default:
-      return true;
-    case 4:
-      MinOffset = -256;
-      MaxOffset = 252;
-      break;
-    case 8:
-      MinOffset = -512;
-      MaxOffset = 504;
-      break;
-    case 16:
-      MinOffset = -1024;
-      MaxOffset = 1008;
-      break;
+      llvm_unreachable("Address folding not implemented for instruction");
+
+    case AArch64::LDURQi:
+    case AArch64::STURQi:
+    case AArch64::LDURDi:
+    case AArch64::STURDi:
+    case AArch64::LDURXi:
+    case AArch64::STURXi:
+    case AArch64::LDURWi:
+    case AArch64::LDURSWi:
+    case AArch64::STURWi:
+    case AArch64::LDURHi:
+    case AArch64::STURHi:
+    case AArch64::LDURHHi:
+    case AArch64::STURHHi:
+    case AArch64::LDURSHXi:
+    case AArch64::LDURSHWi:
+    case AArch64::LDURBi:
+    case AArch64::STURBi:
+    case AArch64::LDURBBi:
+    case AArch64::STURBBi:
+    case AArch64::LDURSBWi:
+    case AArch64::LDURSBXi:
+      return Opcode;
+    case AArch64::LDRQui:
+      return AArch64::LDURQi;
+    case AArch64::STRQui:
+      return AArch64::STURQi;
+    case AArch64::LDRDui:
+      return AArch64::LDURDi;
+    case AArch64::STRDui:
+      return AArch64::STURDi;
+    case AArch64::LDRXui:
+      return AArch64::LDURXi;
+    case AArch64::STRXui:
+      return AArch64::STURXi;
+    case AArch64::LDRWui:
+      return AArch64::LDURWi;
+    case AArch64::LDRSWui:
+      return AArch64::LDURSWi;
+    case AArch64::STRWui:
+      return AArch64::STURWi;
+    case AArch64::LDRHui:
+      return AArch64::LDURHi;
+    case AArch64::STRHui:
+      return AArch64::STURHi;
+    case AArch64::LDRHHui:
+      return AArch64::LDURHHi;
+    case AArch64::STRHHui:
+      return AArch64::STURHHi;
+    case AArch64::LDRSHXui:
+      return AArch64::LDURSHXi;
+    case AArch64::LDRSHWui:
+      return AArch64::LDURSHWi;
+    case AArch64::LDRBBui:
+      return AArch64::LDURBBi;
+    case AArch64::LDRBui:
+      return AArch64::LDURBi;
+    case AArch64::STRBBui:
+      return AArch64::STURBBi;
+    case AArch64::STRBui:
+      return AArch64::STURBi;
+    case AArch64::LDRSBWui:
+      return AArch64::LDURSBWi;
+    case AArch64::LDRSBXui:
+      return AArch64::LDURSBXi;
     }
-    return OldOffset < MinOffset || OldOffset > MaxOffset ||
-           (NewOffset >= MinOffset && NewOffset <= MaxOffset);
-  };
-  auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
-    int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
-    int64_t NewOffset = OldOffset + Disp;
-    if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
-      return false;
-    // If the old offset would fit into an LDP, but the new offset wouldn't,
-    // bail out.
-    if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
-      return false;
-    AM.BaseReg = AddrI.getOperand(1).getReg();
-    AM.ScaledReg = 0;
-    AM.Scale = 0;
-    AM.Displacement = NewOffset;
-    AM.Form = ExtAddrMode::Formula::Basic;
-    return true;
-  };
+  }
+
+  // Given the opcode of a memory load/store instruction, return the opcode of
+  // an instruction performing the same operation, but using the [Reg, Reg,
+  // {s,u}xtw #N] addressing mode with sign-/zero-extend of the offset register.
+  static unsigned offsetExtendOpcode(unsigned Opcode) {
+    switch (Opcode) {
+    default:
+      llvm_unreachable("Address folding not implemented for instruction");
 
-  auto canFoldAddRegIntoAddrMode =
-      [&](int64_t Scale,
-          ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
-    if (MemI.getOperand(2).getImm() != 0)
+    case AArch64::LDRQroX:
+    case AArch64::LDURQi:
+    case AArch64::LDRQui:
+      return AArch64::LDRQroW;
+    case AArch64::STRQroX:
+    case AArch64::STURQi:
+    case AArch64::STRQui:
+      return AArch64::STRQroW;
+    case AArch64::LDRDroX:
+    case AArch64::LDURDi:
+    case AArch64::LDRDui:
+      return AArch64::LDRDroW;
+    case AArch64::STRDroX:
+    case AArch64::STURDi:
+    case AArch64::STRDui:
+      return AArch64::STRDroW;
+    case AArch64::LDRXroX:
+    case AArch64::LDURXi:
+    case AArch64::LDRXui:
+      return AArch64::LDRXroW;
+    case AArch64::STRXroX:
+    case AArch64::STURXi:
+    case AArch64::STRXui:
+      return AArch64::STRXroW;
+    case AArch64::LDRWroX:
+    case AArch64::LDURWi:
+    case AArch64::LDRWui:
+      return AArch64::LDRWroW;
+    case AArch64::LDRSWroX:
+    case AArch64::LDURSWi:
+    case AArch64::LDRSWui:
+      return AArch64::LDRSWroW;
+    case AArch64::STRWroX:
+    case AArch64::STURWi:
+    case AArch64::STRWui:
+      return AArch64::STRWroW;
+    case AArch64::LDRHroX:
+    case AArch64::LDURHi:
+    case AArch64::LDRHui:
+      return AArch64::LDRHroW;
+    case AArch64::STRHroX:
+    case AArch64::STURHi:
+    case AArch64::STRHui:
+      return AArch64::STRHroW;
+    case AArch64::LDRHHroX:
+    case AArch64::LDURHHi:
+    case AArch64::LDRHHui:
+      return AArch64::LDRHHroW;
+    case AArch64::STRHHroX:
+    case AArch64::STURHHi:
+    case AArch64::STRHHui:
+      return AArch64::STRHHroW;
+    case AArch64::LDRSHXroX:
+    case AArch64::LDURSHXi:
+    case AArch64::LDRSHXui:
+      return AArch64::LDRSHXroW;
+    case AArch64::LDRSHWroX:
+    case AArch64::LDURSHWi:
+    case AArch64::LDRSHWui:
+      return AArch64::LDRSHWroW;
+    case AArch64::LDRBroX:
+    case AArch64::LDURBi:
+    case AArch64::LDRBui:
+      return AArch64::LDRBroW;
+    case AArch64::LDRBBroX:
+    case AArch64::LDURBBi:
+    case AArch64::LDRBBui:
+      return AArch64::LDRBBroW;
+    case AArch64::LDRSBXroX:
+    case AArch64::LDURSBXi:
+    case AArch64::LDRSBXui:
+      return AArch64::LDRSBXroW;
+    case AArch64::LDRSBWroX:
+    case AArch64::LDURSBWi:
+    case AArch64::LDRSBWui:
+      return AArch64::LDRSBWroW;
+    case AArch64::STRBroX:
+    case AArch64::STURBi:
+    case AArch64::STRBui:
+      return AArch64::STRBroW;
+    case AArch64::STRBBroX:
+    case AArch64::STURBBi:
+    case AArch64::STRBBui:
+      return AArch64::STRBBroW;
+    }
+  }
+
+  MachineInstr *AArch64InstrInfo::emitLdStWithAddr(
+      MachineInstr & MemI, const ExtAddrMode &AM) const {
+
+    const DebugLoc &DL = MemI.getDebugLoc();
+    MachineBasicBlock &MBB = *MemI.getParent();
+    MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
+
+    if (AM.Form == ExtAddrMode::Formula::Basic) {
+      if (AM.ScaledReg) {
+        // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
+        unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
+        MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
+        auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+                     .addReg(MemI.getOperand(0).getReg(),
+                             MemI.mayLoad() ? RegState::Define : 0)
+                     .addReg(AM.BaseReg)
+                     .addReg(AM.ScaledReg)
+                     .addImm(0)
+                     .addImm(AM.Scale > 1)
+                     .setMemRefs(MemI.memoperands())
+                     .setMIFlags(MemI.getFlags());
+        return B.getInstr();
+      }
+
+      assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
+             "Addressing mode not supported for folding");
+
+      // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
+      unsigned Scale = 1;
+      unsigned Opcode = MemI.getOpcode();
+      if (isInt<9>(AM.Displacement))
+        Opcode = unscaledOffsetOpcode(Opcode);
+      else
+        Opcode = scaledOffsetOpcode(Opcode, Scale);
+
+      auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+                   .addReg(MemI.getOperand(0).getReg(),
+                           MemI.mayLoad() ? RegState::Define : 0)
+                   .addReg(AM.BaseReg)
+                   .addImm(AM.Displacement / Scale)
+                   .setMemRefs(MemI.memoperands())
+                   .setMIFlags(MemI.getFlags());
+      return B.getInstr();
+    }
+
+    if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
+        AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
+      // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw
+      // #N]`.
+      assert(AM.ScaledReg && !AM.Displacement &&
+             "Address offset can be a register or an immediate, but not both");
+      unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
+      MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
+      // Make sure the offset register is in the correct register class.
+      Register OffsetReg = AM.ScaledReg;
+      const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
+      if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
+        OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+        BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
+            .addReg(AM.ScaledReg, 0, AArch64::sub_32);
+      }
+      auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+                   .addReg(MemI.getOperand(0).getReg(),
+                           MemI.mayLoad() ? RegState::Define : 0)
+                   .addReg(AM.BaseReg)
+                   .addReg(OffsetReg)
+                   .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
+                   .addImm(AM.Scale != 1)
+                   .setMemRefs(MemI.memoperands())
+                   .setMIFlags(MemI.getFlags());
+
+      return B.getInstr();
+    }
+
+    llvm_unreachable(
+        "Function must not be called with an addressing mode it can't handle");
+  }
+
+  /// Return true if the opcode is a post-index ld/st instruction, which really
+  /// loads from base+0.
+  static bool isPostIndexLdStOpcode(unsigned Opcode) {
+    switch (Opcode) {
+    default:
       return false;
-    if ((unsigned)Scale != Scale)
+    case AArch64::LD1Fourv16b_POST:
+    case AArch64::LD1Fourv1d_POST:
+    case AArch64::LD1Fourv2d_POST:
+    case AArch64::LD1Fourv2s_POST:
+    case AArch64::LD1Fourv4h_POST:
+    case AArch64::LD1Fourv4s_POST:
+    case AArch64::LD1Fourv8b_POST:
+    case AArch64::LD1Fourv8h_POST:
+    case AArch64::LD1Onev16b_POST:
+    case AArch64::LD1Onev1d_POST:
+    case AArch64::LD1Onev2d_POST:
+    case AArch64::LD1Onev2s_POST:
+    case AArch64::LD1Onev4h_POST:
+    case AArch64::LD1Onev4s_POST:
+    case AArch64::LD1Onev8b_POST:
+    case AArch64::LD1Onev8h_POST:
+    case AArch64::LD1Rv16b_POST:
+    case AArch64::LD1Rv1d_POST:
+    case AArch64::LD1Rv2d_POST:
+    case AArch64::LD1Rv2s_POST:
+    case AArch64::LD1Rv4h_POST:
+    case AArch64::LD1Rv4s_POST:
+    case AArch64::LD1Rv8b_POST:
+    case AArch64::LD1Rv8h_POST:
+    case AArch64::LD1Threev16b_POST:
+    case AArch64::LD1Threev1d_POST:
+    case AArch64::LD1Threev2d_POST:
+    case AArch64::LD1Threev2s_POST:
+    case AArch64::LD1Threev4h_POST:
+    case AArch64::LD1Threev4s_POST:
+    case AArch64::LD1Threev8b_POST:
+    case AArch64::LD1Threev8h_POST:
+    case AArch64::LD1Twov16b_POST:
+    case AArch64::LD1Twov1d_POST:
+    case AArch64::LD1Twov2d_POST:
+    case AArch64::LD1Twov2s_POST:
+    case AArch64::LD1Twov4h_POST:
+    case AArch64::LD1Twov4s_POST:
+    case AArch64::LD1Twov8b_POST:
+    case AArch64::LD1Twov8h_POST:
+    case AArch64::LD1i16_POST:
+    case AArch64::LD1i32_POST:
+    case AArch64::LD1i64_POST:
+    case AArch64::LD1i8_POST:
+    case AArch64::LD2Rv16b_POST:
+    case AArch64::LD2Rv1d_POST:
+    case AArch64::LD2Rv2d_POST:
+    case AArch64::LD2Rv2s_POST:
+    case AArch64::LD2Rv4h_POST:
+    case AArch64::LD2Rv4s_POST:
+    case AArch64::LD2Rv8b_POST:
+    case AArch64::LD2Rv8h_POST:
+    case AArch64::LD2Twov16b_POST:
+    case AArch64::LD2Twov2d_POST:
+    case AArch64::LD2Twov2s_POST:
+    case AArch64::LD2Twov4h_POST:
+    case AArch64::LD2Twov4s_POST:
+    case AArch64::LD2Twov8b_POST:
+    case AArch64::LD2Twov8h_POST:
+    case AArch64::LD2i16_POST:
+    case AArch64::LD2i32_POST:
+    case AArch64::LD2i64_POST:
+    case AArch64::LD2i8_POST:
+    case AArch64::LD3Rv16b_POST:
+    case AArch64::LD3Rv1d_POST:
+    case AArch64::LD3Rv2d_POST:
+    case AArch64::LD3Rv2s_POST:
+    case AArch64::LD3Rv4h_POST:
+    case AArch64::LD3Rv4s_POST:
+    case AArch64::LD3Rv8b_POST:
+    case AArch64::LD3Rv8h_POST:
+    case AArch64::LD3Threev16b_POST:
+    case AArch64::LD3Threev2d_POST:
+    case AArch64::LD3Threev2s_POST:
+    case AArch64::LD3Threev4h_POST:
+    case AArch64::LD3Threev4s_POST:
+    case AArch64::LD3Threev8b_POST:
+    case AArch64::LD3Threev8h_POST:
+    case AArch64::LD3i16_POST:
+    case AArch64::LD3i32_POST:
+    case AArch64::LD3i64_POST:
+    case AArch64::LD3i8_POST:
+    case AArch64::LD4Fourv16b_POST:
+    case AArch64::LD4Fourv2d_POST:
+    case AArch64::LD4Fourv2s_POST:
+    case AArch64::LD4Fourv4h_POST:
+    case AArch64::LD4Fourv4s_POST:
+    case AArch64::LD4Fourv8b_POST:
+    case AArch64::LD4Fourv8h_POST:
+    case AArch64::LD4Rv16b_POST:
+    case AArch64::LD4Rv1d_POST:
+    case AArch64::LD4Rv2d_POST:
+    case AArch64::LD4Rv2s_POST:
+    case AArch64::LD4Rv4h_POST:
+    case AArch64::LD4Rv4s_POST:
+    case AArch64::LD4Rv8b_POST:
+    case AArch64::LD4Rv8h_POST:
+    case AArch64::LD4i16_POST:
+    case AArch64::LD4i32_POST:
+    case AArch64::LD4i64_POST:
+    case AArch64::LD4i8_POST:
+    case AArch64::LDAPRWpost:
+    case AArch64::LDAPRXpost:
+    case AArch64::LDIAPPWpost:
+    case AArch64::LDIAPPXpost:
+    case AArch64::LDPDpost:
+    case AArch64::LDPQpost:
+    case AArch64::LDPSWpost:
+    case AArch64::LDPSpost:
+    case AArch64::LDPWpost:
+    case AArch64::LDPXpost:
+    case AArch64::LDRBBpost:
+    case AArch64::LDRBpost:
+    case AArch64::LDRDpost:
+    case AArch64::LDRHHpost:
+    case AArch64::LDRHpost:
+    case AArch64::LDRQpost:
+    case AArch64::LDRSBWpost:
+    case AArch64::LDRSBXpost:
+    case AArch64::LDRSHWpost:
+    case AArch64::LDRSHXpost:
+    case AArch64::LDRSWpost:
+    case AArch64::LDRSpost:
+    case AArch64::LDRWpost:
+    case AArch64::LDRXpost:
+    case AArch64::ST1Fourv16b_POST:
+    case AArch64::ST1Fourv1d_POST:
+    case AArch64::ST1Fourv2d_POST:
+    case AArch64::ST1Fourv2s_POST:
+    case AArch64::ST1Fourv4h_POST:
+    case AArch64::ST1Fourv4s_POST:
+    case AArch64::ST1Fourv8b_POST:
+    case AArch64::ST1Fourv8h_POST:
+    case AArch64::ST1Onev16b_POST:
+    case AArch64::ST1Onev1d_POST:
+    case AArch64::ST1Onev2d_POST:
+    case AArch64::ST1Onev2s_POST:
+    case AArch64::ST1Onev4h_POST:
+    case AArch64::ST1Onev4s_POST:
+    case AArch64::ST1Onev8b_POST:
+    case AArch64::ST1Onev8h_POST:
+    case AArch64::ST1Threev16b_POST:
+    case AArch64::ST1Threev1d_POST:
+    case AArch64::ST1Threev2d_POST:
+    case AArch64::ST1Threev2s_POST:
+    case AArch64::ST1Threev4h_POST:
+    case AArch64::ST1Threev4s_POST:
+    case AArch64::ST1Threev8b_POST:
+    case AArch64::ST1Threev8h_POST:
+    case AArch64::ST1Twov16b_POST:
+    case AArch64::ST1Twov1d_POST:
+    case AArch64::ST1Twov2d_POST:
+    case AArch64::ST1Twov2s_POST:
+    case AArch64::ST1Twov4h_POST:
+    case AArch64::ST1Twov4s_POST:
+    case AArch64::ST1Twov8b_POST:
+    case AArch64::ST1Twov8h_POST:
+    case AArch64::ST1i16_POST:
+    case AArch64::ST1i32_POST:
+    case AArch64::ST1i64_POST:
+    case AArch64::ST1i8_POST:
+    case AArch64::ST2GPostIndex:
+    case AArch64::ST2Twov16b_POST:
+    case AArch64::ST2Twov2d_POST:
+    case AArch64::ST2Twov2s_POST:
+    case AArch64::ST2Twov4h_POST:
+    case AArch64::ST2Twov4s_POST:
+    case AArch64::ST2Twov8b_POST:
+    case AArch64::ST2Twov8h_POST:
+    case AArch64::ST2i16_POST:
+    case AArch64::ST2i32_POST:
+    case AArch64::ST2i64_POST:
+    case AArch64::ST2i8_POST:
+    case AArch64::ST3Threev16b_POST:
+    case AArch64::ST3Threev2d_POST:
+    case AArch64::ST3Threev2s_POST:
+    case AArch64::ST3Threev4h_POST:
+    case AArch64::ST3Threev4s_POST:
+    case AArch64::ST3Threev8b_POST:
+    case AArch64::ST3Threev8h_POST:
+    case AArch64::ST3i16_POST:
+    case AArch64::ST3i32_POST:
+    case AArch64::ST3i64_POST:
+    case AArch64::ST3i8_POST:
+    case AArch64::ST4Fourv16b_POST:
+    case AArch64::ST4Fourv2d_POST:
+    case AArch64::ST4Fourv2s_POST:
+    case AArch64::ST4Fourv4h_POST:
+    case AArch64::ST4Fourv4s_POST:
+    case AArch64::ST4Fourv8b_POST:
+    case AArch64::ST4Fourv8h_POST:
+    case AArch64::ST4i16_POST:
+    case AArch64::ST4i32_POST:
+    case AArch64::ST4i64_POST:
+    case AArch64::ST4i8_POST:
+    case AArch64::STGPostIndex:
+    case AArch64::STGPpost:
+    case AArch64::STPDpost:
+    case AArch64::STPQpost:
+    case AArch64::STPSpost:
+    case AArch64::STPWpost:
+    case AArch64::STPXpost:
+    case AArch64::STRBBpost:
+    case AArch64::STRBpost:
+    case AArch64::STRDpost:
+    case AArch64::STRHHpost:
+    case AArch64::STRHpost:
+    case AArch64::STRQpost:
+    case AArch64::STRSpost:
+    case AArch64::STRWpost:
+    case AArch64::STRXpost:
+    case AArch64::STZ2GPostIndex:
+    case AArch64::STZGPostIndex:
+      return true;
+    }
+  }
+
+  bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
+      const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
+      bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI)
+      const {
+    assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+    // Handle only loads/stores with base register followed by immediate offset.
+    if (LdSt.getNumExplicitOperands() == 3) {
+      // Non-paired instruction (e.g., ldr x1, [x0, #8]).
+      if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
+          !LdSt.getOperand(2).isImm())
+        return false;
+    } else if (LdSt.getNumExplicitOperands() == 4) {
+      // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
+      if (!LdSt.getOperand(1).isReg() ||
+          (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
+          !LdSt.getOperand(3).isImm())
+        return false;
+    } else
       return false;
-    if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
+
+    // Get the scaling factor for the instruction and set the width for the
+    // instruction.
+    TypeSize Scale(0U, false);
+    int64_t Dummy1, Dummy2;
+
+    // If this returns false, then it's an instruction we don't want to handle.
+    if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
       return false;
-    AM.BaseReg = AddrI.getOperand(1).getReg();
-    AM.ScaledReg = AddrI.getOperand(2).getReg();
-    AM.Scale = Scale;
-    AM.Displacement = 0;
-    AM.Form = Form;
-    return true;
-  };
 
-  auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
-    unsigned Opcode = MemI.getOpcode();
-    return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
-           Subtarget.isSTRQroSlow();
-  };
+    // Compute the offset. Offset is calculated as the immediate operand
+    // multiplied by the scaling factor. Unscaled instructions have scaling
+    // factor set to 1. Postindex are a special case which have an offset of 0.
+    if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
+      BaseOp = &LdSt.getOperand(2);
+      Offset = 0;
+    } else if (LdSt.getNumExplicitOperands() == 3) {
+      BaseOp = &LdSt.getOperand(1);
+      Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
+    } else {
+      assert(LdSt.getNumExplicitOperands() == 4 &&
+             "invalid number of operands");
+      BaseOp = &LdSt.getOperand(2);
+      Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
+    }
+    OffsetIsScalable = Scale.isScalable();
+
+    return BaseOp->isReg() || BaseOp->isFI();
+  }
+
+  MachineOperand &AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(
+      MachineInstr & LdSt) const {
+    assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+    MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
+    assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
+    return OfsOp;
+  }
+
+  bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
+                                      TypeSize &Width, int64_t &MinOffset,
+                                      int64_t &MaxOffset) {
+    switch (Opcode) {
+    // Not a memory operation or something we want to handle.
+    default:
+      Scale = TypeSize::getFixed(0);
+      Width = TypeSize::getFixed(0);
+      MinOffset = MaxOffset = 0;
+      return false;
+    // LDR / STR
+    case AArch64::LDRQui:
+    case AArch64::STRQui:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(16);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    case AArch64::LDRXui:
+    case AArch64::LDRDui:
+    case AArch64::STRXui:
+    case AArch64::STRDui:
+    case AArch64::PRFMui:
+      Scale = TypeSize::getFixed(8);
+      Width = TypeSize::getFixed(8);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    case AArch64::LDRWui:
+    case AArch64::LDRSui:
+    case AArch64::LDRSWui:
+    case AArch64::STRWui:
+    case AArch64::STRSui:
+      Scale = TypeSize::getFixed(4);
+      Width = TypeSize::getFixed(4);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    case AArch64::LDRHui:
+    case AArch64::LDRHHui:
+    case AArch64::LDRSHWui:
+    case AArch64::LDRSHXui:
+    case AArch64::STRHui:
+    case AArch64::STRHHui:
+      Scale = TypeSize::getFixed(2);
+      Width = TypeSize::getFixed(2);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    case AArch64::LDRBui:
+    case AArch64::LDRBBui:
+    case AArch64::LDRSBWui:
+    case AArch64::LDRSBXui:
+    case AArch64::STRBui:
+    case AArch64::STRBBui:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(1);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    // post/pre inc
+    case AArch64::STRQpre:
+    case AArch64::LDRQpost:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(16);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDRDpost:
+    case AArch64::LDRDpre:
+    case AArch64::LDRXpost:
+    case AArch64::LDRXpre:
+    case AArch64::STRDpost:
+    case AArch64::STRDpre:
+    case AArch64::STRXpost:
+    case AArch64::STRXpre:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(8);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::STRWpost:
+    case AArch64::STRWpre:
+    case AArch64::LDRWpost:
+    case AArch64::LDRWpre:
+    case AArch64::STRSpost:
+    case AArch64::STRSpre:
+    case AArch64::LDRSpost:
+    case AArch64::LDRSpre:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(4);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDRHpost:
+    case AArch64::LDRHpre:
+    case AArch64::STRHpost:
+    case AArch64::STRHpre:
+    case AArch64::LDRHHpost:
+    case AArch64::LDRHHpre:
+    case AArch64::STRHHpost:
+    case AArch64::STRHHpre:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(2);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDRBpost:
+    case AArch64::LDRBpre:
+    case AArch64::STRBpost:
+    case AArch64::STRBpre:
+    case AArch64::LDRBBpost:
+    case AArch64::LDRBBpre:
+    case AArch64::STRBBpost:
+    case AArch64::STRBBpre:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(1);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    // Unscaled
+    case AArch64::LDURQi:
+    case AArch64::STURQi:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(16);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDURXi:
+    case AArch64::LDURDi:
+    case AArch64::LDAPURXi:
+    case AArch64::STURXi:
+    case AArch64::STURDi:
+    case AArch64::STLURXi:
+    case AArch64::PRFUMi:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(8);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDURWi:
+    case AArch64::LDURSi:
+    case AArch64::LDURSWi:
+    case AArch64::LDAPURi:
+    case AArch64::LDAPURSWi:
+    case AArch64::STURWi:
+    case AArch64::STURSi:
+    case AArch64::STLURWi:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(4);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDURHi:
+    case AArch64::LDURHHi:
+    case AArch64::LDURSHXi:
+    case AArch64::LDURSHWi:
+    case AArch64::LDAPURHi:
+    case AArch64::LDAPURSHWi:
+    case AArch64::LDAPURSHXi:
+    case AArch64::STURHi:
+    case AArch64::STURHHi:
+    case AArch64::STLURHi:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(2);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDURBi:
+    case AArch64::LDURBBi:
+    case AArch64::LDURSBXi:
+    case AArch64::LDURSBWi:
+    case AArch64::LDAPURBi:
+    case AArch64::LDAPURSBWi:
+    case AArch64::LDAPURSBXi:
+    case AArch64::STURBi:
+    case AArch64::STURBBi:
+    case AArch64::STLURBi:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(1);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    // LDP / STP (including pre/post inc)
+    case AArch64::LDPQi:
+    case AArch64::LDNPQi:
+    case AArch64::STPQi:
+    case AArch64::STNPQi:
+    case AArch64::LDPQpost:
+    case AArch64::LDPQpre:
+    case AArch64::STPQpost:
+    case AArch64::STPQpre:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(16 * 2);
+      MinOffset = -64;
+      MaxOffset = 63;
+      break;
+    case AArch64::LDPXi:
+    case AArch64::LDPDi:
+    case AArch64::LDNPXi:
+    case AArch64::LDNPDi:
+    case AArch64::STPXi:
+    case AArch64::STPDi:
+    case AArch64::STNPXi:
+    case AArch64::STNPDi:
+    case AArch64::LDPDpost:
+    case AArch64::LDPDpre:
+    case AArch64::LDPXpost:
+    case AArch64::LDPXpre:
+    case AArch64::STPDpost:
+    case AArch64::STPDpre:
+    case AArch64::STPXpost:
+    case AArch64::STPXpre:
+      Scale = TypeSize::getFixed(8);
+      Width = TypeSize::getFixed(8 * 2);
+      MinOffset = -64;
+      MaxOffset = 63;
+      break;
+    case AArch64::LDPWi:
+    case AArch64::LDPSi:
+    case AArch64::LDNPWi:
+    case AArch64::LDNPSi:
+    case AArch64::STPWi:
+    case AArch64::STPSi:
+    case AArch64::STNPWi:
+    case AArch64::STNPSi:
+    case AArch64::LDPSpost:
+    case AArch64::LDPSpre:
+    case AArch64::LDPWpost:
+    case AArch64::LDPWpre:
+    case AArch64::STPSpost:
+    case AArch64::STPSpre:
+    case AArch64::STPWpost:
+    case AArch64::STPWpre:
+      Scale = TypeSize::getFixed(4);
+      Width = TypeSize::getFixed(4 * 2);
+      MinOffset = -64;
+      MaxOffset = 63;
+      break;
+    case AArch64::StoreSwiftAsyncContext:
+      // Store is an STRXui, but there might be an ADDXri in the expansion too.
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(8);
+      MinOffset = 0;
+      MaxOffset = 4095;
+      break;
+    case AArch64::ADDG:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(0);
+      MinOffset = 0;
+      MaxOffset = 63;
+      break;
+    case AArch64::TAGPstack:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(0);
+      // TAGP with a negative offset turns into SUBP, which has a maximum offset
+      // of 63 (not 64!).
+      MinOffset = -63;
+      MaxOffset = 63;
+      break;
+    case AArch64::LDG:
+    case AArch64::STGi:
+    case AArch64::STGPreIndex:
+    case AArch64::STGPostIndex:
+    case AArch64::STZGi:
+    case AArch64::STZGPreIndex:
+    case AArch64::STZGPostIndex:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(16);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    // SVE
+    case AArch64::STR_ZZZZXI:
+    case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::LDR_ZZZZXI:
+    case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
+      Scale = TypeSize::getScalable(16);
+      Width = TypeSize::getScalable(16 * 4);
+      MinOffset = -256;
+      MaxOffset = 252;
+      break;
+    case AArch64::STR_ZZZXI:
+    case AArch64::LDR_ZZZXI:
+      Scale = TypeSize::getScalable(16);
+      Width = TypeSize::getScalable(16 * 3);
+      MinOffset = -256;
+      MaxOffset = 253;
+      break;
+    case AArch64::STR_ZZXI:
+    case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
+    case AArch64::LDR_ZZXI:
+    case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
+      Scale = TypeSize::getScalable(16);
+      Width = TypeSize::getScalable(16 * 2);
+      MinOffset = -256;
+      MaxOffset = 254;
+      break;
+    case AArch64::LDR_PXI:
+    case AArch64::STR_PXI:
+      Scale = TypeSize::getScalable(2);
+      Width = TypeSize::getScalable(2);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LDR_PPXI:
+    case AArch64::STR_PPXI:
+      Scale = TypeSize::getScalable(2);
+      Width = TypeSize::getScalable(2 * 2);
+      MinOffset = -256;
+      MaxOffset = 254;
+      break;
+    case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+    case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
+    case AArch64::LDR_ZXI:
+    case AArch64::STR_ZXI:
+      Scale = TypeSize::getScalable(16);
+      Width = TypeSize::getScalable(16);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::LD1B_IMM:
+    case AArch64::LD1H_IMM:
+    case AArch64::LD1W_IMM:
+    case AArch64::LD1D_IMM:
+    case AArch64::LDNT1B_ZRI:
+    case AArch64::LDNT1H_ZRI:
+    case AArch64::LDNT1W_ZRI:
+    case AArch64::LDNT1D_ZRI:
+    case AArch64::ST1B_IMM:
+    case AArch64::ST1H_IMM:
+    case AArch64::ST1W_IMM:
+    case AArch64::ST1D_IMM:
+    case AArch64::STNT1B_ZRI:
+    case AArch64::STNT1H_ZRI:
+    case AArch64::STNT1W_ZRI:
+    case AArch64::STNT1D_ZRI:
+    case AArch64::LDNF1B_IMM:
+    case AArch64::LDNF1H_IMM:
+    case AArch64::LDNF1W_IMM:
+    case AArch64::LDNF1D_IMM:
+      // A full vector's worth of data
+      // Width = mbytes * elements
+      Scale = TypeSize::getScalable(16);
+      Width = TypeSize::getScalable(16);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::LD2B_IMM:
+    case AArch64::LD2H_IMM:
+    case AArch64::LD2W_IMM:
+    case AArch64::LD2D_IMM:
+    case AArch64::ST2B_IMM:
+    case AArch64::ST2H_IMM:
+    case AArch64::ST2W_IMM:
+    case AArch64::ST2D_IMM:
+      Scale = TypeSize::getScalable(32);
+      Width = TypeSize::getScalable(16 * 2);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::LD3B_IMM:
+    case AArch64::LD3H_IMM:
+    case AArch64::LD3W_IMM:
+    case AArch64::LD3D_IMM:
+    case AArch64::ST3B_IMM:
+    case AArch64::ST3H_IMM:
+    case AArch64::ST3W_IMM:
+    case AArch64::ST3D_IMM:
+      Scale = TypeSize::getScalable(48);
+      Width = TypeSize::getScalable(16 * 3);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::LD4B_IMM:
+    case AArch64::LD4H_IMM:
+    case AArch64::LD4W_IMM:
+    case AArch64::LD4D_IMM:
+    case AArch64::ST4B_IMM:
+    case AArch64::ST4H_IMM:
+    case AArch64::ST4W_IMM:
+    case AArch64::ST4D_IMM:
+      Scale = TypeSize::getScalable(64);
+      Width = TypeSize::getScalable(16 * 4);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::LD1B_H_IMM:
+    case AArch64::LD1SB_H_IMM:
+    case AArch64::LD1H_S_IMM:
+    case AArch64::LD1SH_S_IMM:
+    case AArch64::LD1W_D_IMM:
+    case AArch64::LD1SW_D_IMM:
+    case AArch64::ST1B_H_IMM:
+    case AArch64::ST1H_S_IMM:
+    case AArch64::ST1W_D_IMM:
+    case AArch64::LDNF1B_H_IMM:
+    case AArch64::LDNF1SB_H_IMM:
+    case AArch64::LDNF1H_S_IMM:
+    case AArch64::LDNF1SH_S_IMM:
+    case AArch64::LDNF1W_D_IMM:
+    case AArch64::LDNF1SW_D_IMM:
+      // A half vector's worth of data
+      // Width = mbytes * elements
+      Scale = TypeSize::getScalable(8);
+      Width = TypeSize::getScalable(8);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::LD1B_S_IMM:
+    case AArch64::LD1SB_S_IMM:
+    case AArch64::LD1H_D_IMM:
+    case AArch64::LD1SH_D_IMM:
+    case AArch64::ST1B_S_IMM:
+    case AArch64::ST1H_D_IMM:
+    case AArch64::LDNF1B_S_IMM:
+    case AArch64::LDNF1SB_S_IMM:
+    case AArch64::LDNF1H_D_IMM:
+    case AArch64::LDNF1SH_D_IMM:
+      // A quarter vector's worth of data
+      // Width = mbytes * elements
+      Scale = TypeSize::getScalable(4);
+      Width = TypeSize::getScalable(4);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::LD1B_D_IMM:
+    case AArch64::LD1SB_D_IMM:
+    case AArch64::ST1B_D_IMM:
+    case AArch64::LDNF1B_D_IMM:
+    case AArch64::LDNF1SB_D_IMM:
+      // An eighth vector's worth of data
+      // Width = mbytes * elements
+      Scale = TypeSize::getScalable(2);
+      Width = TypeSize::getScalable(2);
+      MinOffset = -8;
+      MaxOffset = 7;
+      break;
+    case AArch64::ST2Gi:
+    case AArch64::ST2GPreIndex:
+    case AArch64::ST2GPostIndex:
+    case AArch64::STZ2Gi:
+    case AArch64::STZ2GPreIndex:
+    case AArch64::STZ2GPostIndex:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(32);
+      MinOffset = -256;
+      MaxOffset = 255;
+      break;
+    case AArch64::STGPi:
+    case AArch64::STGPpost:
+    case AArch64::STGPpre:
+      Scale = TypeSize::getFixed(16);
+      Width = TypeSize::getFixed(16);
+      MinOffset = -64;
+      MaxOffset = 63;
+      break;
+    case AArch64::LD1RB_IMM:
+    case AArch64::LD1RB_H_IMM:
+    case AArch64::LD1RB_S_IMM:
+    case AArch64::LD1RB_D_IMM:
+    case AArch64::LD1RSB_H_IMM:
+    case AArch64::LD1RSB_S_IMM:
+    case AArch64::LD1RSB_D_IMM:
+      Scale = TypeSize::getFixed(1);
+      Width = TypeSize::getFixed(1);
+      MinOffset = 0;
+      MaxOffset = 63;
+      break;
+    case AArch64::LD1RH_IMM:
+    case AArch64::LD1RH_S_IMM:
+    case AArch64::LD1RH_D_IMM:
+    case AArch64::LD1RSH_S_IMM:
+    case AArch64::LD1RSH_D_IMM:
+      Scale = TypeSize::getFixed(2);
+      Width = TypeSize::getFixed(2);
+      MinOffset = 0;
+      MaxOffset = 63;
+      break;
+    case AArch64::LD1RW_IMM:
+    case AArch64::LD1RW_D_IMM:
+    case AArch64::LD1RSW_IMM:
+      Scale = TypeSize::getFixed(4);
+      Width = TypeSize::getFixed(4);
+      MinOffset = 0;
+      MaxOffset = 63;
+      break;
+    case AArch64::LD1RD_IMM:
+      Scale = TypeSize::getFixed(8);
+      Width = TypeSize::getFixed(8);
+      MinOffset = 0;
+      MaxOffset = 63;
+      break;
+    }
 
-  int64_t Disp = 0;
-  const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
-  switch (AddrI.getOpcode()) {
-  default:
-    return false;
+    return true;
+  }
 
-  case AArch64::ADDXri:
-    // add Xa, Xn, #N
-    // ldr Xd, [Xa, #M]
-    // ->
-    // ldr Xd, [Xn, #N'+M]
-    Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
-    return canFoldAddSubImmIntoAddrMode(Disp);
+  // Scaling factor for unscaled load or store.
+  int AArch64InstrInfo::getMemScale(unsigned Opc) {
+    switch (Opc) {
+    default:
+      llvm_unreachable("Opcode has unknown scale!");
+    case AArch64::LDRBBui:
+    case AArch64::LDURBBi:
+    case AArch64::LDRSBWui:
+    case AArch64::LDURSBWi:
+    case AArch64::STRBBui:
+    case AArch64::STURBBi:
+      return 1;
+    case AArch64::LDRHHui:
+    case AArch64::LDURHHi:
+    case AArch64::LDRSHWui:
+    case AArch64::LDURSHWi:
+    case AArch64::STRHHui:
+    case AArch64::STURHHi:
+      return 2;
+    case AArch64::LDRSui:
+    case AArch64::LDURSi:
+    case AArch64::LDRSpre:
+    case AArch64::LDRSWui:
+    case AArch64::LDURSWi:
+    case AArch64::LDRSWpre:
+    case AArch64::LDRWpre:
+    case AArch64::LDRWui:
+    case AArch64::LDURWi:
+    case AArch64::STRSui:
+    case AArch64::STURSi:
+    case AArch64::STRSpre:
+    case AArch64::STRWui:
+    case AArch64::STURWi:
+    case AArch64::STRWpre:
+    case AArch64::LDPSi:
+    case AArch64::LDPSWi:
+    case AArch64::LDPWi:
+    case AArch64::STPSi:
+    case AArch64::STPWi:
+      return 4;
+    case AArch64::LDRDui:
+    case AArch64::LDURDi:
+    case AArch64::LDRDpre:
+    case AArch64::LDRXui:
+    case AArch64::LDURXi:
+    case AArch64::LDRXpre:
+    case AArch64::STRDui:
+    case AArch64::STURDi:
+    case AArch64::STRDpre:
+    case AArch64::STRXui:
+    case AArch64::STURXi:
+    case AArch64::STRXpre:
+    case AArch64::LDPDi:
+    case AArch64::LDPXi:
+    case AArch64::STPDi:
+    case AArch64::STPXi:
+      return 8;
+    case AArch64::LDRQui:
+    case AArch64::LDURQi:
+    case AArch64::STRQui:
+    case AArch64::STURQi:
+    case AArch64::STRQpre:
+    case AArch64::LDPQi:
+    case AArch64::LDRQpre:
+    case AArch64::STPQi:
+    case AArch64::STGi:
+    case AArch64::STZGi:
+    case AArch64::ST2Gi:
+    case AArch64::STZ2Gi:
+    case AArch64::STGPi:
+      return 16;
+    }
+  }
 
-  case AArch64::SUBXri:
-    // sub Xa, Xn, #N
-    // ldr Xd, [Xa, #M]
-    // ->
-    // ldr Xd, [Xn, #N'+M]
-    Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
-    return canFoldAddSubImmIntoAddrMode(-Disp);
-
-  case AArch64::ADDXrs: {
-    // add Xa, Xn, Xm, lsl #N
-    // ldr Xd, [Xa]
-    // ->
-    // ldr Xd, [Xn, Xm, lsl #N]
-
-    // Don't fold the add if the result would be slower, unless optimising for
-    // size.
-    unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
-    if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
+  bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
       return false;
-    Shift = AArch64_AM::getShiftValue(Shift);
-    if (!OptSize) {
-      if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
-        return false;
-      if (avoidSlowSTRQ(MemI))
-        return false;
+    case AArch64::LDRWpre:
+    case AArch64::LDRXpre:
+    case AArch64::LDRSWpre:
+    case AArch64::LDRSpre:
+    case AArch64::LDRDpre:
+    case AArch64::LDRQpre:
+      return true;
     }
-    return canFoldAddRegIntoAddrMode(1ULL << Shift);
   }
 
-  case AArch64::ADDXrr:
-    // add Xa, Xn, Xm
-    // ldr Xd, [Xa]
-    // ->
-    // ldr Xd, [Xn, Xm, lsl #0]
-
-    // Don't fold the add if the result would be slower, unless optimising for
-    // size.
-    if (!OptSize && avoidSlowSTRQ(MemI))
+  bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
       return false;
-    return canFoldAddRegIntoAddrMode(1);
+    case AArch64::STRWpre:
+    case AArch64::STRXpre:
+    case AArch64::STRSpre:
+    case AArch64::STRDpre:
+    case AArch64::STRQpre:
+      return true;
+    }
+  }
 
-  case AArch64::ADDXrx:
-    // add Xa, Xn, Wm, {s,u}xtw #N
-    // ldr Xd, [Xa]
-    // ->
-    // ldr Xd, [Xn, Wm, {s,u}xtw #N]
-
-    // Don't fold the add if the result would be slower, unless optimising for
-    // size.
-    if (!OptSize && avoidSlowSTRQ(MemI))
-      return false;
+  bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
+    return isPreLd(MI) || isPreSt(MI);
+  }
 
-    // Can fold only sign-/zero-extend of a word.
-    unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
-    AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
-    if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
+  bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
       return false;
-
-    return canFoldAddRegIntoAddrMode(
-        1ULL << AArch64_AM::getArithShiftValue(Imm),
-        (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
-                                     : ExtAddrMode::Formula::ZExtScaledReg);
+    case AArch64::LDPSi:
+    case AArch64::LDPSWi:
+    case AArch64::LDPDi:
+    case AArch64::LDPQi:
+    case AArch64::LDPWi:
+    case AArch64::LDPXi:
+    case AArch64::STPSi:
+    case AArch64::STPDi:
+    case AArch64::STPQi:
+    case AArch64::STPWi:
+    case AArch64::STPXi:
+    case AArch64::STGPi:
+      return true;
+    }
   }
-}
-
-// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
-// return the opcode of an instruction performing the same operation, but using
-// the [Reg, Reg] addressing mode.
-static unsigned regOffsetOpcode(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("Address folding not implemented for instruction");
-
-  case AArch64::LDURQi:
-  case AArch64::LDRQui:
-    return AArch64::LDRQroX;
-  case AArch64::STURQi:
-  case AArch64::STRQui:
-    return AArch64::STRQroX;
-  case AArch64::LDURDi:
-  case AArch64::LDRDui:
-    return AArch64::LDRDroX;
-  case AArch64::STURDi:
-  case AArch64::STRDui:
-    return AArch64::STRDroX;
-  case AArch64::LDURXi:
-  case AArch64::LDRXui:
-    return AArch64::LDRXroX;
-  case AArch64::STURXi:
-  case AArch64::STRXui:
-    return AArch64::STRXroX;
-  case AArch64::LDURWi:
-  case AArch64::LDRWui:
-    return AArch64::LDRWroX;
-  case AArch64::LDURSWi:
-  case AArch64::LDRSWui:
-    return AArch64::LDRSWroX;
-  case AArch64::STURWi:
-  case AArch64::STRWui:
-    return AArch64::STRWroX;
-  case AArch64::LDURHi:
-  case AArch64::LDRHui:
-    return AArch64::LDRHroX;
-  case AArch64::STURHi:
-  case AArch64::STRHui:
-    return AArch64::STRHroX;
-  case AArch64::LDURHHi:
-  case AArch64::LDRHHui:
-    return AArch64::LDRHHroX;
-  case AArch64::STURHHi:
-  case AArch64::STRHHui:
-    return AArch64::STRHHroX;
-  case AArch64::LDURSHXi:
-  case AArch64::LDRSHXui:
-    return AArch64::LDRSHXroX;
-  case AArch64::LDURSHWi:
-  case AArch64::LDRSHWui:
-    return AArch64::LDRSHWroX;
-  case AArch64::LDURBi:
-  case AArch64::LDRBui:
-    return AArch64::LDRBroX;
-  case AArch64::LDURBBi:
-  case AArch64::LDRBBui:
-    return AArch64::LDRBBroX;
-  case AArch64::LDURSBXi:
-  case AArch64::LDRSBXui:
-    return AArch64::LDRSBXroX;
-  case AArch64::LDURSBWi:
-  case AArch64::LDRSBWui:
-    return AArch64::LDRSBWroX;
-  case AArch64::STURBi:
-  case AArch64::STRBui:
-    return AArch64::STRBroX;
-  case AArch64::STURBBi:
-  case AArch64::STRBBui:
-    return AArch64::STRBBroX;
-  }
-}
-
-// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
-// the opcode of an instruction performing the same operation, but using the
-// [Reg, #Imm] addressing mode with scaled offset.
-unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("Address folding not implemented for instruction");
-
-  case AArch64::LDURQi:
-    Scale = 16;
-    return AArch64::LDRQui;
-  case AArch64::STURQi:
-    Scale = 16;
-    return AArch64::STRQui;
-  case AArch64::LDURDi:
-    Scale = 8;
-    return AArch64::LDRDui;
-  case AArch64::STURDi:
-    Scale = 8;
-    return AArch64::STRDui;
-  case AArch64::LDURXi:
-    Scale = 8;
-    return AArch64::LDRXui;
-  case AArch64::STURXi:
-    Scale = 8;
-    return AArch64::STRXui;
-  case AArch64::LDURWi:
-    Scale = 4;
-    return AArch64::LDRWui;
-  case AArch64::LDURSWi:
-    Scale = 4;
-    return AArch64::LDRSWui;
-  case AArch64::STURWi:
-    Scale = 4;
-    return AArch64::STRWui;
-  case AArch64::LDURHi:
-    Scale = 2;
-    return AArch64::LDRHui;
-  case AArch64::STURHi:
-    Scale = 2;
-    return AArch64::STRHui;
-  case AArch64::LDURHHi:
-    Scale = 2;
-    return AArch64::LDRHHui;
-  case AArch64::STURHHi:
-    Scale = 2;
-    return AArch64::STRHHui;
-  case AArch64::LDURSHXi:
-    Scale = 2;
-    return AArch64::LDRSHXui;
-  case AArch64::LDURSHWi:
-    Scale = 2;
-    return AArch64::LDRSHWui;
-  case AArch64::LDURBi:
-    Scale = 1;
-    return AArch64::LDRBui;
-  case AArch64::LDURBBi:
-    Scale = 1;
-    return AArch64::LDRBBui;
-  case AArch64::LDURSBXi:
-    Scale = 1;
-    return AArch64::LDRSBXui;
-  case AArch64::LDURSBWi:
-    Scale = 1;
-    return AArch64::LDRSBWui;
-  case AArch64::STURBi:
-    Scale = 1;
-    return AArch64::STRBui;
-  case AArch64::STURBBi:
-    Scale = 1;
-    return AArch64::STRBBui;
-  case AArch64::LDRQui:
-  case AArch64::STRQui:
-    Scale = 16;
-    return Opcode;
-  case AArch64::LDRDui:
-  case AArch64::STRDui:
-  case AArch64::LDRXui:
-  case AArch64::STRXui:
-    Scale = 8;
-    return Opcode;
-  case AArch64::LDRWui:
-  case AArch64::LDRSWui:
-  case AArch64::STRWui:
-    Scale = 4;
-    return Opcode;
-  case AArch64::LDRHui:
-  case AArch64::STRHui:
-  case AArch64::LDRHHui:
-  case AArch64::STRHHui:
-  case AArch64::LDRSHXui:
-  case AArch64::LDRSHWui:
-    Scale = 2;
-    return Opcode;
-  case AArch64::LDRBui:
-  case AArch64::LDRBBui:
-  case AArch64::LDRSBXui:
-  case AArch64::LDRSBWui:
-  case AArch64::STRBui:
-  case AArch64::STRBBui:
-    Scale = 1;
-    return Opcode;
-  }
-}
-
-// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
-// the opcode of an instruction performing the same operation, but using the
-// [Reg, #Imm] addressing mode with unscaled offset.
-unsigned unscaledOffsetOpcode(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("Address folding not implemented for instruction");
-
-  case AArch64::LDURQi:
-  case AArch64::STURQi:
-  case AArch64::LDURDi:
-  case AArch64::STURDi:
-  case AArch64::LDURXi:
-  case AArch64::STURXi:
-  case AArch64::LDURWi:
-  case AArch64::LDURSWi:
-  case AArch64::STURWi:
-  case AArch64::LDURHi:
-  case AArch64::STURHi:
-  case AArch64::LDURHHi:
-  case AArch64::STURHHi:
-  case AArch64::LDURSHXi:
-  case AArch64::LDURSHWi:
-  case AArch64::LDURBi:
-  case AArch64::STURBi:
-  case AArch64::LDURBBi:
-  case AArch64::STURBBi:
-  case AArch64::LDURSBWi:
-  case AArch64::LDURSBXi:
-    return Opcode;
-  case AArch64::LDRQui:
-    return AArch64::LDURQi;
-  case AArch64::STRQui:
-    return AArch64::STURQi;
-  case AArch64::LDRDui:
-    return AArch64::LDURDi;
-  case AArch64::STRDui:
-    return AArch64::STURDi;
-  case AArch64::LDRXui:
-    return AArch64::LDURXi;
-  case AArch64::STRXui:
-    return AArch64::STURXi;
-  case AArch64::LDRWui:
-    return AArch64::LDURWi;
-  case AArch64::LDRSWui:
-    return AArch64::LDURSWi;
-  case AArch64::STRWui:
-    return AArch64::STURWi;
-  case AArch64::LDRHui:
-    return AArch64::LDURHi;
-  case AArch64::STRHui:
-    return AArch64::STURHi;
-  case AArch64::LDRHHui:
-    return AArch64::LDURHHi;
-  case AArch64::STRHHui:
-    return AArch64::STURHHi;
-  case AArch64::LDRSHXui:
-    return AArch64::LDURSHXi;
-  case AArch64::LDRSHWui:
-    return AArch64::LDURSHWi;
-  case AArch64::LDRBBui:
-    return AArch64::LDURBBi;
-  case AArch64::LDRBui:
-    return AArch64::LDURBi;
-  case AArch64::STRBBui:
-    return AArch64::STURBBi;
-  case AArch64::STRBui:
-    return AArch64::STURBi;
-  case AArch64::LDRSBWui:
-    return AArch64::LDURSBWi;
-  case AArch64::LDRSBXui:
-    return AArch64::LDURSBXi;
-  }
-}
-
-// Given the opcode of a memory load/store instruction, return the opcode of an
-// instruction performing the same operation, but using
-// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
-// offset register.
-static unsigned offsetExtendOpcode(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("Address folding not implemented for instruction");
 
-  case AArch64::LDRQroX:
-  case AArch64::LDURQi:
-  case AArch64::LDRQui:
-    return AArch64::LDRQroW;
-  case AArch64::STRQroX:
-  case AArch64::STURQi:
-  case AArch64::STRQui:
-    return AArch64::STRQroW;
-  case AArch64::LDRDroX:
-  case AArch64::LDURDi:
-  case AArch64::LDRDui:
-    return AArch64::LDRDroW;
-  case AArch64::STRDroX:
-  case AArch64::STURDi:
-  case AArch64::STRDui:
-    return AArch64::STRDroW;
-  case AArch64::LDRXroX:
-  case AArch64::LDURXi:
-  case AArch64::LDRXui:
-    return AArch64::LDRXroW;
-  case AArch64::STRXroX:
-  case AArch64::STURXi:
-  case AArch64::STRXui:
-    return AArch64::STRXroW;
-  case AArch64::LDRWroX:
-  case AArch64::LDURWi:
-  case AArch64::LDRWui:
-    return AArch64::LDRWroW;
-  case AArch64::LDRSWroX:
-  case AArch64::LDURSWi:
-  case AArch64::LDRSWui:
-    return AArch64::LDRSWroW;
-  case AArch64::STRWroX:
-  case AArch64::STURWi:
-  case AArch64::STRWui:
-    return AArch64::STRWroW;
-  case AArch64::LDRHroX:
-  case AArch64::LDURHi:
-  case AArch64::LDRHui:
-    return AArch64::LDRHroW;
-  case AArch64::STRHroX:
-  case AArch64::STURHi:
-  case AArch64::STRHui:
-    return AArch64::STRHroW;
-  case AArch64::LDRHHroX:
-  case AArch64::LDURHHi:
-  case AArch64::LDRHHui:
-    return AArch64::LDRHHroW;
-  case AArch64::STRHHroX:
-  case AArch64::STURHHi:
-  case AArch64::STRHHui:
-    return AArch64::STRHHroW;
-  case AArch64::LDRSHXroX:
-  case AArch64::LDURSHXi:
-  case AArch64::LDRSHXui:
-    return AArch64::LDRSHXroW;
-  case AArch64::LDRSHWroX:
-  case AArch64::LDURSHWi:
-  case AArch64::LDRSHWui:
-    return AArch64::LDRSHWroW;
-  case AArch64::LDRBroX:
-  case AArch64::LDURBi:
-  case AArch64::LDRBui:
-    return AArch64::LDRBroW;
-  case AArch64::LDRBBroX:
-  case AArch64::LDURBBi:
-  case AArch64::LDRBBui:
-    return AArch64::LDRBBroW;
-  case AArch64::LDRSBXroX:
-  case AArch64::LDURSBXi:
-  case AArch64::LDRSBXui:
-    return AArch64::LDRSBXroW;
-  case AArch64::LDRSBWroX:
-  case AArch64::LDURSBWi:
-  case AArch64::LDRSBWui:
-    return AArch64::LDRSBWroW;
-  case AArch64::STRBroX:
-  case AArch64::STURBi:
-  case AArch64::STRBui:
-    return AArch64::STRBroW;
-  case AArch64::STRBBroX:
-  case AArch64::STURBBi:
-  case AArch64::STRBBui:
-    return AArch64::STRBBroW;
+  const MachineOperand &AArch64InstrInfo::getLdStBaseOp(
+      const MachineInstr &MI) {
+    assert(MI.mayLoadOrStore() && "Load or store instruction expected");
+    unsigned Idx =
+        AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI)
+            ? 2
+            : 1;
+    return MI.getOperand(Idx);
   }
-}
-
-MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
-                                                 const ExtAddrMode &AM) const {
 
-  const DebugLoc &DL = MemI.getDebugLoc();
-  MachineBasicBlock &MBB = *MemI.getParent();
-  MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
+  const MachineOperand &AArch64InstrInfo::getLdStOffsetOp(
+      const MachineInstr &MI) {
+    assert(MI.mayLoadOrStore() && "Load or store instruction expected");
+    unsigned Idx =
+        AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI)
+            ? 3
+            : 2;
+    return MI.getOperand(Idx);
+  }
 
-  if (AM.Form == ExtAddrMode::Formula::Basic) {
-    if (AM.ScaledReg) {
-      // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
-      unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
-      MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
-      auto B = BuildMI(MBB, MemI, DL, get(Opcode))
-                   .addReg(MemI.getOperand(0).getReg(),
-                           MemI.mayLoad() ? RegState::Define : 0)
-                   .addReg(AM.BaseReg)
-                   .addReg(AM.ScaledReg)
-                   .addImm(0)
-                   .addImm(AM.Scale > 1)
-                   .setMemRefs(MemI.memoperands())
-                   .setMIFlags(MemI.getFlags());
-      return B.getInstr();
+  const MachineOperand &AArch64InstrInfo::getLdStAmountOp(
+      const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    default:
+      llvm_unreachable("Unexpected opcode");
+    case AArch64::LDRBroX:
+    case AArch64::LDRBBroX:
+    case AArch64::LDRSBXroX:
+    case AArch64::LDRSBWroX:
+    case AArch64::LDRHroX:
+    case AArch64::LDRHHroX:
+    case AArch64::LDRSHXroX:
+    case AArch64::LDRSHWroX:
+    case AArch64::LDRWroX:
+    case AArch64::LDRSroX:
+    case AArch64::LDRSWroX:
+    case AArch64::LDRDroX:
+    case AArch64::LDRXroX:
+    case AArch64::LDRQroX:
+      return MI.getOperand(4);
     }
-
-    assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
-           "Addressing mode not supported for folding");
-
-    // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
-    unsigned Scale = 1;
-    unsigned Opcode = MemI.getOpcode();
-    if (isInt<9>(AM.Displacement))
-      Opcode = unscaledOffsetOpcode(Opcode);
-    else
-      Opcode = scaledOffsetOpcode(Opcode, Scale);
-
-    auto B = BuildMI(MBB, MemI, DL, get(Opcode))
-                 .addReg(MemI.getOperand(0).getReg(),
-                         MemI.mayLoad() ? RegState::Define : 0)
-                 .addReg(AM.BaseReg)
-                 .addImm(AM.Displacement / Scale)
-                 .setMemRefs(MemI.memoperands())
-                 .setMIFlags(MemI.getFlags());
-    return B.getInstr();
-  }
-
-  if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
-      AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
-    // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
-    assert(AM.ScaledReg && !AM.Displacement &&
-           "Address offset can be a register or an immediate, but not both");
-    unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
-    MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
-    // Make sure the offset register is in the correct register class.
-    Register OffsetReg = AM.ScaledReg;
-    const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
-    if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
-      OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
-      BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
-          .addReg(AM.ScaledReg, 0, AArch64::sub_32);
-    }
-    auto B = BuildMI(MBB, MemI, DL, get(Opcode))
-                 .addReg(MemI.getOperand(0).getReg(),
-                         MemI.mayLoad() ? RegState::Define : 0)
-                 .addReg(AM.BaseReg)
-                 .addReg(OffsetReg)
-                 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
-                 .addImm(AM.Scale != 1)
-                 .setMemRefs(MemI.memoperands())
-                 .setMIFlags(MemI.getFlags());
-
-    return B.getInstr();
-  }
-
-  llvm_unreachable(
-      "Function must not be called with an addressing mode it can't handle");
-}
-
-/// Return true if the opcode is a post-index ld/st instruction, which really
-/// loads from base+0.
-static bool isPostIndexLdStOpcode(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return false;
-  case AArch64::LD1Fourv16b_POST:
-  case AArch64::LD1Fourv1d_POST:
-  case AArch64::LD1Fourv2d_POST:
-  case AArch64::LD1Fourv2s_POST:
-  case AArch64::LD1Fourv4h_POST:
-  case AArch64::LD1Fourv4s_POST:
-  case AArch64::LD1Fourv8b_POST:
-  case AArch64::LD1Fourv8h_POST:
-  case AArch64::LD1Onev16b_POST:
-  case AArch64::LD1Onev1d_POST:
-  case AArch64::LD1Onev2d_POST:
-  case AArch64::LD1Onev2s_POST:
-  case AArch64::LD1Onev4h_POST:
-  case AArch64::LD1Onev4s_POST:
-  case AArch64::LD1Onev8b_POST:
-  case AArch64::LD1Onev8h_POST:
-  case AArch64::LD1Rv16b_POST:
-  case AArch64::LD1Rv1d_POST:
-  case AArch64::LD1Rv2d_POST:
-  case AArch64::LD1Rv2s_POST:
-  case AArch64::LD1Rv4h_POST:
-  case AArch64::LD1Rv4s_POST:
-  case AArch64::LD1Rv8b_POST:
-  case AArch64::LD1Rv8h_POST:
-  case AArch64::LD1Threev16b_POST:
-  case AArch64::LD1Threev1d_POST:
-  case AArch64::LD1Threev2d_POST:
-  case AArch64::LD1Threev2s_POST:
-  case AArch64::LD1Threev4h_POST:
-  case AArch64::LD1Threev4s_POST:
-  case AArch64::LD1Threev8b_POST:
-  case AArch64::LD1Threev8h_POST:
-  case AArch64::LD1Twov16b_POST:
-  case AArch64::LD1Twov1d_POST:
-  case AArch64::LD1Twov2d_POST:
-  case AArch64::LD1Twov2s_POST:
-  case AArch64::LD1Twov4h_POST:
-  case AArch64::LD1Twov4s_POST:
-  case AArch64::LD1Twov8b_POST:
-  case AArch64::LD1Twov8h_POST:
-  case AArch64::LD1i16_POST:
-  case AArch64::LD1i32_POST:
-  case AArch64::LD1i64_POST:
-  case AArch64::LD1i8_POST:
-  case AArch64::LD2Rv16b_POST:
-  case AArch64::LD2Rv1d_POST:
-  case AArch64::LD2Rv2d_POST:
-  case AArch64::LD2Rv2s_POST:
-  case AArch64::LD2Rv4h_POST:
-  case AArch64::LD2Rv4s_POST:
-  case AArch64::LD2Rv8b_POST:
-  case AArch64::LD2Rv8h_POST:
-  case AArch64::LD2Twov16b_POST:
-  case AArch64::LD2Twov2d_POST:
-  case AArch64::LD2Twov2s_POST:
-  case AArch64::LD2Twov4h_POST:
-  case AArch64::LD2Twov4s_POST:
-  case AArch64::LD2Twov8b_POST:
-  case AArch64::LD2Twov8h_POST:
-  case AArch64::LD2i16_POST:
-  case AArch64::LD2i32_POST:
-  case AArch64::LD2i64_POST:
-  case AArch64::LD2i8_POST:
-  case AArch64::LD3Rv16b_POST:
-  case AArch64::LD3Rv1d_POST:
-  case AArch64::LD3Rv2d_POST:
-  case AArch64::LD3Rv2s_POST:
-  case AArch64::LD3Rv4h_POST:
-  case AArch64::LD3Rv4s_POST:
-  case AArch64::LD3Rv8b_POST:
-  case AArch64::LD3Rv8h_POST:
-  case AArch64::LD3Threev16b_POST:
-  case AArch64::LD3Threev2d_POST:
-  case AArch64::LD3Threev2s_POST:
-  case AArch64::LD3Threev4h_POST:
-  case AArch64::LD3Threev4s_POST:
-  case AArch64::LD3Threev8b_POST:
-  case AArch64::LD3Threev8h_POST:
-  case AArch64::LD3i16_POST:
-  case AArch64::LD3i32_POST:
-  case AArch64::LD3i64_POST:
-  case AArch64::LD3i8_POST:
-  case AArch64::LD4Fourv16b_POST:
-  case AArch64::LD4Fourv2d_POST:
-  case AArch64::LD4Fourv2s_POST:
-  case AArch64::LD4Fourv4h_POST:
-  case AArch64::LD4Fourv4s_POST:
-  case AArch64::LD4Fourv8b_POST:
-  case AArch64::LD4Fourv8h_POST:
-  case AArch64::LD4Rv16b_POST:
-  case AArch64::LD4Rv1d_POST:
-  case AArch64::LD4Rv2d_POST:
-  case AArch64::LD4Rv2s_POST:
-  case AArch64::LD4Rv4h_POST:
-  case AArch64::LD4Rv4s_POST:
-  case AArch64::LD4Rv8b_POST:
-  case AArch64::LD4Rv8h_POST:
-  case AArch64::LD4i16_POST:
-  case AArch64::LD4i32_POST:
-  case AArch64::LD4i64_POST:
-  case AArch64::LD4i8_POST:
-  case AArch64::LDAPRWpost:
-  case AArch64::LDAPRXpost:
-  case AArch64::LDIAPPWpost:
-  case AArch64::LDIAPPXpost:
-  case AArch64::LDPDpost:
-  case AArch64::LDPQpost:
-  case AArch64::LDPSWpost:
-  case AArch64::LDPSpost:
-  case AArch64::LDPWpost:
-  case AArch64::LDPXpost:
-  case AArch64::LDRBBpost:
-  case AArch64::LDRBpost:
-  case AArch64::LDRDpost:
-  case AArch64::LDRHHpost:
-  case AArch64::LDRHpost:
-  case AArch64::LDRQpost:
-  case AArch64::LDRSBWpost:
-  case AArch64::LDRSBXpost:
-  case AArch64::LDRSHWpost:
-  case AArch64::LDRSHXpost:
-  case AArch64::LDRSWpost:
-  case AArch64::LDRSpost:
-  case AArch64::LDRWpost:
-  case AArch64::LDRXpost:
-  case AArch64::ST1Fourv16b_POST:
-  case AArch64::ST1Fourv1d_POST:
-  case AArch64::ST1Fourv2d_POST:
-  case AArch64::ST1Fourv2s_POST:
-  case AArch64::ST1Fourv4h_POST:
-  case AArch64::ST1Fourv4s_POST:
-  case AArch64::ST1Fourv8b_POST:
-  case AArch64::ST1Fourv8h_POST:
-  case AArch64::ST1Onev16b_POST:
-  case AArch64::ST1Onev1d_POST:
-  case AArch64::ST1Onev2d_POST:
-  case AArch64::ST1Onev2s_POST:
-  case AArch64::ST1Onev4h_POST:
-  case AArch64::ST1Onev4s_POST:
-  case AArch64::ST1Onev8b_POST:
-  case AArch64::ST1Onev8h_POST:
-  case AArch64::ST1Threev16b_POST:
-  case AArch64::ST1Threev1d_POST:
-  case AArch64::ST1Threev2d_POST:
-  case AArch64::ST1Threev2s_POST:
-  case AArch64::ST1Threev4h_POST:
-  case AArch64::ST1Threev4s_POST:
-  case AArch64::ST1Threev8b_POST:
-  case AArch64::ST1Threev8h_POST:
-  case AArch64::ST1Twov16b_POST:
-  case AArch64::ST1Twov1d_POST:
-  case AArch64::ST1Twov2d_POST:
-  case AArch64::ST1Twov2s_POST:
-  case AArch64::ST1Twov4h_POST:
-  case AArch64::ST1Twov4s_POST:
-  case AArch64::ST1Twov8b_POST:
-  case AArch64::ST1Twov8h_POST:
-  case AArch64::ST1i16_POST:
-  case AArch64::ST1i32_POST:
-  case AArch64::ST1i64_POST:
-  case AArch64::ST1i8_POST:
-  case AArch64::ST2GPostIndex:
-  case AArch64::ST2Twov16b_POST:
-  case AArch64::ST2Twov2d_POST:
-  case AArch64::ST2Twov2s_POST:
-  case AArch64::ST2Twov4h_POST:
-  case AArch64::ST2Twov4s_POST:
-  case AArch64::ST2Twov8b_POST:
-  case AArch64::ST2Twov8h_POST:
-  case AArch64::ST2i16_POST:
-  case AArch64::ST2i32_POST:
-  case AArch64::ST2i64_POST:
-  case AArch64::ST2i8_POST:
-  case AArch64::ST3Threev16b_POST:
-  case AArch64::ST3Threev2d_POST:
-  case AArch64::ST3Threev2s_POST:
-  case AArch64::ST3Threev4h_POST:
-  case AArch64::ST3Threev4s_POST:
-  case AArch64::ST3Threev8b_POST:
-  case AArch64::ST3Threev8h_POST:
-  case AArch64::ST3i16_POST:
-  case AArch64::ST3i32_POST:
-  case AArch64::ST3i64_POST:
-  case AArch64::ST3i8_POST:
-  case AArch64::ST4Fourv16b_POST:
-  case AArch64::ST4Fourv2d_POST:
-  case AArch64::ST4Fourv2s_POST:
-  case AArch64::ST4Fourv4h_POST:
-  case AArch64::ST4Fourv4s_POST:
-  case AArch64::ST4Fourv8b_POST:
-  case AArch64::ST4Fourv8h_POST:
-  case AArch64::ST4i16_POST:
-  case AArch64::ST4i32_POST:
-  case AArch64::ST4i64_POST:
-  case AArch64::ST4i8_POST:
-  case AArch64::STGPostIndex:
-  case AArch64::STGPpost:
-  case AArch64::STPDpost:
-  case AArch64::STPQpost:
-  case AArch64::STPSpost:
-  case AArch64::STPWpost:
-  case AArch64::STPXpost:
-  case AArch64::STRBBpost:
-  case AArch64::STRBpost:
-  case AArch64::STRDpost:
-  case AArch64::STRHHpost:
-  case AArch64::STRHpost:
-  case AArch64::STRQpost:
-  case AArch64::STRSpost:
-  case AArch64::STRWpost:
-  case AArch64::STRXpost:
-  case AArch64::STZ2GPostIndex:
-  case AArch64::STZGPostIndex:
-    return true;
   }
-}
-
-bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
-    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
-    bool &OffsetIsScalable, TypeSize &Width,
-    const TargetRegisterInfo *TRI) const {
-  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
-  // Handle only loads/stores with base register followed by immediate offset.
-  if (LdSt.getNumExplicitOperands() == 3) {
-    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
-    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
-        !LdSt.getOperand(2).isImm())
-      return false;
-  } else if (LdSt.getNumExplicitOperands() == 4) {
-    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
-    if (!LdSt.getOperand(1).isReg() ||
-        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
-        !LdSt.getOperand(3).isImm())
-      return false;
-  } else
-    return false;
-
-  // Get the scaling factor for the instruction and set the width for the
-  // instruction.
-  TypeSize Scale(0U, false);
-  int64_t Dummy1, Dummy2;
-
-  // If this returns false, then it's an instruction we don't want to handle.
-  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
-    return false;
 
-  // Compute the offset. Offset is calculated as the immediate operand
-  // multiplied by the scaling factor. Unscaled instructions have scaling factor
-  // set to 1. Postindex are a special case which have an offset of 0.
-  if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
-    BaseOp = &LdSt.getOperand(2);
-    Offset = 0;
-  } else if (LdSt.getNumExplicitOperands() == 3) {
-    BaseOp = &LdSt.getOperand(1);
-    Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
-  } else {
-    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
-    BaseOp = &LdSt.getOperand(2);
-    Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
+  static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
+                                                Register Reg) {
+    if (MI.getParent() == nullptr)
+      return nullptr;
+    const MachineFunction *MF = MI.getParent()->getParent();
+    return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
   }
-  OffsetIsScalable = Scale.isScalable();
-
-  return BaseOp->isReg() || BaseOp->isFI();
-}
 
-MachineOperand &
-AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
-  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
-  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
-  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
-  return OfsOp;
-}
-
-bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
-                                    TypeSize &Width, int64_t &MinOffset,
-                                    int64_t &MaxOffset) {
-  switch (Opcode) {
-  // Not a memory operation or something we want to handle.
-  default:
-    Scale = TypeSize::getFixed(0);
-    Width = TypeSize::getFixed(0);
-    MinOffset = MaxOffset = 0;
-    return false;
-  // LDR / STR
-  case AArch64::LDRQui:
-  case AArch64::STRQui:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(16);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  case AArch64::LDRXui:
-  case AArch64::LDRDui:
-  case AArch64::STRXui:
-  case AArch64::STRDui:
-  case AArch64::PRFMui:
-    Scale = TypeSize::getFixed(8);
-    Width = TypeSize::getFixed(8);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  case AArch64::LDRWui:
-  case AArch64::LDRSui:
-  case AArch64::LDRSWui:
-  case AArch64::STRWui:
-  case AArch64::STRSui:
-    Scale = TypeSize::getFixed(4);
-    Width = TypeSize::getFixed(4);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  case AArch64::LDRHui:
-  case AArch64::LDRHHui:
-  case AArch64::LDRSHWui:
-  case AArch64::LDRSHXui:
-  case AArch64::STRHui:
-  case AArch64::STRHHui:
-    Scale = TypeSize::getFixed(2);
-    Width = TypeSize::getFixed(2);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  case AArch64::LDRBui:
-  case AArch64::LDRBBui:
-  case AArch64::LDRSBWui:
-  case AArch64::LDRSBXui:
-  case AArch64::STRBui:
-  case AArch64::STRBBui:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(1);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  // post/pre inc
-  case AArch64::STRQpre:
-  case AArch64::LDRQpost:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(16);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDRDpost:
-  case AArch64::LDRDpre:
-  case AArch64::LDRXpost:
-  case AArch64::LDRXpre:
-  case AArch64::STRDpost:
-  case AArch64::STRDpre:
-  case AArch64::STRXpost:
-  case AArch64::STRXpre:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(8);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::STRWpost:
-  case AArch64::STRWpre:
-  case AArch64::LDRWpost:
-  case AArch64::LDRWpre:
-  case AArch64::STRSpost:
-  case AArch64::STRSpre:
-  case AArch64::LDRSpost:
-  case AArch64::LDRSpre:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(4);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDRHpost:
-  case AArch64::LDRHpre:
-  case AArch64::STRHpost:
-  case AArch64::STRHpre:
-  case AArch64::LDRHHpost:
-  case AArch64::LDRHHpre:
-  case AArch64::STRHHpost:
-  case AArch64::STRHHpre:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(2);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDRBpost:
-  case AArch64::LDRBpre:
-  case AArch64::STRBpost:
-  case AArch64::STRBpre:
-  case AArch64::LDRBBpost:
-  case AArch64::LDRBBpre:
-  case AArch64::STRBBpost:
-  case AArch64::STRBBpre:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(1);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  // Unscaled
-  case AArch64::LDURQi:
-  case AArch64::STURQi:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(16);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDURXi:
-  case AArch64::LDURDi:
-  case AArch64::LDAPURXi:
-  case AArch64::STURXi:
-  case AArch64::STURDi:
-  case AArch64::STLURXi:
-  case AArch64::PRFUMi:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(8);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDURWi:
-  case AArch64::LDURSi:
-  case AArch64::LDURSWi:
-  case AArch64::LDAPURi:
-  case AArch64::LDAPURSWi:
-  case AArch64::STURWi:
-  case AArch64::STURSi:
-  case AArch64::STLURWi:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(4);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDURHi:
-  case AArch64::LDURHHi:
-  case AArch64::LDURSHXi:
-  case AArch64::LDURSHWi:
-  case AArch64::LDAPURHi:
-  case AArch64::LDAPURSHWi:
-  case AArch64::LDAPURSHXi:
-  case AArch64::STURHi:
-  case AArch64::STURHHi:
-  case AArch64::STLURHi:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(2);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDURBi:
-  case AArch64::LDURBBi:
-  case AArch64::LDURSBXi:
-  case AArch64::LDURSBWi:
-  case AArch64::LDAPURBi:
-  case AArch64::LDAPURSBWi:
-  case AArch64::LDAPURSBXi:
-  case AArch64::STURBi:
-  case AArch64::STURBBi:
-  case AArch64::STLURBi:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(1);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  // LDP / STP (including pre/post inc)
-  case AArch64::LDPQi:
-  case AArch64::LDNPQi:
-  case AArch64::STPQi:
-  case AArch64::STNPQi:
-  case AArch64::LDPQpost:
-  case AArch64::LDPQpre:
-  case AArch64::STPQpost:
-  case AArch64::STPQpre:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(16 * 2);
-    MinOffset = -64;
-    MaxOffset = 63;
-    break;
-  case AArch64::LDPXi:
-  case AArch64::LDPDi:
-  case AArch64::LDNPXi:
-  case AArch64::LDNPDi:
-  case AArch64::STPXi:
-  case AArch64::STPDi:
-  case AArch64::STNPXi:
-  case AArch64::STNPDi:
-  case AArch64::LDPDpost:
-  case AArch64::LDPDpre:
-  case AArch64::LDPXpost:
-  case AArch64::LDPXpre:
-  case AArch64::STPDpost:
-  case AArch64::STPDpre:
-  case AArch64::STPXpost:
-  case AArch64::STPXpre:
-    Scale = TypeSize::getFixed(8);
-    Width = TypeSize::getFixed(8 * 2);
-    MinOffset = -64;
-    MaxOffset = 63;
-    break;
-  case AArch64::LDPWi:
-  case AArch64::LDPSi:
-  case AArch64::LDNPWi:
-  case AArch64::LDNPSi:
-  case AArch64::STPWi:
-  case AArch64::STPSi:
-  case AArch64::STNPWi:
-  case AArch64::STNPSi:
-  case AArch64::LDPSpost:
-  case AArch64::LDPSpre:
-  case AArch64::LDPWpost:
-  case AArch64::LDPWpre:
-  case AArch64::STPSpost:
-  case AArch64::STPSpre:
-  case AArch64::STPWpost:
-  case AArch64::STPWpre:
-    Scale = TypeSize::getFixed(4);
-    Width = TypeSize::getFixed(4 * 2);
-    MinOffset = -64;
-    MaxOffset = 63;
-    break;
-  case AArch64::StoreSwiftAsyncContext:
-    // Store is an STRXui, but there might be an ADDXri in the expansion too.
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(8);
-    MinOffset = 0;
-    MaxOffset = 4095;
-    break;
-  case AArch64::ADDG:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(0);
-    MinOffset = 0;
-    MaxOffset = 63;
-    break;
-  case AArch64::TAGPstack:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(0);
-    // TAGP with a negative offset turns into SUBP, which has a maximum offset
-    // of 63 (not 64!).
-    MinOffset = -63;
-    MaxOffset = 63;
-    break;
-  case AArch64::LDG:
-  case AArch64::STGi:
-  case AArch64::STGPreIndex:
-  case AArch64::STGPostIndex:
-  case AArch64::STZGi:
-  case AArch64::STZGPreIndex:
-  case AArch64::STZGPostIndex:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(16);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  // SVE
-  case AArch64::STR_ZZZZXI:
-  case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::LDR_ZZZZXI:
-  case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
-    Scale = TypeSize::getScalable(16);
-    Width = TypeSize::getScalable(16 * 4);
-    MinOffset = -256;
-    MaxOffset = 252;
-    break;
-  case AArch64::STR_ZZZXI:
-  case AArch64::LDR_ZZZXI:
-    Scale = TypeSize::getScalable(16);
-    Width = TypeSize::getScalable(16 * 3);
-    MinOffset = -256;
-    MaxOffset = 253;
-    break;
-  case AArch64::STR_ZZXI:
-  case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
-  case AArch64::LDR_ZZXI:
-  case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
-    Scale = TypeSize::getScalable(16);
-    Width = TypeSize::getScalable(16 * 2);
-    MinOffset = -256;
-    MaxOffset = 254;
-    break;
-  case AArch64::LDR_PXI:
-  case AArch64::STR_PXI:
-    Scale = TypeSize::getScalable(2);
-    Width = TypeSize::getScalable(2);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LDR_PPXI:
-  case AArch64::STR_PPXI:
-    Scale = TypeSize::getScalable(2);
-    Width = TypeSize::getScalable(2 * 2);
-    MinOffset = -256;
-    MaxOffset = 254;
-    break;
-  case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
-  case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
-  case AArch64::LDR_ZXI:
-  case AArch64::STR_ZXI:
-    Scale = TypeSize::getScalable(16);
-    Width = TypeSize::getScalable(16);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::LD1B_IMM:
-  case AArch64::LD1H_IMM:
-  case AArch64::LD1W_IMM:
-  case AArch64::LD1D_IMM:
-  case AArch64::LDNT1B_ZRI:
-  case AArch64::LDNT1H_ZRI:
-  case AArch64::LDNT1W_ZRI:
-  case AArch64::LDNT1D_ZRI:
-  case AArch64::ST1B_IMM:
-  case AArch64::ST1H_IMM:
-  case AArch64::ST1W_IMM:
-  case AArch64::ST1D_IMM:
-  case AArch64::STNT1B_ZRI:
-  case AArch64::STNT1H_ZRI:
-  case AArch64::STNT1W_ZRI:
-  case AArch64::STNT1D_ZRI:
-  case AArch64::LDNF1B_IMM:
-  case AArch64::LDNF1H_IMM:
-  case AArch64::LDNF1W_IMM:
-  case AArch64::LDNF1D_IMM:
-    // A full vector's worth of data
-    // Width = mbytes * elements
-    Scale = TypeSize::getScalable(16);
-    Width = TypeSize::getScalable(16);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD2B_IMM:
-  case AArch64::LD2H_IMM:
-  case AArch64::LD2W_IMM:
-  case AArch64::LD2D_IMM:
-  case AArch64::ST2B_IMM:
-  case AArch64::ST2H_IMM:
-  case AArch64::ST2W_IMM:
-  case AArch64::ST2D_IMM:
-    Scale = TypeSize::getScalable(32);
-    Width = TypeSize::getScalable(16 * 2);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD3B_IMM:
-  case AArch64::LD3H_IMM:
-  case AArch64::LD3W_IMM:
-  case AArch64::LD3D_IMM:
-  case AArch64::ST3B_IMM:
-  case AArch64::ST3H_IMM:
-  case AArch64::ST3W_IMM:
-  case AArch64::ST3D_IMM:
-    Scale = TypeSize::getScalable(48);
-    Width = TypeSize::getScalable(16 * 3);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD4B_IMM:
-  case AArch64::LD4H_IMM:
-  case AArch64::LD4W_IMM:
-  case AArch64::LD4D_IMM:
-  case AArch64::ST4B_IMM:
-  case AArch64::ST4H_IMM:
-  case AArch64::ST4W_IMM:
-  case AArch64::ST4D_IMM:
-    Scale = TypeSize::getScalable(64);
-    Width = TypeSize::getScalable(16 * 4);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD1B_H_IMM:
-  case AArch64::LD1SB_H_IMM:
-  case AArch64::LD1H_S_IMM:
-  case AArch64::LD1SH_S_IMM:
-  case AArch64::LD1W_D_IMM:
-  case AArch64::LD1SW_D_IMM:
-  case AArch64::ST1B_H_IMM:
-  case AArch64::ST1H_S_IMM:
-  case AArch64::ST1W_D_IMM:
-  case AArch64::LDNF1B_H_IMM:
-  case AArch64::LDNF1SB_H_IMM:
-  case AArch64::LDNF1H_S_IMM:
-  case AArch64::LDNF1SH_S_IMM:
-  case AArch64::LDNF1W_D_IMM:
-  case AArch64::LDNF1SW_D_IMM:
-    // A half vector's worth of data
-    // Width = mbytes * elements
-    Scale = TypeSize::getScalable(8);
-    Width = TypeSize::getScalable(8);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD1B_S_IMM:
-  case AArch64::LD1SB_S_IMM:
-  case AArch64::LD1H_D_IMM:
-  case AArch64::LD1SH_D_IMM:
-  case AArch64::ST1B_S_IMM:
-  case AArch64::ST1H_D_IMM:
-  case AArch64::LDNF1B_S_IMM:
-  case AArch64::LDNF1SB_S_IMM:
-  case AArch64::LDNF1H_D_IMM:
-  case AArch64::LDNF1SH_D_IMM:
-    // A quarter vector's worth of data
-    // Width = mbytes * elements
-    Scale = TypeSize::getScalable(4);
-    Width = TypeSize::getScalable(4);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::LD1B_D_IMM:
-  case AArch64::LD1SB_D_IMM:
-  case AArch64::ST1B_D_IMM:
-  case AArch64::LDNF1B_D_IMM:
-  case AArch64::LDNF1SB_D_IMM:
-    // An eighth vector's worth of data
-    // Width = mbytes * elements
-    Scale = TypeSize::getScalable(2);
-    Width = TypeSize::getScalable(2);
-    MinOffset = -8;
-    MaxOffset = 7;
-    break;
-  case AArch64::ST2Gi:
-  case AArch64::ST2GPreIndex:
-  case AArch64::ST2GPostIndex:
-  case AArch64::STZ2Gi:
-  case AArch64::STZ2GPreIndex:
-  case AArch64::STZ2GPostIndex:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(32);
-    MinOffset = -256;
-    MaxOffset = 255;
-    break;
-  case AArch64::STGPi:
-  case AArch64::STGPpost:
-  case AArch64::STGPpre:
-    Scale = TypeSize::getFixed(16);
-    Width = TypeSize::getFixed(16);
-    MinOffset = -64;
-    MaxOffset = 63;
-    break;
-  case AArch64::LD1RB_IMM:
-  case AArch64::LD1RB_H_IMM:
-  case AArch64::LD1RB_S_IMM:
-  case AArch64::LD1RB_D_IMM:
-  case AArch64::LD1RSB_H_IMM:
-  case AArch64::LD1RSB_S_IMM:
-  case AArch64::LD1RSB_D_IMM:
-    Scale = TypeSize::getFixed(1);
-    Width = TypeSize::getFixed(1);
-    MinOffset = 0;
-    MaxOffset = 63;
-    break;
-  case AArch64::LD1RH_IMM:
-  case AArch64::LD1RH_S_IMM:
-  case AArch64::LD1RH_D_IMM:
-  case AArch64::LD1RSH_S_IMM:
-  case AArch64::LD1RSH_D_IMM:
-    Scale = TypeSize::getFixed(2);
-    Width = TypeSize::getFixed(2);
-    MinOffset = 0;
-    MaxOffset = 63;
-    break;
-  case AArch64::LD1RW_IMM:
-  case AArch64::LD1RW_D_IMM:
-  case AArch64::LD1RSW_IMM:
-    Scale = TypeSize::getFixed(4);
-    Width = TypeSize::getFixed(4);
-    MinOffset = 0;
-    MaxOffset = 63;
-    break;
-  case AArch64::LD1RD_IMM:
-    Scale = TypeSize::getFixed(8);
-    Width = TypeSize::getFixed(8);
-    MinOffset = 0;
-    MaxOffset = 63;
-    break;
+  bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
+    auto IsHFPR = [&](const MachineOperand &Op) {
+      if (!Op.isReg())
+        return false;
+      auto Reg = Op.getReg();
+      if (Reg.isPhysical())
+        return AArch64::FPR16RegClass.contains(Reg);
+      const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+      return TRC == &AArch64::FPR16RegClass ||
+             TRC == &AArch64::FPR16_loRegClass;
+    };
+    return llvm::any_of(MI.operands(), IsHFPR);
   }
 
-  return true;
-}
-
-// Scaling factor for unscaled load or store.
-int AArch64InstrInfo::getMemScale(unsigned Opc) {
-  switch (Opc) {
-  default:
-    llvm_unreachable("Opcode has unknown scale!");
-  case AArch64::LDRBBui:
-  case AArch64::LDURBBi:
-  case AArch64::LDRSBWui:
-  case AArch64::LDURSBWi:
-  case AArch64::STRBBui:
-  case AArch64::STURBBi:
-    return 1;
-  case AArch64::LDRHHui:
-  case AArch64::LDURHHi:
-  case AArch64::LDRSHWui:
-  case AArch64::LDURSHWi:
-  case AArch64::STRHHui:
-  case AArch64::STURHHi:
-    return 2;
-  case AArch64::LDRSui:
-  case AArch64::LDURSi:
-  case AArch64::LDRSpre:
-  case AArch64::LDRSWui:
-  case AArch64::LDURSWi:
-  case AArch64::LDRSWpre:
-  case AArch64::LDRWpre:
-  case AArch64::LDRWui:
-  case AArch64::LDURWi:
-  case AArch64::STRSui:
-  case AArch64::STURSi:
-  case AArch64::STRSpre:
-  case AArch64::STRWui:
-  case AArch64::STURWi:
-  case AArch64::STRWpre:
-  case AArch64::LDPSi:
-  case AArch64::LDPSWi:
-  case AArch64::LDPWi:
-  case AArch64::STPSi:
-  case AArch64::STPWi:
-    return 4;
-  case AArch64::LDRDui:
-  case AArch64::LDURDi:
-  case AArch64::LDRDpre:
-  case AArch64::LDRXui:
-  case AArch64::LDURXi:
-  case AArch64::LDRXpre:
-  case AArch64::STRDui:
-  case AArch64::STURDi:
-  case AArch64::STRDpre:
-  case AArch64::STRXui:
-  case AArch64::STURXi:
-  case AArch64::STRXpre:
-  case AArch64::LDPDi:
-  case AArch64::LDPXi:
-  case AArch64::STPDi:
-  case AArch64::STPXi:
-    return 8;
-  case AArch64::LDRQui:
-  case AArch64::LDURQi:
-  case AArch64::STRQui:
-  case AArch64::STURQi:
-  case AArch64::STRQpre:
-  case AArch64::LDPQi:
-  case AArch64::LDRQpre:
-  case AArch64::STPQi:
-  case AArch64::STGi:
-  case AArch64::STZGi:
-  case AArch64::ST2Gi:
-  case AArch64::STZ2Gi:
-  case AArch64::STGPi:
-    return 16;
-  }
-}
-
-bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    return false;
-  case AArch64::LDRWpre:
-  case AArch64::LDRXpre:
-  case AArch64::LDRSWpre:
-  case AArch64::LDRSpre:
-  case AArch64::LDRDpre:
-  case AArch64::LDRQpre:
-    return true;
+  bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
+    auto IsQFPR = [&](const MachineOperand &Op) {
+      if (!Op.isReg())
+        return false;
+      auto Reg = Op.getReg();
+      if (Reg.isPhysical())
+        return AArch64::FPR128RegClass.contains(Reg);
+      const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+      return TRC == &AArch64::FPR128RegClass ||
+             TRC == &AArch64::FPR128_loRegClass;
+    };
+    return llvm::any_of(MI.operands(), IsQFPR);
   }
-}
 
-bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    return false;
-  case AArch64::STRWpre:
-  case AArch64::STRXpre:
-  case AArch64::STRSpre:
-  case AArch64::STRDpre:
-  case AArch64::STRQpre:
-    return true;
+  bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    case AArch64::BRK:
+    case AArch64::HLT:
+    case AArch64::PACIASP:
+    case AArch64::PACIBSP:
+      // Implicit BTI behavior.
+      return true;
+    case AArch64::PAUTH_PROLOGUE:
+      // PAUTH_PROLOGUE expands to PACI(A|B)SP.
+      return true;
+    case AArch64::HINT: {
+      unsigned Imm = MI.getOperand(0).getImm();
+      // Explicit BTI instruction.
+      if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
+        return true;
+      // PACI(A|B)SP instructions.
+      if (Imm == 25 || Imm == 27)
+        return true;
+      return false;
+    }
+    default:
+      return false;
+    }
   }
-}
 
-bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
-  return isPreLd(MI) || isPreSt(MI);
-}
-
-bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    return false;
-  case AArch64::LDPSi:
-  case AArch64::LDPSWi:
-  case AArch64::LDPDi:
-  case AArch64::LDPQi:
-  case AArch64::LDPWi:
-  case AArch64::LDPXi:
-  case AArch64::STPSi:
-  case AArch64::STPDi:
-  case AArch64::STPQi:
-  case AArch64::STPWi:
-  case AArch64::STPXi:
-  case AArch64::STGPi:
-    return true;
+  bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
+    if (Reg == 0)
+      return false;
+    assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
+    return AArch64::FPR128RegClass.contains(Reg) ||
+           AArch64::FPR64RegClass.contains(Reg) ||
+           AArch64::FPR32RegClass.contains(Reg) ||
+           AArch64::FPR16RegClass.contains(Reg) ||
+           AArch64::FPR8RegClass.contains(Reg);
   }
-}
-
-const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
-  assert(MI.mayLoadOrStore() && "Load or store instruction expected");
-  unsigned Idx =
-      AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
-                                                                            : 1;
-  return MI.getOperand(Idx);
-}
-
-const MachineOperand &
-AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
-  assert(MI.mayLoadOrStore() && "Load or store instruction expected");
-  unsigned Idx =
-      AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
-                                                                            : 2;
-  return MI.getOperand(Idx);
-}
 
-const MachineOperand &
-AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    llvm_unreachable("Unexpected opcode");
-  case AArch64::LDRBroX:
-  case AArch64::LDRBBroX:
-  case AArch64::LDRSBXroX:
-  case AArch64::LDRSBWroX:
-  case AArch64::LDRHroX:
-  case AArch64::LDRHHroX:
-  case AArch64::LDRSHXroX:
-  case AArch64::LDRSHWroX:
-  case AArch64::LDRWroX:
-  case AArch64::LDRSroX:
-  case AArch64::LDRSWroX:
-  case AArch64::LDRDroX:
-  case AArch64::LDRXroX:
-  case AArch64::LDRQroX:
-    return MI.getOperand(4);
+  bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
+    auto IsFPR = [&](const MachineOperand &Op) {
+      if (!Op.isReg())
+        return false;
+      auto Reg = Op.getReg();
+      if (Reg.isPhysical())
+        return isFpOrNEON(Reg);
+
+      const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+      return TRC == &AArch64::FPR128RegClass ||
+             TRC == &AArch64::FPR128_loRegClass ||
+             TRC == &AArch64::FPR64RegClass ||
+             TRC == &AArch64::FPR64_loRegClass ||
+             TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
+             TRC == &AArch64::FPR8RegClass;
+    };
+    return llvm::any_of(MI.operands(), IsFPR);
   }
-}
-
-static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
-                                              Register Reg) {
-  if (MI.getParent() == nullptr)
-    return nullptr;
-  const MachineFunction *MF = MI.getParent()->getParent();
-  return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
-}
 
-bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
-  auto IsHFPR = [&](const MachineOperand &Op) {
-    if (!Op.isReg())
-      return false;
-    auto Reg = Op.getReg();
-    if (Reg.isPhysical())
-      return AArch64::FPR16RegClass.contains(Reg);
-    const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
-    return TRC == &AArch64::FPR16RegClass ||
-           TRC == &AArch64::FPR16_loRegClass;
-  };
-  return llvm::any_of(MI.operands(), IsHFPR);
-}
+  // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
+  // scaled.
+  static bool scaleOffset(unsigned Opc, int64_t &Offset) {
+    int Scale = AArch64InstrInfo::getMemScale(Opc);
 
-bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
-  auto IsQFPR = [&](const MachineOperand &Op) {
-    if (!Op.isReg())
+    // If the byte-offset isn't a multiple of the stride, we can't scale this
+    // offset.
+    if (Offset % Scale != 0)
       return false;
-    auto Reg = Op.getReg();
-    if (Reg.isPhysical())
-      return AArch64::FPR128RegClass.contains(Reg);
-    const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
-    return TRC == &AArch64::FPR128RegClass ||
-           TRC == &AArch64::FPR128_loRegClass;
-  };
-  return llvm::any_of(MI.operands(), IsQFPR);
-}
 
-bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case AArch64::BRK:
-  case AArch64::HLT:
-  case AArch64::PACIASP:
-  case AArch64::PACIBSP:
-    // Implicit BTI behavior.
+    // Convert the byte-offset used by unscaled into an "element" offset used
+    // by the scaled pair load/store instructions.
+    Offset /= Scale;
     return true;
-  case AArch64::PAUTH_PROLOGUE:
-    // PAUTH_PROLOGUE expands to PACI(A|B)SP.
-    return true;
-  case AArch64::HINT: {
-    unsigned Imm = MI.getOperand(0).getImm();
-    // Explicit BTI instruction.
-    if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
-      return true;
-    // PACI(A|B)SP instructions.
-    if (Imm == 25 || Imm == 27)
-      return true;
-    return false;
-  }
-  default:
-    return false;
   }
-}
-
-bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
-  if (Reg == 0)
-    return false;
-  assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
-  return AArch64::FPR128RegClass.contains(Reg) ||
-         AArch64::FPR64RegClass.contains(Reg) ||
-         AArch64::FPR32RegClass.contains(Reg) ||
-         AArch64::FPR16RegClass.contains(Reg) ||
-         AArch64::FPR8RegClass.contains(Reg);
-}
 
-bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
-  auto IsFPR = [&](const MachineOperand &Op) {
-    if (!Op.isReg())
+  static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
+    if (FirstOpc == SecondOpc)
+      return true;
+    // We can also pair sign-ext and zero-ext instructions.
+    switch (FirstOpc) {
+    default:
       return false;
-    auto Reg = Op.getReg();
-    if (Reg.isPhysical())
-      return isFpOrNEON(Reg);
-
-    const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
-    return TRC == &AArch64::FPR128RegClass ||
-           TRC == &AArch64::FPR128_loRegClass ||
-           TRC == &AArch64::FPR64RegClass ||
-           TRC == &AArch64::FPR64_loRegClass ||
-           TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
-           TRC == &AArch64::FPR8RegClass;
-  };
-  return llvm::any_of(MI.operands(), IsFPR);
-}
-
-// Scale the unscaled offsets.  Returns false if the unscaled offset can't be
-// scaled.
-static bool scaleOffset(unsigned Opc, int64_t &Offset) {
-  int Scale = AArch64InstrInfo::getMemScale(Opc);
-
-  // If the byte-offset isn't a multiple of the stride, we can't scale this
-  // offset.
-  if (Offset % Scale != 0)
+    case AArch64::STRSui:
+    case AArch64::STURSi:
+      return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
+    case AArch64::STRDui:
+    case AArch64::STURDi:
+      return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
+    case AArch64::STRQui:
+    case AArch64::STURQi:
+      return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
+    case AArch64::STRWui:
+    case AArch64::STURWi:
+      return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
+    case AArch64::STRXui:
+    case AArch64::STURXi:
+      return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
+    case AArch64::LDRSui:
+    case AArch64::LDURSi:
+      return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
+    case AArch64::LDRDui:
+    case AArch64::LDURDi:
+      return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
+    case AArch64::LDRQui:
+    case AArch64::LDURQi:
+      return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
+    case AArch64::LDRWui:
+    case AArch64::LDURWi:
+      return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
+    case AArch64::LDRSWui:
+    case AArch64::LDURSWi:
+      return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
+    case AArch64::LDRXui:
+    case AArch64::LDURXi:
+      return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
+    }
+    // These instructions can't be paired based on their opcodes.
     return false;
+  }
 
-  // Convert the byte-offset used by unscaled into an "element" offset used
-  // by the scaled pair load/store instructions.
-  Offset /= Scale;
-  return true;
-}
-
-static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
-  if (FirstOpc == SecondOpc)
-    return true;
-  // We can also pair sign-ext and zero-ext instructions.
-  switch (FirstOpc) {
-  default:
-    return false;
-  case AArch64::STRSui:
-  case AArch64::STURSi:
-    return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
-  case AArch64::STRDui:
-  case AArch64::STURDi:
-    return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
-  case AArch64::STRQui:
-  case AArch64::STURQi:
-    return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
-  case AArch64::STRWui:
-  case AArch64::STURWi:
-    return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
-  case AArch64::STRXui:
-  case AArch64::STURXi:
-    return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
-  case AArch64::LDRSui:
-  case AArch64::LDURSi:
-    return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
-  case AArch64::LDRDui:
-  case AArch64::LDURDi:
-    return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
-  case AArch64::LDRQui:
-  case AArch64::LDURQi:
-    return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
-  case AArch64::LDRWui:
-  case AArch64::LDURWi:
-    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
-  case AArch64::LDRSWui:
-  case AArch64::LDURSWi:
-    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
-  case AArch64::LDRXui:
-  case AArch64::LDURXi:
-    return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
-  }
-  // These instructions can't be paired based on their opcodes.
-  return false;
-}
+  static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
+                              int64_t Offset1, unsigned Opcode1, int FI2,
+                              int64_t Offset2, unsigned Opcode2) {
+    // Accesses through fixed stack object frame indices may access a different
+    // fixed stack slot. Check that the object offsets + offsets match.
+    if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
+      int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
+      int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
+      assert(ObjectOffset1 <= ObjectOffset2 &&
+             "Object offsets are not ordered.");
+      // Convert to scaled object offsets.
+      int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
+      if (ObjectOffset1 % Scale1 != 0)
+        return false;
+      ObjectOffset1 /= Scale1;
+      int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
+      if (ObjectOffset2 % Scale2 != 0)
+        return false;
+      ObjectOffset2 /= Scale2;
+      ObjectOffset1 += Offset1;
+      ObjectOffset2 += Offset2;
+      return ObjectOffset1 + 1 == ObjectOffset2;
+    }
 
-static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
-                            int64_t Offset1, unsigned Opcode1, int FI2,
-                            int64_t Offset2, unsigned Opcode2) {
-  // Accesses through fixed stack object frame indices may access a different
-  // fixed stack slot. Check that the object offsets + offsets match.
-  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
-    int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
-    int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
-    assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
-    // Convert to scaled object offsets.
-    int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
-    if (ObjectOffset1 % Scale1 != 0)
+    return FI1 == FI2;
+  }
+
+  /// Detect opportunities for ldp/stp formation.
+  ///
+  /// Only called for LdSt for which getMemOperandWithOffset returns true.
+  bool AArch64InstrInfo::shouldClusterMemOps(
+      ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
+      bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+      int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
+      unsigned NumBytes) const {
+    assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
+    const MachineOperand &BaseOp1 = *BaseOps1.front();
+    const MachineOperand &BaseOp2 = *BaseOps2.front();
+    const MachineInstr &FirstLdSt = *BaseOp1.getParent();
+    const MachineInstr &SecondLdSt = *BaseOp2.getParent();
+    if (BaseOp1.getType() != BaseOp2.getType())
       return false;
-    ObjectOffset1 /= Scale1;
-    int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
-    if (ObjectOffset2 % Scale2 != 0)
-      return false;
-    ObjectOffset2 /= Scale2;
-    ObjectOffset1 += Offset1;
-    ObjectOffset2 += Offset2;
-    return ObjectOffset1 + 1 == ObjectOffset2;
-  }
 
-  return FI1 == FI2;
-}
+    assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+           "Only base registers and frame indices are supported.");
 
-/// Detect opportunities for ldp/stp formation.
-///
-/// Only called for LdSt for which getMemOperandWithOffset returns true.
-bool AArch64InstrInfo::shouldClusterMemOps(
-    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
-    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
-    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
-    unsigned NumBytes) const {
-  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
-  const MachineOperand &BaseOp1 = *BaseOps1.front();
-  const MachineOperand &BaseOp2 = *BaseOps2.front();
-  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
-  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
-  if (BaseOp1.getType() != BaseOp2.getType())
-    return false;
+    // Check for both base regs and base FI.
+    if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
+      return false;
 
-  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
-         "Only base registers and frame indices are supported.");
+    // Only cluster up to a single pair.
+    if (ClusterSize > 2)
+      return false;
 
-  // Check for both base regs and base FI.
-  if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
-    return false;
+    if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
+      return false;
 
-  // Only cluster up to a single pair.
-  if (ClusterSize > 2)
-    return false;
+    // Can we pair these instructions based on their opcodes?
+    unsigned FirstOpc = FirstLdSt.getOpcode();
+    unsigned SecondOpc = SecondLdSt.getOpcode();
+    if (!canPairLdStOpc(FirstOpc, SecondOpc))
+      return false;
 
-  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
-    return false;
+    // Can't merge volatiles or load/stores that have a hint to avoid pair
+    // formation, for example.
+    if (!isCandidateToMergeOrPair(FirstLdSt) ||
+        !isCandidateToMergeOrPair(SecondLdSt))
+      return false;
 
-  // Can we pair these instructions based on their opcodes?
-  unsigned FirstOpc = FirstLdSt.getOpcode();
-  unsigned SecondOpc = SecondLdSt.getOpcode();
-  if (!canPairLdStOpc(FirstOpc, SecondOpc))
-    return false;
+    // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
+    int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
+    if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
+      return false;
 
-  // Can't merge volatiles or load/stores that have a hint to avoid pair
-  // formation, for example.
-  if (!isCandidateToMergeOrPair(FirstLdSt) ||
-      !isCandidateToMergeOrPair(SecondLdSt))
-    return false;
+    int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
+    if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
+      return false;
 
-  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
-  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
-  if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
-    return false;
+    // Pairwise instructions have a 7-bit signed offset field.
+    if (Offset1 > 63 || Offset1 < -64)
+      return false;
 
-  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
-  if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
-    return false;
+    // The caller should already have ordered First/SecondLdSt by offset.
+    // Note: except for non-equal frame index bases
+    if (BaseOp1.isFI()) {
+      assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
+             "Caller should have ordered offsets.");
 
-  // Pairwise instructions have a 7-bit signed offset field.
-  if (Offset1 > 63 || Offset1 < -64)
-    return false;
+      const MachineFrameInfo &MFI =
+          FirstLdSt.getParent()->getParent()->getFrameInfo();
+      return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
+                             BaseOp2.getIndex(), Offset2, SecondOpc);
+    }
 
-  // The caller should already have ordered First/SecondLdSt by offset.
-  // Note: except for non-equal frame index bases
-  if (BaseOp1.isFI()) {
-    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
-           "Caller should have ordered offsets.");
+    assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
 
-    const MachineFrameInfo &MFI =
-        FirstLdSt.getParent()->getParent()->getFrameInfo();
-    return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
-                           BaseOp2.getIndex(), Offset2, SecondOpc);
+    return Offset1 + 1 == Offset2;
   }
 
-  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
-
-  return Offset1 + 1 == Offset2;
-}
-
-static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
-                                            MCRegister Reg, unsigned SubIdx,
-                                            unsigned State,
-                                            const TargetRegisterInfo *TRI) {
-  if (!SubIdx)
-    return MIB.addReg(Reg, State);
-
-  if (Reg.isPhysical())
-    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
-  return MIB.addReg(Reg, State, SubIdx);
-}
-
-static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
-                                        unsigned NumRegs) {
-  // We really want the positive remainder mod 32 here, that happens to be
-  // easily obtainable with a mask.
-  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
-}
+  static const MachineInstrBuilder &AddSubReg(
+      const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx,
+      unsigned State, const TargetRegisterInfo *TRI) {
+    if (!SubIdx)
+      return MIB.addReg(Reg, State);
 
-void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator I,
-                                        const DebugLoc &DL, MCRegister DestReg,
-                                        MCRegister SrcReg, bool KillSrc,
-                                        unsigned Opcode,
-                                        ArrayRef<unsigned> Indices) const {
-  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
-  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
-  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
-  unsigned NumRegs = Indices.size();
+    if (Reg.isPhysical())
+      return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+    return MIB.addReg(Reg, State, SubIdx);
+  }
 
-  int SubReg = 0, End = NumRegs, Incr = 1;
-  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
-    SubReg = NumRegs - 1;
-    End = -1;
-    Incr = -1;
+  static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
+                                          unsigned NumRegs) {
+    // We really want the positive remainder mod 32 here, that happens to be
+    // easily obtainable with a mask.
+    return ((DestReg - SrcReg) & 0x1f) < NumRegs;
   }
 
-  for (; SubReg != End; SubReg += Incr) {
-    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
-    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
-    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
-    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+  void AArch64InstrInfo::copyPhysRegTuple(
+      MachineBasicBlock & MBB, MachineBasicBlock::iterator I,
+      const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
+      unsigned Opcode, ArrayRef<unsigned> Indices) const {
+    assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+    uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
+    uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
+    unsigned NumRegs = Indices.size();
+
+    int SubReg = 0, End = NumRegs, Incr = 1;
+    if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
+      SubReg = NumRegs - 1;
+      End = -1;
+      Incr = -1;
+    }
+
+    for (; SubReg != End; SubReg += Incr) {
+      const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
+      AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
+      AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
+      AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+    }
   }
-}
 
-void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
-                                       MachineBasicBlock::iterator I,
-                                       const DebugLoc &DL, MCRegister DestReg,
-                                       MCRegister SrcReg, bool KillSrc,
-                                       unsigned Opcode, unsigned ZeroReg,
-                                       llvm::ArrayRef<unsigned> Indices) const {
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
-  unsigned NumRegs = Indices.size();
+  void AArch64InstrInfo::copyGPRRegTuple(
+      MachineBasicBlock & MBB, MachineBasicBlock::iterator I,
+      const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
+      unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef<unsigned> Indices)
+      const {
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+    unsigned NumRegs = Indices.size();
 
 #ifndef NDEBUG
   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);

``````````

</details>
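
If you would rather apply the suggested formatting locally than just inspect the diff, one option is to run the tool without `--diff` so it rewrites the affected lines in place. This is only a sketch: it assumes the `git-clang-format` script that ships with clang is on your PATH, the tip of the PR branch is checked out with a clean working tree, and the formatting issues were introduced by the most recent commit (adjust `HEAD~1` if the branch carries more commits).

``````````bash
# Reformat, in place, only the lines touched by the most recent commit.
git-clang-format HEAD~1

# Inspect what the tool changed, then fold it into the existing commit.
git diff
git commit --amend -a --no-edit
``````````
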


https://github.com/llvm/llvm-project/pull/150803

