[llvm] [ARM][AArch64] Allow the CSE to take into consideration uses of the carry and overflow flags in ARM and AArch64 (PR #150803)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 26 18:06:18 PDT 2025
github-actions[bot] wrote:
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
``````````
</details>
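If you want to apply the formatting fixes instead of just inspecting them, the usual LLVM workflow is roughly the following. This is a sketch, not part of the bot's message: it assumes the PR consists of a single commit and that `git-clang-format` is on your PATH.

``````````bash
# Reformat only the lines touched by the last commit, in the working tree.
git-clang-format HEAD~1 --extensions cpp -- \
  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp \
  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp

# Fold the formatting changes back into the commit and update the PR branch.
git commit --amend -a
git push --force-with-lease
``````````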
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index feb7d9695..6529aa277 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1915,3152 +1915,3182 @@ static bool isANDSOpcode(MachineInstr &MI) {
return true;
default:
return false;
-}
-
-/// Check if CmpInstr can be substituted by MI.
-///
-/// CmpInstr can be substituted:
-/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
-/// - and, MI and CmpInstr are from the same MachineBB
-/// - and, condition flags are not alive in successors of the CmpInstr parent
-/// - and, if MI opcode is the S form there must be no defs of flags between
-/// MI and CmpInstr
-/// or if MI opcode is not the S form there must be neither defs of flags
-/// nor uses of flags between MI and CmpInstr.
-/// - and, if C/V flags are not used after CmpInstr
-/// or if N flag is used but MI produces poison value if signed overflow
-/// occurs.
-static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
- const TargetRegisterInfo &TRI) {
- // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
- // that may or may not set flags.
- assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
-
- const unsigned CmpOpcode = CmpInstr.getOpcode();
- if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
- return false;
+ }
- assert((CmpInstr.getOperand(2).isImm() &&
- CmpInstr.getOperand(2).getImm() == 0) &&
- "Caller guarantees that CmpInstr compares with constant 0");
+ /// Check if CmpInstr can be substituted by MI.
+ ///
+ /// CmpInstr can be substituted:
+ /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
+ /// - and, MI and CmpInstr are from the same MachineBB
+ /// - and, condition flags are not alive in successors of the CmpInstr parent
+ /// - and, if MI opcode is the S form there must be no defs of flags between
+ /// MI and CmpInstr
+ /// or if MI opcode is not the S form there must be neither defs of
+ /// flags nor uses of flags between MI and CmpInstr.
+ /// - and, if C/V flags are not used after CmpInstr
+ /// or if N flag is used but MI produces poison value if signed
+ /// overflow occurs.
+ static bool canInstrSubstituteCmpInstr(MachineInstr & MI,
+ MachineInstr & CmpInstr,
+ const TargetRegisterInfo &TRI) {
+ // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
+ // that may or may not set flags.
+ assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
+
+ const unsigned CmpOpcode = CmpInstr.getOpcode();
+ if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
+ return false;
- std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
- if (!NZVCUsed)
- return false;
+ assert((CmpInstr.getOperand(2).isImm() &&
+ CmpInstr.getOperand(2).getImm() == 0) &&
+ "Caller guarantees that CmpInstr compares with constant 0");
- // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
- // '%vreg = add ...' or '%vreg = sub ...'.
- // Condition flag C is used to indicate unsigned overflow.
- // 1) MI and CmpInstr set N and C to the same value if Cmp is an adds
- // 2) ADDS x, 0, always sets C to 0.
- // In practice we should not really get here, as an unsigned comparison with 0
- // should have been optimized out anyway, but just in case.
- if (NZVCUsed->C && !isADDSRegImm(CmpOpcode))
- return false;
+ std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
+ if (!NZVCUsed)
+ return false;
- // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
- // '%vreg = add ...' or '%vreg = sub ...'.
- // Condition flag V is used to indicate signed overflow.
- // 1) MI and CmpInstr set N and V to the same value.
- // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
- // signed overflow occurs, so CmpInstr could still be simplified away.
- // 3) ANDS also always sets V to 0.
- if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDSOpcode(MI))
- return false;
+ // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
+ // '%vreg = add ...' or '%vreg = sub ...'.
+ // Condition flag C is used to indicate unsigned overflow.
+ // 1) MI and CmpInstr set N and C to the same value if Cmp is an adds
+ // 2) ADDS x, 0, always sets C to 0.
+ // In practice we should not really get here, as an unsigned comparison with
+ // 0 should have been optimized out anyway, but just in case.
+ if (NZVCUsed->C && !isADDSRegImm(CmpOpcode))
+ return false;
- AccessKind AccessToCheck = AK_Write;
- if (sForm(MI) != MI.getOpcode())
- AccessToCheck = AK_All;
- return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
-}
+ // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
+ // '%vreg = add ...' or '%vreg = sub ...'.
+ // Condition flag V is used to indicate signed overflow.
+ // 1) MI and CmpInstr set N and V to the same value.
+ // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
+ // signed overflow occurs, so CmpInstr could still be simplified away.
+ // 3) ANDS also always sets V to 0.
+ if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDSOpcode(MI))
+ return false;
-/// Substitute an instruction comparing to zero with another instruction
-/// which produces needed condition flags.
-///
-/// Return true on success.
-bool AArch64InstrInfo::substituteCmpToZero(
- MachineInstr &CmpInstr, unsigned SrcReg,
- const MachineRegisterInfo &MRI) const {
- // Get the unique definition of SrcReg.
- MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
- if (!MI)
- return false;
+ AccessKind AccessToCheck = AK_Write;
+ if (sForm(MI) != MI.getOpcode())
+ AccessToCheck = AK_All;
+ return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
+ }
+
+ /// Substitute an instruction comparing to zero with another instruction
+ /// which produces needed condition flags.
+ ///
+ /// Return true on success.
+ bool AArch64InstrInfo::substituteCmpToZero(
+ MachineInstr & CmpInstr, unsigned SrcReg, const MachineRegisterInfo &MRI)
+ const {
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
+ if (!MI)
+ return false;
- const TargetRegisterInfo &TRI = getRegisterInfo();
+ const TargetRegisterInfo &TRI = getRegisterInfo();
- unsigned NewOpc = sForm(*MI);
- if (NewOpc == AArch64::INSTRUCTION_LIST_END)
- return false;
+ unsigned NewOpc = sForm(*MI);
+ if (NewOpc == AArch64::INSTRUCTION_LIST_END)
+ return false;
- if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
- return false;
+ if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
+ return false;
- // Update the instruction to set NZCV.
- MI->setDesc(get(NewOpc));
- CmpInstr.eraseFromParent();
- bool succeeded = UpdateOperandRegClass(*MI);
- (void)succeeded;
- assert(succeeded && "Some operands reg class are incompatible!");
- MI->addRegisterDefined(AArch64::NZCV, &TRI);
- return true;
-}
+ // Update the instruction to set NZCV.
+ MI->setDesc(get(NewOpc));
+ CmpInstr.eraseFromParent();
+ bool succeeded = UpdateOperandRegClass(*MI);
+ (void)succeeded;
+ assert(succeeded && "Some operands reg class are incompatible!");
+ MI->addRegisterDefined(AArch64::NZCV, &TRI);
+ return true;
+ }
-/// \returns True if \p CmpInstr can be removed.
-///
-/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
-/// codes used in \p CCUseInstrs must be inverted.
-static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
- int CmpValue, const TargetRegisterInfo &TRI,
- SmallVectorImpl<MachineInstr *> &CCUseInstrs,
- bool &IsInvertCC) {
- assert((CmpValue == 0 || CmpValue == 1) &&
- "Only comparisons to 0 or 1 considered for removal!");
-
- // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
- unsigned MIOpc = MI.getOpcode();
- if (MIOpc == AArch64::CSINCWr) {
- if (MI.getOperand(1).getReg() != AArch64::WZR ||
- MI.getOperand(2).getReg() != AArch64::WZR)
+ /// \returns True if \p CmpInstr can be removed.
+ ///
+ /// \p IsInvertCC is true if, after removing \p CmpInstr, condition
+ /// codes used in \p CCUseInstrs must be inverted.
+ static bool canCmpInstrBeRemoved(MachineInstr & MI, MachineInstr & CmpInstr,
+ int CmpValue, const TargetRegisterInfo &TRI,
+ SmallVectorImpl<MachineInstr *> &CCUseInstrs,
+ bool &IsInvertCC) {
+ assert((CmpValue == 0 || CmpValue == 1) &&
+ "Only comparisons to 0 or 1 considered for removal!");
+
+ // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
+ unsigned MIOpc = MI.getOpcode();
+ if (MIOpc == AArch64::CSINCWr) {
+ if (MI.getOperand(1).getReg() != AArch64::WZR ||
+ MI.getOperand(2).getReg() != AArch64::WZR)
+ return false;
+ } else if (MIOpc == AArch64::CSINCXr) {
+ if (MI.getOperand(1).getReg() != AArch64::XZR ||
+ MI.getOperand(2).getReg() != AArch64::XZR)
+ return false;
+ } else {
return false;
- } else if (MIOpc == AArch64::CSINCXr) {
- if (MI.getOperand(1).getReg() != AArch64::XZR ||
- MI.getOperand(2).getReg() != AArch64::XZR)
+ }
+ AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
+ if (MICC == AArch64CC::Invalid)
return false;
- } else {
- return false;
- }
- AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
- if (MICC == AArch64CC::Invalid)
- return false;
- // NZCV needs to be defined
- if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
- return false;
+ // NZCV needs to be defined
+ if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) !=
+ -1)
+ return false;
- // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
- const unsigned CmpOpcode = CmpInstr.getOpcode();
- bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
- if (CmpValue && !IsSubsRegImm)
- return false;
- if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
- return false;
+ // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
+ const unsigned CmpOpcode = CmpInstr.getOpcode();
+ bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
+ if (CmpValue && !IsSubsRegImm)
+ return false;
+ if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
+ return false;
- // MI conditions allowed: eq, ne, mi, pl
- UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
- if (MIUsedNZCV.C || MIUsedNZCV.V)
- return false;
+ // MI conditions allowed: eq, ne, mi, pl
+ UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
+ if (MIUsedNZCV.C || MIUsedNZCV.V)
+ return false;
- std::optional<UsedNZCV> NZCVUsedAfterCmp =
- examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
- // Condition flags are not used in CmpInstr basic block successors and only
- // Z or N flags allowed to be used after CmpInstr within its basic block
- if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
- return false;
- // Z or N flag used after CmpInstr must correspond to the flag used in MI
- if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
- (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
- return false;
- // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
- if (MIUsedNZCV.N && !CmpValue)
- return false;
+ std::optional<UsedNZCV> NZCVUsedAfterCmp =
+ examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
+ // Condition flags are not used in CmpInstr basic block successors and only
+ // Z or N flags allowed to be used after CmpInstr within its basic block
+ if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
+ return false;
+ // Z or N flag used after CmpInstr must correspond to the flag used in MI
+ if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
+ (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
+ return false;
+ // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
+ if (MIUsedNZCV.N && !CmpValue)
+ return false;
- // There must be no defs of flags between MI and CmpInstr
- if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
- return false;
+ // There must be no defs of flags between MI and CmpInstr
+ if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
+ return false;
- // Condition code is inverted in the following cases:
- // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
- // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
- IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
- (!CmpValue && MICC == AArch64CC::NE);
- return true;
-}
+ // Condition code is inverted in the following cases:
+ // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
+ // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
+ IsInvertCC =
+ (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
+ (!CmpValue && MICC == AArch64CC::NE);
+ return true;
+ }
-/// Remove comparison in csinc-cmp sequence
-///
-/// Examples:
-/// 1. \code
-/// csinc w9, wzr, wzr, ne
-/// cmp w9, #0
-/// b.eq
-/// \endcode
-/// to
-/// \code
-/// csinc w9, wzr, wzr, ne
-/// b.ne
-/// \endcode
-///
-/// 2. \code
-/// csinc x2, xzr, xzr, mi
-/// cmp x2, #1
-/// b.pl
-/// \endcode
-/// to
-/// \code
-/// csinc x2, xzr, xzr, mi
-/// b.pl
-/// \endcode
-///
-/// \param CmpInstr comparison instruction
-/// \return True when comparison removed
-bool AArch64InstrInfo::removeCmpToZeroOrOne(
- MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
- const MachineRegisterInfo &MRI) const {
- MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
- if (!MI)
- return false;
- const TargetRegisterInfo &TRI = getRegisterInfo();
- SmallVector<MachineInstr *, 4> CCUseInstrs;
- bool IsInvertCC = false;
- if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
- IsInvertCC))
- return false;
- // Make transformation
- CmpInstr.eraseFromParent();
- if (IsInvertCC) {
- // Invert condition codes in CmpInstr CC users
- for (MachineInstr *CCUseInstr : CCUseInstrs) {
- int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
- assert(Idx >= 0 && "Unexpected instruction using CC.");
- MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
- AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
- static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
- CCOperand.setImm(CCUse);
+ /// Remove comparison in csinc-cmp sequence
+ ///
+ /// Examples:
+ /// 1. \code
+ /// csinc w9, wzr, wzr, ne
+ /// cmp w9, #0
+ /// b.eq
+ /// \endcode
+ /// to
+ /// \code
+ /// csinc w9, wzr, wzr, ne
+ /// b.ne
+ /// \endcode
+ ///
+ /// 2. \code
+ /// csinc x2, xzr, xzr, mi
+ /// cmp x2, #1
+ /// b.pl
+ /// \endcode
+ /// to
+ /// \code
+ /// csinc x2, xzr, xzr, mi
+ /// b.pl
+ /// \endcode
+ ///
+ /// \param CmpInstr comparison instruction
+ /// \return True when comparison removed
+ bool AArch64InstrInfo::removeCmpToZeroOrOne(
+ MachineInstr & CmpInstr, unsigned SrcReg, int CmpValue,
+ const MachineRegisterInfo &MRI) const {
+ MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
+ if (!MI)
+ return false;
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+ SmallVector<MachineInstr *, 4> CCUseInstrs;
+ bool IsInvertCC = false;
+ if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
+ IsInvertCC))
+ return false;
+ // Make transformation
+ CmpInstr.eraseFromParent();
+ if (IsInvertCC) {
+ // Invert condition codes in CmpInstr CC users
+ for (MachineInstr *CCUseInstr : CCUseInstrs) {
+ int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
+ assert(Idx >= 0 && "Unexpected instruction using CC.");
+ MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
+ AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
+ static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
+ CCOperand.setImm(CCUse);
+ }
}
+ return true;
}
- return true;
-}
-bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
- if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
- MI.getOpcode() != AArch64::CATCHRET)
- return false;
+ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr & MI) const {
+ if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
+ MI.getOpcode() != AArch64::CATCHRET)
+ return false;
- MachineBasicBlock &MBB = *MI.getParent();
- auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
- auto TRI = Subtarget.getRegisterInfo();
- DebugLoc DL = MI.getDebugLoc();
-
- if (MI.getOpcode() == AArch64::CATCHRET) {
- // Skip to the first instruction before the epilog.
- const TargetInstrInfo *TII =
- MBB.getParent()->getSubtarget().getInstrInfo();
- MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
- auto MBBI = MachineBasicBlock::iterator(MI);
- MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
- while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
- FirstEpilogSEH != MBB.begin())
- FirstEpilogSEH = std::prev(FirstEpilogSEH);
- if (FirstEpilogSEH != MBB.begin())
- FirstEpilogSEH = std::next(FirstEpilogSEH);
- BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
- .addReg(AArch64::X0, RegState::Define)
- .addMBB(TargetMBB);
- BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
- .addReg(AArch64::X0, RegState::Define)
- .addReg(AArch64::X0)
- .addMBB(TargetMBB)
- .addImm(0);
- TargetMBB->setMachineBlockAddressTaken();
- return true;
- }
+ MachineBasicBlock &MBB = *MI.getParent();
+ auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
+ auto TRI = Subtarget.getRegisterInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ if (MI.getOpcode() == AArch64::CATCHRET) {
+ // Skip to the first instruction before the epilog.
+ const TargetInstrInfo *TII =
+ MBB.getParent()->getSubtarget().getInstrInfo();
+ MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
+ auto MBBI = MachineBasicBlock::iterator(MI);
+ MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
+ while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
+ FirstEpilogSEH != MBB.begin())
+ FirstEpilogSEH = std::prev(FirstEpilogSEH);
+ if (FirstEpilogSEH != MBB.begin())
+ FirstEpilogSEH = std::next(FirstEpilogSEH);
+ BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
+ .addReg(AArch64::X0, RegState::Define)
+ .addMBB(TargetMBB);
+ BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
+ .addReg(AArch64::X0, RegState::Define)
+ .addReg(AArch64::X0)
+ .addMBB(TargetMBB)
+ .addImm(0);
+ TargetMBB->setMachineBlockAddressTaken();
+ return true;
+ }
- Register Reg = MI.getOperand(0).getReg();
- Module &M = *MBB.getParent()->getFunction().getParent();
- if (M.getStackProtectorGuard() == "sysreg") {
- const AArch64SysReg::SysReg *SrcReg =
- AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
- if (!SrcReg)
- report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
-
- // mrs xN, sysreg
- BuildMI(MBB, MI, DL, get(AArch64::MRS))
- .addDef(Reg, RegState::Renamable)
- .addImm(SrcReg->Encoding);
- int Offset = M.getStackProtectorGuardOffset();
- if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
- // ldr xN, [xN, #offset]
- BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
- .addDef(Reg)
- .addUse(Reg, RegState::Kill)
- .addImm(Offset / 8);
- } else if (Offset >= -256 && Offset <= 255) {
- // ldur xN, [xN, #offset]
- BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
- .addDef(Reg)
- .addUse(Reg, RegState::Kill)
- .addImm(Offset);
- } else if (Offset >= -4095 && Offset <= 4095) {
- if (Offset > 0) {
- // add xN, xN, #offset
- BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
+ Register Reg = MI.getOperand(0).getReg();
+ Module &M = *MBB.getParent()->getFunction().getParent();
+ if (M.getStackProtectorGuard() == "sysreg") {
+ const AArch64SysReg::SysReg *SrcReg =
+ AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
+ if (!SrcReg)
+ report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
+
+ // mrs xN, sysreg
+ BuildMI(MBB, MI, DL, get(AArch64::MRS))
+ .addDef(Reg, RegState::Renamable)
+ .addImm(SrcReg->Encoding);
+ int Offset = M.getStackProtectorGuardOffset();
+ if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
+ // ldr xN, [xN, #offset]
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
- .addImm(Offset)
- .addImm(0);
- } else {
- // sub xN, xN, #offset
- BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
+ .addImm(Offset / 8);
+ } else if (Offset >= -256 && Offset <= 255) {
+ // ldur xN, [xN, #offset]
+ BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
+ .addDef(Reg)
+ .addUse(Reg, RegState::Kill)
+ .addImm(Offset);
+ } else if (Offset >= -4095 && Offset <= 4095) {
+ if (Offset > 0) {
+ // add xN, xN, #offset
+ BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
+ .addDef(Reg)
+ .addUse(Reg, RegState::Kill)
+ .addImm(Offset)
+ .addImm(0);
+ } else {
+ // sub xN, xN, #offset
+ BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
+ .addDef(Reg)
+ .addUse(Reg, RegState::Kill)
+ .addImm(-Offset)
+ .addImm(0);
+ }
+ // ldr xN, [xN]
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
- .addImm(-Offset)
.addImm(0);
+ } else {
+ // Cases that are larger than +/- 4095 and not a multiple of 8, or
+ // larger than 23760. It might be nice to use AArch64::MOVi32imm here,
+ // which would get expanded in PreSched2 after PostRA, but our lone
+ // scratch Reg already contains the MRS result.
+ // findScratchNonCalleeSaveRegister() in AArch64FrameLowering might help
+ // us find such a scratch register though. If we failed to find a
+ // scratch register, we could emit a stream of add instructions to build
+ // up the immediate. Or, we could try to insert a AArch64::MOVi32imm
+ // before register allocation so that we didn't need to scavenge for a
+ // scratch register.
+ report_fatal_error("Unable to encode Stack Protector Guard Offset");
}
- // ldr xN, [xN]
- BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
- .addDef(Reg)
- .addUse(Reg, RegState::Kill)
- .addImm(0);
- } else {
- // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
- // than 23760.
- // It might be nice to use AArch64::MOVi32imm here, which would get
- // expanded in PreSched2 after PostRA, but our lone scratch Reg already
- // contains the MRS result. findScratchNonCalleeSaveRegister() in
- // AArch64FrameLowering might help us find such a scratch register
- // though. If we failed to find a scratch register, we could emit a
- // stream of add instructions to build up the immediate. Or, we could try
- // to insert a AArch64::MOVi32imm before register allocation so that we
- // didn't need to scavenge for a scratch register.
- report_fatal_error("Unable to encode Stack Protector Guard Offset");
+ MBB.erase(MI);
+ return true;
}
- MBB.erase(MI);
- return true;
- }
- const GlobalValue *GV =
- cast<GlobalValue>((*MI.memoperands_begin())->getValue());
- const TargetMachine &TM = MBB.getParent()->getTarget();
- unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
- const unsigned char MO_NC = AArch64II::MO_NC;
-
- if ((OpFlags & AArch64II::MO_GOT) != 0) {
- BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
- .addGlobalAddress(GV, 0, OpFlags);
- if (Subtarget.isTargetILP32()) {
- unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
- BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
- .addDef(Reg32, RegState::Dead)
- .addUse(Reg, RegState::Kill)
- .addImm(0)
- .addMemOperand(*MI.memoperands_begin())
- .addDef(Reg, RegState::Implicit);
- } else {
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI.memoperands_begin())->getValue());
+ const TargetMachine &TM = MBB.getParent()->getTarget();
+ unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
+ const unsigned char MO_NC = AArch64II::MO_NC;
+
+ if ((OpFlags & AArch64II::MO_GOT) != 0) {
+ BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
+ .addGlobalAddress(GV, 0, OpFlags);
+ if (Subtarget.isTargetILP32()) {
+ unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
+ BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
+ .addDef(Reg32, RegState::Dead)
+ .addUse(Reg, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(*MI.memoperands_begin())
+ .addDef(Reg, RegState::Implicit);
+ } else {
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(*MI.memoperands_begin());
+ }
+ } else if (TM.getCodeModel() == CodeModel::Large) {
+ assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
+ BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
+ .addImm(0);
+ BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
+ .addImm(16);
+ BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
+ .addImm(32);
+ BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G3)
+ .addImm(48);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
.addMemOperand(*MI.memoperands_begin());
- }
- } else if (TM.getCodeModel() == CodeModel::Large) {
- assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
- BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
- .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
- .addImm(0);
- BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
- .addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
- .addImm(16);
- BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
- .addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
- .addImm(32);
- BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
- .addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G3)
- .addImm(48);
- BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(0)
- .addMemOperand(*MI.memoperands_begin());
- } else if (TM.getCodeModel() == CodeModel::Tiny) {
- BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
- .addGlobalAddress(GV, 0, OpFlags);
- } else {
- BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
- .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
- unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
- if (Subtarget.isTargetILP32()) {
- unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
- BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
- .addDef(Reg32, RegState::Dead)
- .addUse(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, LoFlags)
- .addMemOperand(*MI.memoperands_begin())
- .addDef(Reg, RegState::Implicit);
+ } else if (TM.getCodeModel() == CodeModel::Tiny) {
+ BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
+ .addGlobalAddress(GV, 0, OpFlags);
} else {
- BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
- .addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, LoFlags)
- .addMemOperand(*MI.memoperands_begin());
+ BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
+ .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
+ unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
+ if (Subtarget.isTargetILP32()) {
+ unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
+ BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
+ .addDef(Reg32, RegState::Dead)
+ .addUse(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, LoFlags)
+ .addMemOperand(*MI.memoperands_begin())
+ .addDef(Reg, RegState::Implicit);
+ } else {
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, LoFlags)
+ .addMemOperand(*MI.memoperands_begin());
+ }
}
- }
- MBB.erase(MI);
+ MBB.erase(MI);
- return true;
-}
+ return true;
+ }
-// Return true if this instruction simply sets its single destination register
-// to zero. This is equivalent to a register rename of the zero-register.
-bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::MOVZWi:
- case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
- if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
- assert(MI.getDesc().getNumOperands() == 3 &&
- MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
- return true;
+ // Return true if this instruction simply sets its single destination register
+ // to zero. This is equivalent to a register rename of the zero-register.
+ bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::MOVZWi:
+ case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
+ if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
+ assert(MI.getDesc().getNumOperands() == 3 &&
+ MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
+ return true;
+ }
+ break;
+ case AArch64::ANDWri: // and Rd, Rzr, #imm
+ return MI.getOperand(1).getReg() == AArch64::WZR;
+ case AArch64::ANDXri:
+ return MI.getOperand(1).getReg() == AArch64::XZR;
+ case TargetOpcode::COPY:
+ return MI.getOperand(1).getReg() == AArch64::WZR;
}
- break;
- case AArch64::ANDWri: // and Rd, Rzr, #imm
- return MI.getOperand(1).getReg() == AArch64::WZR;
- case AArch64::ANDXri:
- return MI.getOperand(1).getReg() == AArch64::XZR;
- case TargetOpcode::COPY:
- return MI.getOperand(1).getReg() == AArch64::WZR;
+ return false;
}
- return false;
-}
-// Return true if this instruction simply renames a general register without
-// modifying bits.
-bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case TargetOpcode::COPY: {
- // GPR32 copies will by lowered to ORRXrs
- Register DstReg = MI.getOperand(0).getReg();
- return (AArch64::GPR32RegClass.contains(DstReg) ||
- AArch64::GPR64RegClass.contains(DstReg));
- }
- case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
- if (MI.getOperand(1).getReg() == AArch64::XZR) {
- assert(MI.getDesc().getNumOperands() == 4 &&
- MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
- return true;
+ // Return true if this instruction simply renames a general register without
+ // modifying bits.
+ bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::COPY: {
+ // GPR32 copies will by lowered to ORRXrs
+ Register DstReg = MI.getOperand(0).getReg();
+ return (AArch64::GPR32RegClass.contains(DstReg) ||
+ AArch64::GPR64RegClass.contains(DstReg));
}
- break;
- case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
- if (MI.getOperand(2).getImm() == 0) {
- assert(MI.getDesc().getNumOperands() == 4 &&
- MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
- return true;
+ case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
+ if (MI.getOperand(1).getReg() == AArch64::XZR) {
+ assert(MI.getDesc().getNumOperands() == 4 &&
+ MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
+ return true;
+ }
+ break;
+ case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
+ if (MI.getOperand(2).getImm() == 0) {
+ assert(MI.getDesc().getNumOperands() == 4 &&
+ MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
+ return true;
+ }
+ break;
}
- break;
+ return false;
}
- return false;
-}
-// Return true if this instruction simply renames a general register without
-// modifying bits.
-bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case TargetOpcode::COPY: {
- Register DstReg = MI.getOperand(0).getReg();
- return AArch64::FPR128RegClass.contains(DstReg);
- }
- case AArch64::ORRv16i8:
- if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
- "invalid ORRv16i8 operands");
- return true;
+ // Return true if this instruction simply renames a general register without
+ // modifying bits.
+ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::COPY: {
+ Register DstReg = MI.getOperand(0).getReg();
+ return AArch64::FPR128RegClass.contains(DstReg);
}
- break;
+ case AArch64::ORRv16i8:
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
+ "invalid ORRv16i8 operands");
+ return true;
+ }
+ break;
+ }
+ return false;
}
- return false;
-}
-Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
- int &FrameIndex) const {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::LDRWui:
- case AArch64::LDRXui:
- case AArch64::LDRBui:
- case AArch64::LDRHui:
- case AArch64::LDRSui:
- case AArch64::LDRDui:
- case AArch64::LDRQui:
- case AArch64::LDR_PXI:
- if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
- MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
- FrameIndex = MI.getOperand(1).getIndex();
- return MI.getOperand(0).getReg();
+ Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::LDRWui:
+ case AArch64::LDRXui:
+ case AArch64::LDRBui:
+ case AArch64::LDRHui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ case AArch64::LDR_PXI:
+ if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
+ MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
+ }
+ break;
}
- break;
- }
- return 0;
-}
+ return 0;
+ }
-Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
- int &FrameIndex) const {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::STRWui:
- case AArch64::STRXui:
- case AArch64::STRBui:
- case AArch64::STRHui:
- case AArch64::STRSui:
- case AArch64::STRDui:
- case AArch64::STRQui:
- case AArch64::STR_PXI:
- if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
- MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
- FrameIndex = MI.getOperand(1).getIndex();
- return MI.getOperand(0).getReg();
+ Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::STRWui:
+ case AArch64::STRXui:
+ case AArch64::STRBui:
+ case AArch64::STRHui:
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
+ case AArch64::STR_PXI:
+ if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
+ MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
+ }
+ break;
}
- break;
+ return 0;
}
- return 0;
-}
-/// Check all MachineMemOperands for a hint to suppress pairing.
-bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
- return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
- return MMO->getFlags() & MOSuppressPair;
- });
-}
+ /// Check all MachineMemOperands for a hint to suppress pairing.
+ bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
+ return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+ return MMO->getFlags() & MOSuppressPair;
+ });
+ }
-/// Set a flag on the first MachineMemOperand to suppress pairing.
-void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
- if (MI.memoperands_empty())
- return;
- (*MI.memoperands_begin())->setFlags(MOSuppressPair);
-}
+ /// Set a flag on the first MachineMemOperand to suppress pairing.
+ void AArch64InstrInfo::suppressLdStPair(MachineInstr & MI) {
+ if (MI.memoperands_empty())
+ return;
+ (*MI.memoperands_begin())->setFlags(MOSuppressPair);
+ }
-/// Check all MachineMemOperands for a hint that the load/store is strided.
-bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
- return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
- return MMO->getFlags() & MOStridedAccess;
- });
-}
+ /// Check all MachineMemOperands for a hint that the load/store is strided.
+ bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
+ return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+ return MMO->getFlags() & MOStridedAccess;
+ });
+ }
-bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
- switch (Opc) {
- default:
- return false;
- case AArch64::STURSi:
- case AArch64::STRSpre:
- case AArch64::STURDi:
- case AArch64::STRDpre:
- case AArch64::STURQi:
- case AArch64::STRQpre:
- case AArch64::STURBBi:
- case AArch64::STURHHi:
- case AArch64::STURWi:
- case AArch64::STRWpre:
- case AArch64::STURXi:
- case AArch64::STRXpre:
- case AArch64::LDURSi:
- case AArch64::LDRSpre:
- case AArch64::LDURDi:
- case AArch64::LDRDpre:
- case AArch64::LDURQi:
- case AArch64::LDRQpre:
- case AArch64::LDURWi:
- case AArch64::LDRWpre:
- case AArch64::LDURXi:
- case AArch64::LDRXpre:
- case AArch64::LDRSWpre:
- case AArch64::LDURSWi:
- case AArch64::LDURHHi:
- case AArch64::LDURBBi:
- case AArch64::LDURSBWi:
- case AArch64::LDURSHWi:
- return true;
+ bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64::STURSi:
+ case AArch64::STRSpre:
+ case AArch64::STURDi:
+ case AArch64::STRDpre:
+ case AArch64::STURQi:
+ case AArch64::STRQpre:
+ case AArch64::STURBBi:
+ case AArch64::STURHHi:
+ case AArch64::STURWi:
+ case AArch64::STRWpre:
+ case AArch64::STURXi:
+ case AArch64::STRXpre:
+ case AArch64::LDURSi:
+ case AArch64::LDRSpre:
+ case AArch64::LDURDi:
+ case AArch64::LDRDpre:
+ case AArch64::LDURQi:
+ case AArch64::LDRQpre:
+ case AArch64::LDURWi:
+ case AArch64::LDRWpre:
+ case AArch64::LDURXi:
+ case AArch64::LDRXpre:
+ case AArch64::LDRSWpre:
+ case AArch64::LDURSWi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHWi:
+ return true;
+ }
}
-}
-std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
- switch (Opc) {
- default: return {};
- case AArch64::PRFMui: return AArch64::PRFUMi;
- case AArch64::LDRXui: return AArch64::LDURXi;
- case AArch64::LDRWui: return AArch64::LDURWi;
- case AArch64::LDRBui: return AArch64::LDURBi;
- case AArch64::LDRHui: return AArch64::LDURHi;
- case AArch64::LDRSui: return AArch64::LDURSi;
- case AArch64::LDRDui: return AArch64::LDURDi;
- case AArch64::LDRQui: return AArch64::LDURQi;
- case AArch64::LDRBBui: return AArch64::LDURBBi;
- case AArch64::LDRHHui: return AArch64::LDURHHi;
- case AArch64::LDRSBXui: return AArch64::LDURSBXi;
- case AArch64::LDRSBWui: return AArch64::LDURSBWi;
- case AArch64::LDRSHXui: return AArch64::LDURSHXi;
- case AArch64::LDRSHWui: return AArch64::LDURSHWi;
- case AArch64::LDRSWui: return AArch64::LDURSWi;
- case AArch64::STRXui: return AArch64::STURXi;
- case AArch64::STRWui: return AArch64::STURWi;
- case AArch64::STRBui: return AArch64::STURBi;
- case AArch64::STRHui: return AArch64::STURHi;
- case AArch64::STRSui: return AArch64::STURSi;
- case AArch64::STRDui: return AArch64::STURDi;
- case AArch64::STRQui: return AArch64::STURQi;
- case AArch64::STRBBui: return AArch64::STURBBi;
- case AArch64::STRHHui: return AArch64::STURHHi;
- }
-}
-
-unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
- case AArch64::ADDG:
- case AArch64::LDAPURBi:
- case AArch64::LDAPURHi:
- case AArch64::LDAPURi:
- case AArch64::LDAPURSBWi:
- case AArch64::LDAPURSBXi:
- case AArch64::LDAPURSHWi:
- case AArch64::LDAPURSHXi:
- case AArch64::LDAPURSWi:
- case AArch64::LDAPURXi:
- case AArch64::LDR_PPXI:
- case AArch64::LDR_PXI:
- case AArch64::LDR_ZXI:
- case AArch64::LDR_ZZXI:
- case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
- case AArch64::LDR_ZZZXI:
- case AArch64::LDR_ZZZZXI:
- case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
- case AArch64::LDRBBui:
- case AArch64::LDRBui:
- case AArch64::LDRDui:
- case AArch64::LDRHHui:
- case AArch64::LDRHui:
- case AArch64::LDRQui:
- case AArch64::LDRSBWui:
- case AArch64::LDRSBXui:
- case AArch64::LDRSHWui:
- case AArch64::LDRSHXui:
- case AArch64::LDRSui:
- case AArch64::LDRSWui:
- case AArch64::LDRWui:
- case AArch64::LDRXui:
- case AArch64::LDURBBi:
- case AArch64::LDURBi:
- case AArch64::LDURDi:
- case AArch64::LDURHHi:
- case AArch64::LDURHi:
- case AArch64::LDURQi:
- case AArch64::LDURSBWi:
- case AArch64::LDURSBXi:
- case AArch64::LDURSHWi:
- case AArch64::LDURSHXi:
- case AArch64::LDURSi:
- case AArch64::LDURSWi:
- case AArch64::LDURWi:
- case AArch64::LDURXi:
- case AArch64::PRFMui:
- case AArch64::PRFUMi:
- case AArch64::ST2Gi:
- case AArch64::STGi:
- case AArch64::STLURBi:
- case AArch64::STLURHi:
- case AArch64::STLURWi:
- case AArch64::STLURXi:
- case AArch64::StoreSwiftAsyncContext:
- case AArch64::STR_PPXI:
- case AArch64::STR_PXI:
- case AArch64::STR_ZXI:
- case AArch64::STR_ZZXI:
- case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
- case AArch64::STR_ZZZXI:
- case AArch64::STR_ZZZZXI:
- case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
- case AArch64::STRBBui:
- case AArch64::STRBui:
- case AArch64::STRDui:
- case AArch64::STRHHui:
- case AArch64::STRHui:
- case AArch64::STRQui:
- case AArch64::STRSui:
- case AArch64::STRWui:
- case AArch64::STRXui:
- case AArch64::STURBBi:
- case AArch64::STURBi:
- case AArch64::STURDi:
- case AArch64::STURHHi:
- case AArch64::STURHi:
- case AArch64::STURQi:
- case AArch64::STURSi:
- case AArch64::STURWi:
- case AArch64::STURXi:
- case AArch64::STZ2Gi:
- case AArch64::STZGi:
- case AArch64::TAGPstack:
- case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
- case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
- return 2;
- case AArch64::LD1B_D_IMM:
- case AArch64::LD1B_H_IMM:
- case AArch64::LD1B_IMM:
- case AArch64::LD1B_S_IMM:
- case AArch64::LD1D_IMM:
- case AArch64::LD1H_D_IMM:
- case AArch64::LD1H_IMM:
- case AArch64::LD1H_S_IMM:
- case AArch64::LD1RB_D_IMM:
- case AArch64::LD1RB_H_IMM:
- case AArch64::LD1RB_IMM:
- case AArch64::LD1RB_S_IMM:
- case AArch64::LD1RD_IMM:
- case AArch64::LD1RH_D_IMM:
- case AArch64::LD1RH_IMM:
- case AArch64::LD1RH_S_IMM:
- case AArch64::LD1RSB_D_IMM:
- case AArch64::LD1RSB_H_IMM:
- case AArch64::LD1RSB_S_IMM:
- case AArch64::LD1RSH_D_IMM:
- case AArch64::LD1RSH_S_IMM:
- case AArch64::LD1RSW_IMM:
- case AArch64::LD1RW_D_IMM:
- case AArch64::LD1RW_IMM:
- case AArch64::LD1SB_D_IMM:
- case AArch64::LD1SB_H_IMM:
- case AArch64::LD1SB_S_IMM:
- case AArch64::LD1SH_D_IMM:
- case AArch64::LD1SH_S_IMM:
- case AArch64::LD1SW_D_IMM:
- case AArch64::LD1W_D_IMM:
- case AArch64::LD1W_IMM:
- case AArch64::LD2B_IMM:
- case AArch64::LD2D_IMM:
- case AArch64::LD2H_IMM:
- case AArch64::LD2W_IMM:
- case AArch64::LD3B_IMM:
- case AArch64::LD3D_IMM:
- case AArch64::LD3H_IMM:
- case AArch64::LD3W_IMM:
- case AArch64::LD4B_IMM:
- case AArch64::LD4D_IMM:
- case AArch64::LD4H_IMM:
- case AArch64::LD4W_IMM:
- case AArch64::LDG:
- case AArch64::LDNF1B_D_IMM:
- case AArch64::LDNF1B_H_IMM:
- case AArch64::LDNF1B_IMM:
- case AArch64::LDNF1B_S_IMM:
- case AArch64::LDNF1D_IMM:
- case AArch64::LDNF1H_D_IMM:
- case AArch64::LDNF1H_IMM:
- case AArch64::LDNF1H_S_IMM:
- case AArch64::LDNF1SB_D_IMM:
- case AArch64::LDNF1SB_H_IMM:
- case AArch64::LDNF1SB_S_IMM:
- case AArch64::LDNF1SH_D_IMM:
- case AArch64::LDNF1SH_S_IMM:
- case AArch64::LDNF1SW_D_IMM:
- case AArch64::LDNF1W_D_IMM:
- case AArch64::LDNF1W_IMM:
- case AArch64::LDNPDi:
- case AArch64::LDNPQi:
- case AArch64::LDNPSi:
- case AArch64::LDNPWi:
- case AArch64::LDNPXi:
- case AArch64::LDNT1B_ZRI:
- case AArch64::LDNT1D_ZRI:
- case AArch64::LDNT1H_ZRI:
- case AArch64::LDNT1W_ZRI:
- case AArch64::LDPDi:
- case AArch64::LDPQi:
- case AArch64::LDPSi:
- case AArch64::LDPWi:
- case AArch64::LDPXi:
- case AArch64::LDRBBpost:
- case AArch64::LDRBBpre:
- case AArch64::LDRBpost:
- case AArch64::LDRBpre:
- case AArch64::LDRDpost:
- case AArch64::LDRDpre:
- case AArch64::LDRHHpost:
- case AArch64::LDRHHpre:
- case AArch64::LDRHpost:
- case AArch64::LDRHpre:
- case AArch64::LDRQpost:
- case AArch64::LDRQpre:
- case AArch64::LDRSpost:
- case AArch64::LDRSpre:
- case AArch64::LDRWpost:
- case AArch64::LDRWpre:
- case AArch64::LDRXpost:
- case AArch64::LDRXpre:
- case AArch64::ST1B_D_IMM:
- case AArch64::ST1B_H_IMM:
- case AArch64::ST1B_IMM:
- case AArch64::ST1B_S_IMM:
- case AArch64::ST1D_IMM:
- case AArch64::ST1H_D_IMM:
- case AArch64::ST1H_IMM:
- case AArch64::ST1H_S_IMM:
- case AArch64::ST1W_D_IMM:
- case AArch64::ST1W_IMM:
- case AArch64::ST2B_IMM:
- case AArch64::ST2D_IMM:
- case AArch64::ST2H_IMM:
- case AArch64::ST2W_IMM:
- case AArch64::ST3B_IMM:
- case AArch64::ST3D_IMM:
- case AArch64::ST3H_IMM:
- case AArch64::ST3W_IMM:
- case AArch64::ST4B_IMM:
- case AArch64::ST4D_IMM:
- case AArch64::ST4H_IMM:
- case AArch64::ST4W_IMM:
- case AArch64::STGPi:
- case AArch64::STGPreIndex:
- case AArch64::STZGPreIndex:
- case AArch64::ST2GPreIndex:
- case AArch64::STZ2GPreIndex:
- case AArch64::STGPostIndex:
- case AArch64::STZGPostIndex:
- case AArch64::ST2GPostIndex:
- case AArch64::STZ2GPostIndex:
- case AArch64::STNPDi:
- case AArch64::STNPQi:
- case AArch64::STNPSi:
- case AArch64::STNPWi:
- case AArch64::STNPXi:
- case AArch64::STNT1B_ZRI:
- case AArch64::STNT1D_ZRI:
- case AArch64::STNT1H_ZRI:
- case AArch64::STNT1W_ZRI:
- case AArch64::STPDi:
- case AArch64::STPQi:
- case AArch64::STPSi:
- case AArch64::STPWi:
- case AArch64::STPXi:
- case AArch64::STRBBpost:
- case AArch64::STRBBpre:
- case AArch64::STRBpost:
- case AArch64::STRBpre:
- case AArch64::STRDpost:
- case AArch64::STRDpre:
- case AArch64::STRHHpost:
- case AArch64::STRHHpre:
- case AArch64::STRHpost:
- case AArch64::STRHpre:
- case AArch64::STRQpost:
- case AArch64::STRQpre:
- case AArch64::STRSpost:
- case AArch64::STRSpre:
- case AArch64::STRWpost:
- case AArch64::STRWpre:
- case AArch64::STRXpost:
- case AArch64::STRXpre:
- return 3;
- case AArch64::LDPDpost:
- case AArch64::LDPDpre:
- case AArch64::LDPQpost:
- case AArch64::LDPQpre:
- case AArch64::LDPSpost:
- case AArch64::LDPSpre:
- case AArch64::LDPWpost:
- case AArch64::LDPWpre:
- case AArch64::LDPXpost:
- case AArch64::LDPXpre:
- case AArch64::STGPpre:
- case AArch64::STGPpost:
- case AArch64::STPDpost:
- case AArch64::STPDpre:
- case AArch64::STPQpost:
- case AArch64::STPQpre:
- case AArch64::STPSpost:
- case AArch64::STPSpre:
- case AArch64::STPWpost:
- case AArch64::STPWpre:
- case AArch64::STPXpost:
- case AArch64::STPXpre:
- return 4;
- }
-}
-
-bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- return false;
- // Scaled instructions.
- case AArch64::STRSui:
- case AArch64::STRDui:
- case AArch64::STRQui:
- case AArch64::STRXui:
- case AArch64::STRWui:
- case AArch64::LDRSui:
- case AArch64::LDRDui:
- case AArch64::LDRQui:
- case AArch64::LDRXui:
- case AArch64::LDRWui:
- case AArch64::LDRSWui:
- // Unscaled instructions.
- case AArch64::STURSi:
- case AArch64::STRSpre:
- case AArch64::STURDi:
- case AArch64::STRDpre:
- case AArch64::STURQi:
- case AArch64::STRQpre:
- case AArch64::STURWi:
- case AArch64::STRWpre:
- case AArch64::STURXi:
- case AArch64::STRXpre:
- case AArch64::LDURSi:
- case AArch64::LDRSpre:
- case AArch64::LDURDi:
- case AArch64::LDRDpre:
- case AArch64::LDURQi:
- case AArch64::LDRQpre:
- case AArch64::LDURWi:
- case AArch64::LDRWpre:
- case AArch64::LDURXi:
- case AArch64::LDRXpre:
- case AArch64::LDURSWi:
- case AArch64::LDRSWpre:
- // SVE instructions.
- case AArch64::LDR_ZXI:
- case AArch64::STR_ZXI:
- return true;
+ std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return {};
+ case AArch64::PRFMui:
+ return AArch64::PRFUMi;
+ case AArch64::LDRXui:
+ return AArch64::LDURXi;
+ case AArch64::LDRWui:
+ return AArch64::LDURWi;
+ case AArch64::LDRBui:
+ return AArch64::LDURBi;
+ case AArch64::LDRHui:
+ return AArch64::LDURHi;
+ case AArch64::LDRSui:
+ return AArch64::LDURSi;
+ case AArch64::LDRDui:
+ return AArch64::LDURDi;
+ case AArch64::LDRQui:
+ return AArch64::LDURQi;
+ case AArch64::LDRBBui:
+ return AArch64::LDURBBi;
+ case AArch64::LDRHHui:
+ return AArch64::LDURHHi;
+ case AArch64::LDRSBXui:
+ return AArch64::LDURSBXi;
+ case AArch64::LDRSBWui:
+ return AArch64::LDURSBWi;
+ case AArch64::LDRSHXui:
+ return AArch64::LDURSHXi;
+ case AArch64::LDRSHWui:
+ return AArch64::LDURSHWi;
+ case AArch64::LDRSWui:
+ return AArch64::LDURSWi;
+ case AArch64::STRXui:
+ return AArch64::STURXi;
+ case AArch64::STRWui:
+ return AArch64::STURWi;
+ case AArch64::STRBui:
+ return AArch64::STURBi;
+ case AArch64::STRHui:
+ return AArch64::STURHi;
+ case AArch64::STRSui:
+ return AArch64::STURSi;
+ case AArch64::STRDui:
+ return AArch64::STURDi;
+ case AArch64::STRQui:
+ return AArch64::STURQi;
+ case AArch64::STRBBui:
+ return AArch64::STURBBi;
+ case AArch64::STRHHui:
+ return AArch64::STURHHi;
+ }
}
-}
-bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- assert((!MI.isCall() || !MI.isReturn()) &&
- "Unexpected instruction - was a new tail call opcode introduced?");
- return false;
- case AArch64::TCRETURNdi:
- case AArch64::TCRETURNri:
- case AArch64::TCRETURNrix16x17:
- case AArch64::TCRETURNrix17:
- case AArch64::TCRETURNrinotx16:
- case AArch64::TCRETURNriALL:
- case AArch64::AUTH_TCRETURN:
- case AArch64::AUTH_TCRETURN_BTI:
- return true;
+ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
+ case AArch64::ADDG:
+ case AArch64::LDAPURBi:
+ case AArch64::LDAPURHi:
+ case AArch64::LDAPURi:
+ case AArch64::LDAPURSBWi:
+ case AArch64::LDAPURSBXi:
+ case AArch64::LDAPURSHWi:
+ case AArch64::LDAPURSHXi:
+ case AArch64::LDAPURSWi:
+ case AArch64::LDAPURXi:
+ case AArch64::LDR_PPXI:
+ case AArch64::LDR_PXI:
+ case AArch64::LDR_ZXI:
+ case AArch64::LDR_ZZXI:
+ case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
+ case AArch64::LDR_ZZZXI:
+ case AArch64::LDR_ZZZZXI:
+ case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
+ case AArch64::LDRBBui:
+ case AArch64::LDRBui:
+ case AArch64::LDRDui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRHui:
+ case AArch64::LDRQui:
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSHWui:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSui:
+ case AArch64::LDRSWui:
+ case AArch64::LDRWui:
+ case AArch64::LDRXui:
+ case AArch64::LDURBBi:
+ case AArch64::LDURBi:
+ case AArch64::LDURDi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURHi:
+ case AArch64::LDURQi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSHWi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSi:
+ case AArch64::LDURSWi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi:
+ case AArch64::PRFMui:
+ case AArch64::PRFUMi:
+ case AArch64::ST2Gi:
+ case AArch64::STGi:
+ case AArch64::STLURBi:
+ case AArch64::STLURHi:
+ case AArch64::STLURWi:
+ case AArch64::STLURXi:
+ case AArch64::StoreSwiftAsyncContext:
+ case AArch64::STR_PPXI:
+ case AArch64::STR_PXI:
+ case AArch64::STR_ZXI:
+ case AArch64::STR_ZZXI:
+ case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
+ case AArch64::STR_ZZZXI:
+ case AArch64::STR_ZZZZXI:
+ case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
+ case AArch64::STRBBui:
+ case AArch64::STRBui:
+ case AArch64::STRDui:
+ case AArch64::STRHHui:
+ case AArch64::STRHui:
+ case AArch64::STRQui:
+ case AArch64::STRSui:
+ case AArch64::STRWui:
+ case AArch64::STRXui:
+ case AArch64::STURBBi:
+ case AArch64::STURBi:
+ case AArch64::STURDi:
+ case AArch64::STURHHi:
+ case AArch64::STURHi:
+ case AArch64::STURQi:
+ case AArch64::STURSi:
+ case AArch64::STURWi:
+ case AArch64::STURXi:
+ case AArch64::STZ2Gi:
+ case AArch64::STZGi:
+ case AArch64::TAGPstack:
+ case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+ case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
+ return 2;
+ case AArch64::LD1B_D_IMM:
+ case AArch64::LD1B_H_IMM:
+ case AArch64::LD1B_IMM:
+ case AArch64::LD1B_S_IMM:
+ case AArch64::LD1D_IMM:
+ case AArch64::LD1H_D_IMM:
+ case AArch64::LD1H_IMM:
+ case AArch64::LD1H_S_IMM:
+ case AArch64::LD1RB_D_IMM:
+ case AArch64::LD1RB_H_IMM:
+ case AArch64::LD1RB_IMM:
+ case AArch64::LD1RB_S_IMM:
+ case AArch64::LD1RD_IMM:
+ case AArch64::LD1RH_D_IMM:
+ case AArch64::LD1RH_IMM:
+ case AArch64::LD1RH_S_IMM:
+ case AArch64::LD1RSB_D_IMM:
+ case AArch64::LD1RSB_H_IMM:
+ case AArch64::LD1RSB_S_IMM:
+ case AArch64::LD1RSH_D_IMM:
+ case AArch64::LD1RSH_S_IMM:
+ case AArch64::LD1RSW_IMM:
+ case AArch64::LD1RW_D_IMM:
+ case AArch64::LD1RW_IMM:
+ case AArch64::LD1SB_D_IMM:
+ case AArch64::LD1SB_H_IMM:
+ case AArch64::LD1SB_S_IMM:
+ case AArch64::LD1SH_D_IMM:
+ case AArch64::LD1SH_S_IMM:
+ case AArch64::LD1SW_D_IMM:
+ case AArch64::LD1W_D_IMM:
+ case AArch64::LD1W_IMM:
+ case AArch64::LD2B_IMM:
+ case AArch64::LD2D_IMM:
+ case AArch64::LD2H_IMM:
+ case AArch64::LD2W_IMM:
+ case AArch64::LD3B_IMM:
+ case AArch64::LD3D_IMM:
+ case AArch64::LD3H_IMM:
+ case AArch64::LD3W_IMM:
+ case AArch64::LD4B_IMM:
+ case AArch64::LD4D_IMM:
+ case AArch64::LD4H_IMM:
+ case AArch64::LD4W_IMM:
+ case AArch64::LDG:
+ case AArch64::LDNF1B_D_IMM:
+ case AArch64::LDNF1B_H_IMM:
+ case AArch64::LDNF1B_IMM:
+ case AArch64::LDNF1B_S_IMM:
+ case AArch64::LDNF1D_IMM:
+ case AArch64::LDNF1H_D_IMM:
+ case AArch64::LDNF1H_IMM:
+ case AArch64::LDNF1H_S_IMM:
+ case AArch64::LDNF1SB_D_IMM:
+ case AArch64::LDNF1SB_H_IMM:
+ case AArch64::LDNF1SB_S_IMM:
+ case AArch64::LDNF1SH_D_IMM:
+ case AArch64::LDNF1SH_S_IMM:
+ case AArch64::LDNF1SW_D_IMM:
+ case AArch64::LDNF1W_D_IMM:
+ case AArch64::LDNF1W_IMM:
+ case AArch64::LDNPDi:
+ case AArch64::LDNPQi:
+ case AArch64::LDNPSi:
+ case AArch64::LDNPWi:
+ case AArch64::LDNPXi:
+ case AArch64::LDNT1B_ZRI:
+ case AArch64::LDNT1D_ZRI:
+ case AArch64::LDNT1H_ZRI:
+ case AArch64::LDNT1W_ZRI:
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+ case AArch64::LDPSi:
+ case AArch64::LDPWi:
+ case AArch64::LDPXi:
+ case AArch64::LDRBBpost:
+ case AArch64::LDRBBpre:
+ case AArch64::LDRBpost:
+ case AArch64::LDRBpre:
+ case AArch64::LDRDpost:
+ case AArch64::LDRDpre:
+ case AArch64::LDRHHpost:
+ case AArch64::LDRHHpre:
+ case AArch64::LDRHpost:
+ case AArch64::LDRHpre:
+ case AArch64::LDRQpost:
+ case AArch64::LDRQpre:
+ case AArch64::LDRSpost:
+ case AArch64::LDRSpre:
+ case AArch64::LDRWpost:
+ case AArch64::LDRWpre:
+ case AArch64::LDRXpost:
+ case AArch64::LDRXpre:
+ case AArch64::ST1B_D_IMM:
+ case AArch64::ST1B_H_IMM:
+ case AArch64::ST1B_IMM:
+ case AArch64::ST1B_S_IMM:
+ case AArch64::ST1D_IMM:
+ case AArch64::ST1H_D_IMM:
+ case AArch64::ST1H_IMM:
+ case AArch64::ST1H_S_IMM:
+ case AArch64::ST1W_D_IMM:
+ case AArch64::ST1W_IMM:
+ case AArch64::ST2B_IMM:
+ case AArch64::ST2D_IMM:
+ case AArch64::ST2H_IMM:
+ case AArch64::ST2W_IMM:
+ case AArch64::ST3B_IMM:
+ case AArch64::ST3D_IMM:
+ case AArch64::ST3H_IMM:
+ case AArch64::ST3W_IMM:
+ case AArch64::ST4B_IMM:
+ case AArch64::ST4D_IMM:
+ case AArch64::ST4H_IMM:
+ case AArch64::ST4W_IMM:
+ case AArch64::STGPi:
+ case AArch64::STGPreIndex:
+ case AArch64::STZGPreIndex:
+ case AArch64::ST2GPreIndex:
+ case AArch64::STZ2GPreIndex:
+ case AArch64::STGPostIndex:
+ case AArch64::STZGPostIndex:
+ case AArch64::ST2GPostIndex:
+ case AArch64::STZ2GPostIndex:
+ case AArch64::STNPDi:
+ case AArch64::STNPQi:
+ case AArch64::STNPSi:
+ case AArch64::STNPWi:
+ case AArch64::STNPXi:
+ case AArch64::STNT1B_ZRI:
+ case AArch64::STNT1D_ZRI:
+ case AArch64::STNT1H_ZRI:
+ case AArch64::STNT1W_ZRI:
+ case AArch64::STPDi:
+ case AArch64::STPQi:
+ case AArch64::STPSi:
+ case AArch64::STPWi:
+ case AArch64::STPXi:
+ case AArch64::STRBBpost:
+ case AArch64::STRBBpre:
+ case AArch64::STRBpost:
+ case AArch64::STRBpre:
+ case AArch64::STRDpost:
+ case AArch64::STRDpre:
+ case AArch64::STRHHpost:
+ case AArch64::STRHHpre:
+ case AArch64::STRHpost:
+ case AArch64::STRHpre:
+ case AArch64::STRQpost:
+ case AArch64::STRQpre:
+ case AArch64::STRSpost:
+ case AArch64::STRSpre:
+ case AArch64::STRWpost:
+ case AArch64::STRWpre:
+ case AArch64::STRXpost:
+ case AArch64::STRXpre:
+ return 3;
+ case AArch64::LDPDpost:
+ case AArch64::LDPDpre:
+ case AArch64::LDPQpost:
+ case AArch64::LDPQpre:
+ case AArch64::LDPSpost:
+ case AArch64::LDPSpre:
+ case AArch64::LDPWpost:
+ case AArch64::LDPWpre:
+ case AArch64::LDPXpost:
+ case AArch64::LDPXpre:
+ case AArch64::STGPpre:
+ case AArch64::STGPpost:
+ case AArch64::STPDpost:
+ case AArch64::STPDpre:
+ case AArch64::STPQpost:
+ case AArch64::STPQpre:
+ case AArch64::STPSpost:
+ case AArch64::STPSpre:
+ case AArch64::STPWpost:
+ case AArch64::STPWpre:
+ case AArch64::STPXpost:
+ case AArch64::STPXpre:
+ return 4;
+ }
}
-}
-unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
- switch (Opc) {
- default:
- llvm_unreachable("Opcode has no flag setting equivalent!");
- // 32-bit cases:
- case AArch64::ADDWri:
- return AArch64::ADDSWri;
- case AArch64::ADDWrr:
- return AArch64::ADDSWrr;
- case AArch64::ADDWrs:
- return AArch64::ADDSWrs;
- case AArch64::ADDWrx:
- return AArch64::ADDSWrx;
- case AArch64::ANDWri:
- return AArch64::ANDSWri;
- case AArch64::ANDWrr:
- return AArch64::ANDSWrr;
- case AArch64::ANDWrs:
- return AArch64::ANDSWrs;
- case AArch64::BICWrr:
- return AArch64::BICSWrr;
- case AArch64::BICWrs:
- return AArch64::BICSWrs;
- case AArch64::SUBWri:
- return AArch64::SUBSWri;
- case AArch64::SUBWrr:
- return AArch64::SUBSWrr;
- case AArch64::SUBWrs:
- return AArch64::SUBSWrs;
- case AArch64::SUBWrx:
- return AArch64::SUBSWrx;
- // 64-bit cases:
- case AArch64::ADDXri:
- return AArch64::ADDSXri;
- case AArch64::ADDXrr:
- return AArch64::ADDSXrr;
- case AArch64::ADDXrs:
- return AArch64::ADDSXrs;
- case AArch64::ADDXrx:
- return AArch64::ADDSXrx;
- case AArch64::ANDXri:
- return AArch64::ANDSXri;
- case AArch64::ANDXrr:
- return AArch64::ANDSXrr;
- case AArch64::ANDXrs:
- return AArch64::ANDSXrs;
- case AArch64::BICXrr:
- return AArch64::BICSXrr;
- case AArch64::BICXrs:
- return AArch64::BICSXrs;
- case AArch64::SUBXri:
- return AArch64::SUBSXri;
- case AArch64::SUBXrr:
- return AArch64::SUBSXrr;
- case AArch64::SUBXrs:
- return AArch64::SUBSXrs;
- case AArch64::SUBXrx:
- return AArch64::SUBSXrx;
- // SVE instructions:
- case AArch64::AND_PPzPP:
- return AArch64::ANDS_PPzPP;
- case AArch64::BIC_PPzPP:
- return AArch64::BICS_PPzPP;
- case AArch64::EOR_PPzPP:
- return AArch64::EORS_PPzPP;
- case AArch64::NAND_PPzPP:
- return AArch64::NANDS_PPzPP;
- case AArch64::NOR_PPzPP:
- return AArch64::NORS_PPzPP;
- case AArch64::ORN_PPzPP:
- return AArch64::ORNS_PPzPP;
- case AArch64::ORR_PPzPP:
- return AArch64::ORRS_PPzPP;
- case AArch64::BRKA_PPzP:
- return AArch64::BRKAS_PPzP;
- case AArch64::BRKPA_PPzPP:
- return AArch64::BRKPAS_PPzPP;
- case AArch64::BRKB_PPzP:
- return AArch64::BRKBS_PPzP;
- case AArch64::BRKPB_PPzPP:
- return AArch64::BRKPBS_PPzPP;
- case AArch64::BRKN_PPzP:
- return AArch64::BRKNS_PPzP;
- case AArch64::RDFFR_PPz:
- return AArch64::RDFFRS_PPz;
- case AArch64::PTRUE_B:
- return AArch64::PTRUES_B;
+ bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ // Scaled instructions.
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
+ case AArch64::STRXui:
+ case AArch64::STRWui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ case AArch64::LDRXui:
+ case AArch64::LDRWui:
+ case AArch64::LDRSWui:
+ // Unscaled instructions.
+ case AArch64::STURSi:
+ case AArch64::STRSpre:
+ case AArch64::STURDi:
+ case AArch64::STRDpre:
+ case AArch64::STURQi:
+ case AArch64::STRQpre:
+ case AArch64::STURWi:
+ case AArch64::STRWpre:
+ case AArch64::STURXi:
+ case AArch64::STRXpre:
+ case AArch64::LDURSi:
+ case AArch64::LDRSpre:
+ case AArch64::LDURDi:
+ case AArch64::LDRDpre:
+ case AArch64::LDURQi:
+ case AArch64::LDRQpre:
+ case AArch64::LDURWi:
+ case AArch64::LDRWpre:
+ case AArch64::LDURXi:
+ case AArch64::LDRXpre:
+ case AArch64::LDURSWi:
+ case AArch64::LDRSWpre:
+ // SVE instructions.
+ case AArch64::LDR_ZXI:
+ case AArch64::STR_ZXI:
+ return true;
+ }
}
-}
-// Is this a candidate for ld/st merging or pairing? For example, we don't
-// touch volatiles or load/stores that have a hint to avoid pair formation.
-bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
+ bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ assert((!MI.isCall() || !MI.isReturn()) &&
+ "Unexpected instruction - was a new tail call opcode introduced?");
+ return false;
+ case AArch64::TCRETURNdi:
+ case AArch64::TCRETURNri:
+ case AArch64::TCRETURNrix16x17:
+ case AArch64::TCRETURNrix17:
+ case AArch64::TCRETURNrinotx16:
+ case AArch64::TCRETURNriALL:
+ case AArch64::AUTH_TCRETURN:
+ case AArch64::AUTH_TCRETURN_BTI:
+ return true;
+ }
+ }
- bool IsPreLdSt = isPreLdSt(MI);
+ unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Opcode has no flag setting equivalent!");
+ // 32-bit cases:
+ case AArch64::ADDWri:
+ return AArch64::ADDSWri;
+ case AArch64::ADDWrr:
+ return AArch64::ADDSWrr;
+ case AArch64::ADDWrs:
+ return AArch64::ADDSWrs;
+ case AArch64::ADDWrx:
+ return AArch64::ADDSWrx;
+ case AArch64::ANDWri:
+ return AArch64::ANDSWri;
+ case AArch64::ANDWrr:
+ return AArch64::ANDSWrr;
+ case AArch64::ANDWrs:
+ return AArch64::ANDSWrs;
+ case AArch64::BICWrr:
+ return AArch64::BICSWrr;
+ case AArch64::BICWrs:
+ return AArch64::BICSWrs;
+ case AArch64::SUBWri:
+ return AArch64::SUBSWri;
+ case AArch64::SUBWrr:
+ return AArch64::SUBSWrr;
+ case AArch64::SUBWrs:
+ return AArch64::SUBSWrs;
+ case AArch64::SUBWrx:
+ return AArch64::SUBSWrx;
+ // 64-bit cases:
+ case AArch64::ADDXri:
+ return AArch64::ADDSXri;
+ case AArch64::ADDXrr:
+ return AArch64::ADDSXrr;
+ case AArch64::ADDXrs:
+ return AArch64::ADDSXrs;
+ case AArch64::ADDXrx:
+ return AArch64::ADDSXrx;
+ case AArch64::ANDXri:
+ return AArch64::ANDSXri;
+ case AArch64::ANDXrr:
+ return AArch64::ANDSXrr;
+ case AArch64::ANDXrs:
+ return AArch64::ANDSXrs;
+ case AArch64::BICXrr:
+ return AArch64::BICSXrr;
+ case AArch64::BICXrs:
+ return AArch64::BICSXrs;
+ case AArch64::SUBXri:
+ return AArch64::SUBSXri;
+ case AArch64::SUBXrr:
+ return AArch64::SUBSXrr;
+ case AArch64::SUBXrs:
+ return AArch64::SUBSXrs;
+ case AArch64::SUBXrx:
+ return AArch64::SUBSXrx;
+ // SVE instructions:
+ case AArch64::AND_PPzPP:
+ return AArch64::ANDS_PPzPP;
+ case AArch64::BIC_PPzPP:
+ return AArch64::BICS_PPzPP;
+ case AArch64::EOR_PPzPP:
+ return AArch64::EORS_PPzPP;
+ case AArch64::NAND_PPzPP:
+ return AArch64::NANDS_PPzPP;
+ case AArch64::NOR_PPzPP:
+ return AArch64::NORS_PPzPP;
+ case AArch64::ORN_PPzPP:
+ return AArch64::ORNS_PPzPP;
+ case AArch64::ORR_PPzPP:
+ return AArch64::ORRS_PPzPP;
+ case AArch64::BRKA_PPzP:
+ return AArch64::BRKAS_PPzP;
+ case AArch64::BRKPA_PPzPP:
+ return AArch64::BRKPAS_PPzPP;
+ case AArch64::BRKB_PPzP:
+ return AArch64::BRKBS_PPzP;
+ case AArch64::BRKPB_PPzPP:
+ return AArch64::BRKPBS_PPzPP;
+ case AArch64::BRKN_PPzP:
+ return AArch64::BRKNS_PPzP;
+ case AArch64::RDFFR_PPz:
+ return AArch64::RDFFRS_PPz;
+ case AArch64::PTRUE_B:
+ return AArch64::PTRUES_B;
+ }
+ }
- // If this is a volatile load/store, don't mess with it.
- if (MI.hasOrderedMemoryRef())
- return false;
+ // Is this a candidate for ld/st merging or pairing? For example, we don't
+ // touch volatiles or load/stores that have a hint to avoid pair formation.
+ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI)
+ const {
+
+ bool IsPreLdSt = isPreLdSt(MI);
- // Make sure this is a reg/fi+imm (as opposed to an address reloc).
- // For Pre-inc LD/ST, the operand is shifted by one.
- assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
- MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
- "Expected a reg or frame index operand.");
+ // If this is a volatile load/store, don't mess with it.
+ if (MI.hasOrderedMemoryRef())
+ return false;
- // For Pre-indexed addressing quadword instructions, the third operand is the
- // immediate value.
- bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
+ // Make sure this is a reg/fi+imm (as opposed to an address reloc).
+ // For Pre-inc LD/ST, the operand is shifted by one.
+ assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
+ MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
+ "Expected a reg or frame index operand.");
- if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
- return false;
+ // For Pre-indexed addressing quadword instructions, the third operand is
+ // the immediate value.
+ bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
- // Can't merge/pair if the instruction modifies the base register.
- // e.g., ldr x0, [x0]
- // This case will never occur with an FI base.
- // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
- // STR<S,D,Q,W,X>pre, it can be merged.
- // For example:
- // ldr q0, [x11, #32]!
- // ldr q1, [x11, #16]
- // to
- // ldp q0, q1, [x11, #32]!
- if (MI.getOperand(1).isReg() && !IsPreLdSt) {
- Register BaseReg = MI.getOperand(1).getReg();
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- if (MI.modifiesRegister(BaseReg, TRI))
+ if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
+ return false;
+
+ // Can't merge/pair if the instruction modifies the base register.
+ // e.g., ldr x0, [x0]
+ // This case will never occur with an FI base.
+ // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
+ // STR<S,D,Q,W,X>pre, it can be merged.
+ // For example:
+ // ldr q0, [x11, #32]!
+ // ldr q1, [x11, #16]
+ // to
+ // ldp q0, q1, [x11, #32]!
+ if (MI.getOperand(1).isReg() && !IsPreLdSt) {
+ Register BaseReg = MI.getOperand(1).getReg();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (MI.modifiesRegister(BaseReg, TRI))
+ return false;
+ }
+
+ // Pairing SVE fills/spills is only valid for little-endian targets that
+ // implement VLS 128.
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::LDR_ZXI:
+ case AArch64::STR_ZXI:
+ if (!Subtarget.isLittleEndian() ||
+ Subtarget.getSVEVectorSizeInBits() != 128)
+ return false;
+ }
+
+ // Check if this load/store has a hint to avoid pair formation.
+ // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
+ if (isLdStPairSuppressed(MI))
return false;
+
+ // Do not pair any callee-save store/reload instructions in the
+ // prologue/epilogue if the CFI information encoded the operations as
+ // separate instructions, as that will cause the size of the actual prologue
+ // to mismatch with the prologue size recorded in the Windows CFI.
+ const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
+ bool NeedsWinCFI = MAI->usesWindowsCFI() &&
+ MI.getMF()->getFunction().needsUnwindTableEntry();
+ if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
+ MI.getFlag(MachineInstr::FrameDestroy)))
+ return false;
+
+ // On some CPUs quad load/store pairs are slower than two single
+ // load/stores.
+ if (Subtarget.isPaired128Slow()) {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::LDURQi:
+ case AArch64::STURQi:
+ case AArch64::LDRQui:
+ case AArch64::STRQui:
+ return false;
+ }
+ }
+
+ return true;
}
- // Pairing SVE fills/spills is only valid for little-endian targets that
- // implement VLS 128.
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::LDR_ZXI:
- case AArch64::STR_ZXI:
- if (!Subtarget.isLittleEndian() ||
- Subtarget.getSVEVectorSizeInBits() != 128)
+ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, LocationSize &Width,
+ const TargetRegisterInfo *TRI) const {
+ if (!LdSt.mayLoadOrStore())
+ return false;
+
+ const MachineOperand *BaseOp;
+ TypeSize WidthN(0, false);
+ if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
+ WidthN, TRI))
return false;
+ // The maximum vscale is 16 under AArch64, return the maximal extent for the
+ // vector.
+ Width = LocationSize::precise(WidthN);
+ BaseOps.push_back(BaseOp);
+ return true;
}
- // Check if this load/store has a hint to avoid pair formation.
- // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
- if (isLdStPairSuppressed(MI))
- return false;
+ std::optional<ExtAddrMode> AArch64InstrInfo::getAddrModeFromMemoryOp(
+ const MachineInstr &MemI, const TargetRegisterInfo *TRI) const {
+ const MachineOperand *Base; // Filled with the base operand of MI.
+ int64_t Offset; // Filled with the offset of MI.
+ bool OffsetIsScalable;
+ if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
+ return std::nullopt;
- // Do not pair any callee-save store/reload instructions in the
- // prologue/epilogue if the CFI information encoded the operations as separate
- // instructions, as that will cause the size of the actual prologue to mismatch
- // with the prologue size recorded in the Windows CFI.
- const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
- bool NeedsWinCFI = MAI->usesWindowsCFI() &&
- MI.getMF()->getFunction().needsUnwindTableEntry();
- if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
- MI.getFlag(MachineInstr::FrameDestroy)))
- return false;
+ if (!Base->isReg())
+ return std::nullopt;
+ ExtAddrMode AM;
+ AM.BaseReg = Base->getReg();
+ AM.Displacement = Offset;
+ AM.ScaledReg = 0;
+ AM.Scale = 0;
+ return AM;
+ }
- // On some CPUs quad load/store pairs are slower than two single load/stores.
- if (Subtarget.isPaired128Slow()) {
- switch (MI.getOpcode()) {
+ bool AArch64InstrInfo::canFoldIntoAddrMode(
+ const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI,
+ ExtAddrMode &AM) const {
+ // Filter out instructions into which we cannot fold.
+ unsigned NumBytes;
+ int64_t OffsetScale = 1;
+ switch (MemI.getOpcode()) {
default:
- break;
+ return false;
+
case AArch64::LDURQi:
case AArch64::STURQi:
+ NumBytes = 16;
+ break;
+
+ case AArch64::LDURDi:
+ case AArch64::STURDi:
+ case AArch64::LDURXi:
+ case AArch64::STURXi:
+ NumBytes = 8;
+ break;
+
+ case AArch64::LDURWi:
+ case AArch64::LDURSWi:
+ case AArch64::STURWi:
+ NumBytes = 4;
+ break;
+
+ case AArch64::LDURHi:
+ case AArch64::STURHi:
+ case AArch64::LDURHHi:
+ case AArch64::STURHHi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSHWi:
+ NumBytes = 2;
+ break;
+
+ case AArch64::LDRBroX:
+ case AArch64::LDRBBroX:
+ case AArch64::LDRSBXroX:
+ case AArch64::LDRSBWroX:
+ case AArch64::STRBroX:
+ case AArch64::STRBBroX:
+ case AArch64::LDURBi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSBWi:
+ case AArch64::STURBi:
+ case AArch64::STURBBi:
+ case AArch64::LDRBui:
+ case AArch64::LDRBBui:
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSBWui:
+ case AArch64::STRBui:
+ case AArch64::STRBBui:
+ NumBytes = 1;
+ break;
+
+ case AArch64::LDRQroX:
+ case AArch64::STRQroX:
case AArch64::LDRQui:
case AArch64::STRQui:
- return false;
- }
- }
+ NumBytes = 16;
+ OffsetScale = 16;
+ break;
- return true;
-}
+ case AArch64::LDRDroX:
+ case AArch64::STRDroX:
+ case AArch64::LDRXroX:
+ case AArch64::STRXroX:
+ case AArch64::LDRDui:
+ case AArch64::STRDui:
+ case AArch64::LDRXui:
+ case AArch64::STRXui:
+ NumBytes = 8;
+ OffsetScale = 8;
+ break;
-bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
- const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
- const TargetRegisterInfo *TRI) const {
- if (!LdSt.mayLoadOrStore())
- return false;
+ case AArch64::LDRWroX:
+ case AArch64::LDRSWroX:
+ case AArch64::STRWroX:
+ case AArch64::LDRWui:
+ case AArch64::LDRSWui:
+ case AArch64::STRWui:
+ NumBytes = 4;
+ OffsetScale = 4;
+ break;
- const MachineOperand *BaseOp;
- TypeSize WidthN(0, false);
- if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
- WidthN, TRI))
- return false;
- // The maximum vscale is 16 under AArch64, return the maximal extent for the
- // vector.
- Width = LocationSize::precise(WidthN);
- BaseOps.push_back(BaseOp);
- return true;
-}
+ case AArch64::LDRHroX:
+ case AArch64::STRHroX:
+ case AArch64::LDRHHroX:
+ case AArch64::STRHHroX:
+ case AArch64::LDRSHXroX:
+ case AArch64::LDRSHWroX:
+ case AArch64::LDRHui:
+ case AArch64::STRHui:
+ case AArch64::LDRHHui:
+ case AArch64::STRHHui:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSHWui:
+ NumBytes = 2;
+ OffsetScale = 2;
+ break;
+ }
-std::optional<ExtAddrMode>
-AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
- const TargetRegisterInfo *TRI) const {
- const MachineOperand *Base; // Filled with the base operand of MI.
- int64_t Offset; // Filled with the offset of MI.
- bool OffsetIsScalable;
- if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
- return std::nullopt;
+ // Check the fold operand is not the loaded/stored value.
+ const MachineOperand &BaseRegOp = MemI.getOperand(0);
+ if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
+ return false;
- if (!Base->isReg())
- return std::nullopt;
- ExtAddrMode AM;
- AM.BaseReg = Base->getReg();
- AM.Displacement = Offset;
- AM.ScaledReg = 0;
- AM.Scale = 0;
- return AM;
-}
-
-bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
- Register Reg,
- const MachineInstr &AddrI,
- ExtAddrMode &AM) const {
- // Filter out instructions into which we cannot fold.
- unsigned NumBytes;
- int64_t OffsetScale = 1;
- switch (MemI.getOpcode()) {
- default:
- return false;
+ // Handle memory instructions with a [Reg, Reg] addressing mode.
+ if (MemI.getOperand(2).isReg()) {
+ // Bail if the addressing mode already includes extension of the offset
+ // register.
+ if (MemI.getOperand(3).getImm())
+ return false;
- case AArch64::LDURQi:
- case AArch64::STURQi:
- NumBytes = 16;
- break;
+ // Check if we actually have a scaled offset.
+ if (MemI.getOperand(4).getImm() == 0)
+ OffsetScale = 1;
- case AArch64::LDURDi:
- case AArch64::STURDi:
- case AArch64::LDURXi:
- case AArch64::STURXi:
- NumBytes = 8;
- break;
+ // If the address instructions is folded into the base register, then the
+ // addressing mode must not have a scale. Then we can swap the base and
+ // the scaled registers.
+ if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
+ return false;
- case AArch64::LDURWi:
- case AArch64::LDURSWi:
- case AArch64::STURWi:
- NumBytes = 4;
- break;
+ switch (AddrI.getOpcode()) {
+ default:
+ return false;
- case AArch64::LDURHi:
- case AArch64::STURHi:
- case AArch64::LDURHHi:
- case AArch64::STURHHi:
- case AArch64::LDURSHXi:
- case AArch64::LDURSHWi:
- NumBytes = 2;
- break;
+ case AArch64::SBFMXri:
+ // sxtw Xa, Wm
+ // ldr Xd, [Xn, Xa, lsl #N]
+ // ->
+ // ldr Xd, [Xn, Wm, sxtw #N]
+ if (AddrI.getOperand(2).getImm() != 0 ||
+ AddrI.getOperand(3).getImm() != 31)
+ return false;
- case AArch64::LDRBroX:
- case AArch64::LDRBBroX:
- case AArch64::LDRSBXroX:
- case AArch64::LDRSBWroX:
- case AArch64::STRBroX:
- case AArch64::STRBBroX:
- case AArch64::LDURBi:
- case AArch64::LDURBBi:
- case AArch64::LDURSBXi:
- case AArch64::LDURSBWi:
- case AArch64::STURBi:
- case AArch64::STURBBi:
- case AArch64::LDRBui:
- case AArch64::LDRBBui:
- case AArch64::LDRSBXui:
- case AArch64::LDRSBWui:
- case AArch64::STRBui:
- case AArch64::STRBBui:
- NumBytes = 1;
- break;
+ AM.BaseReg = MemI.getOperand(1).getReg();
+ if (AM.BaseReg == Reg)
+ AM.BaseReg = MemI.getOperand(2).getReg();
+ AM.ScaledReg = AddrI.getOperand(1).getReg();
+ AM.Scale = OffsetScale;
+ AM.Displacement = 0;
+ AM.Form = ExtAddrMode::Formula::SExtScaledReg;
+ return true;
- case AArch64::LDRQroX:
- case AArch64::STRQroX:
- case AArch64::LDRQui:
- case AArch64::STRQui:
- NumBytes = 16;
- OffsetScale = 16;
- break;
+ case TargetOpcode::SUBREG_TO_REG: {
+ // mov Wa, Wm
+ // ldr Xd, [Xn, Xa, lsl #N]
+ // ->
+ // ldr Xd, [Xn, Wm, uxtw #N]
- case AArch64::LDRDroX:
- case AArch64::STRDroX:
- case AArch64::LDRXroX:
- case AArch64::STRXroX:
- case AArch64::LDRDui:
- case AArch64::STRDui:
- case AArch64::LDRXui:
- case AArch64::STRXui:
- NumBytes = 8;
- OffsetScale = 8;
- break;
+ // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
+ if (AddrI.getOperand(1).getImm() != 0 ||
+ AddrI.getOperand(3).getImm() != AArch64::sub_32)
+ return false;
- case AArch64::LDRWroX:
- case AArch64::LDRSWroX:
- case AArch64::STRWroX:
- case AArch64::LDRWui:
- case AArch64::LDRSWui:
- case AArch64::STRWui:
- NumBytes = 4;
- OffsetScale = 4;
- break;
+ const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
+ Register OffsetReg = AddrI.getOperand(2).getReg();
+ if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
+ return false;
- case AArch64::LDRHroX:
- case AArch64::STRHroX:
- case AArch64::LDRHHroX:
- case AArch64::STRHHroX:
- case AArch64::LDRSHXroX:
- case AArch64::LDRSHWroX:
- case AArch64::LDRHui:
- case AArch64::STRHui:
- case AArch64::LDRHHui:
- case AArch64::STRHHui:
- case AArch64::LDRSHXui:
- case AArch64::LDRSHWui:
- NumBytes = 2;
- OffsetScale = 2;
- break;
- }
+ const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
+ if (DefMI.getOpcode() != AArch64::ORRWrs ||
+ DefMI.getOperand(1).getReg() != AArch64::WZR ||
+ DefMI.getOperand(3).getImm() != 0)
+ return false;
- // Check the fold operand is not the loaded/stored value.
- const MachineOperand &BaseRegOp = MemI.getOperand(0);
- if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
- return false;
+ AM.BaseReg = MemI.getOperand(1).getReg();
+ if (AM.BaseReg == Reg)
+ AM.BaseReg = MemI.getOperand(2).getReg();
+ AM.ScaledReg = DefMI.getOperand(2).getReg();
+ AM.Scale = OffsetScale;
+ AM.Displacement = 0;
+ AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
+ return true;
+ }
+ }
+ }
- // Handle memory instructions with a [Reg, Reg] addressing mode.
- if (MemI.getOperand(2).isReg()) {
- // Bail if the addressing mode already includes extension of the offset
- // register.
- if (MemI.getOperand(3).getImm())
- return false;
+ // Handle memory instructions with a [Reg, #Imm] addressing mode.
- // Check if we actually have a scaled offset.
- if (MemI.getOperand(4).getImm() == 0)
- OffsetScale = 1;
+ // Check we are not breaking a potential conversion to an LDP.
+ auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
+ int64_t NewOffset) -> bool {
+ int64_t MinOffset, MaxOffset;
+ switch (NumBytes) {
+ default:
+ return true;
+ case 4:
+ MinOffset = -256;
+ MaxOffset = 252;
+ break;
+ case 8:
+ MinOffset = -512;
+ MaxOffset = 504;
+ break;
+ case 16:
+ MinOffset = -1024;
+ MaxOffset = 1008;
+ break;
+ }
+ return OldOffset < MinOffset || OldOffset > MaxOffset ||
+ (NewOffset >= MinOffset && NewOffset <= MaxOffset);
+ };
+ auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
+ int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
+ int64_t NewOffset = OldOffset + Disp;
+ if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
+ return false;
+ // If the old offset would fit into an LDP, but the new offset wouldn't,
+ // bail out.
+ if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
+ return false;
+ AM.BaseReg = AddrI.getOperand(1).getReg();
+ AM.ScaledReg = 0;
+ AM.Scale = 0;
+ AM.Displacement = NewOffset;
+ AM.Form = ExtAddrMode::Formula::Basic;
+ return true;
+ };
- // If the address instructions is folded into the base register, then the
- // addressing mode must not have a scale. Then we can swap the base and the
- // scaled registers.
- if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
- return false;
+ auto canFoldAddRegIntoAddrMode =
+ [&](int64_t Scale,
+ ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
+ if (MemI.getOperand(2).getImm() != 0)
+ return false;
+ if ((unsigned)Scale != Scale)
+ return false;
+ if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
+ return false;
+ AM.BaseReg = AddrI.getOperand(1).getReg();
+ AM.ScaledReg = AddrI.getOperand(2).getReg();
+ AM.Scale = Scale;
+ AM.Displacement = 0;
+ AM.Form = Form;
+ return true;
+ };
+
+ auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
+ unsigned Opcode = MemI.getOpcode();
+ return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
+ Subtarget.isSTRQroSlow();
+ };
+ int64_t Disp = 0;
+ const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
switch (AddrI.getOpcode()) {
default:
return false;
- case AArch64::SBFMXri:
- // sxtw Xa, Wm
- // ldr Xd, [Xn, Xa, lsl #N]
+ case AArch64::ADDXri:
+ // add Xa, Xn, #N
+ // ldr Xd, [Xa, #M]
// ->
- // ldr Xd, [Xn, Wm, sxtw #N]
- if (AddrI.getOperand(2).getImm() != 0 ||
- AddrI.getOperand(3).getImm() != 31)
- return false;
+ // ldr Xd, [Xn, #N'+M]
+ Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
+ return canFoldAddSubImmIntoAddrMode(Disp);
- AM.BaseReg = MemI.getOperand(1).getReg();
- if (AM.BaseReg == Reg)
- AM.BaseReg = MemI.getOperand(2).getReg();
- AM.ScaledReg = AddrI.getOperand(1).getReg();
- AM.Scale = OffsetScale;
- AM.Displacement = 0;
- AM.Form = ExtAddrMode::Formula::SExtScaledReg;
- return true;
+ case AArch64::SUBXri:
+ // sub Xa, Xn, #N
+ // ldr Xd, [Xa, #M]
+ // ->
+ // ldr Xd, [Xn, #N'+M]
+ Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
+ return canFoldAddSubImmIntoAddrMode(-Disp);
- case TargetOpcode::SUBREG_TO_REG: {
- // mov Wa, Wm
- // ldr Xd, [Xn, Xa, lsl #N]
+ case AArch64::ADDXrs: {
+ // add Xa, Xn, Xm, lsl #N
+ // ldr Xd, [Xa]
// ->
- // ldr Xd, [Xn, Wm, uxtw #N]
+ // ldr Xd, [Xn, Xm, lsl #N]
- // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
- if (AddrI.getOperand(1).getImm() != 0 ||
- AddrI.getOperand(3).getImm() != AArch64::sub_32)
+ // Don't fold the add if the result would be slower, unless optimising for
+ // size.
+ unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
+ if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
return false;
+ Shift = AArch64_AM::getShiftValue(Shift);
+ if (!OptSize) {
+ if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
+ return false;
+ if (avoidSlowSTRQ(MemI))
+ return false;
+ }
+ return canFoldAddRegIntoAddrMode(1ULL << Shift);
+ }
+
+ case AArch64::ADDXrr:
+ // add Xa, Xn, Xm
+ // ldr Xd, [Xa]
+ // ->
+ // ldr Xd, [Xn, Xm, lsl #0]
- const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
- Register OffsetReg = AddrI.getOperand(2).getReg();
- if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
+ // Don't fold the add if the result would be slower, unless optimising for
+ // size.
+ if (!OptSize && avoidSlowSTRQ(MemI))
return false;
+ return canFoldAddRegIntoAddrMode(1);
- const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
- if (DefMI.getOpcode() != AArch64::ORRWrs ||
- DefMI.getOperand(1).getReg() != AArch64::WZR ||
- DefMI.getOperand(3).getImm() != 0)
+ case AArch64::ADDXrx:
+ // add Xa, Xn, Wm, {s,u}xtw #N
+ // ldr Xd, [Xa]
+ // ->
+ // ldr Xd, [Xn, Wm, {s,u}xtw #N]
+
+ // Don't fold the add if the result would be slower, unless optimising for
+ // size.
+ if (!OptSize && avoidSlowSTRQ(MemI))
return false;
- AM.BaseReg = MemI.getOperand(1).getReg();
- if (AM.BaseReg == Reg)
- AM.BaseReg = MemI.getOperand(2).getReg();
- AM.ScaledReg = DefMI.getOperand(2).getReg();
- AM.Scale = OffsetScale;
- AM.Displacement = 0;
- AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
- return true;
+ // Can fold only sign-/zero-extend of a word.
+ unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
+ AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
+ if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
+ return false;
+
+ return canFoldAddRegIntoAddrMode(
+ 1ULL << AArch64_AM::getArithShiftValue(Imm),
+ (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
+ : ExtAddrMode::Formula::ZExtScaledReg);
}
+ }
+
+ // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
+ // return the opcode of an instruction performing the same operation, but
+ // using the [Reg, Reg] addressing mode.
+ static unsigned regOffsetOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Address folding not implemented for instruction");
+
+ case AArch64::LDURQi:
+ case AArch64::LDRQui:
+ return AArch64::LDRQroX;
+ case AArch64::STURQi:
+ case AArch64::STRQui:
+ return AArch64::STRQroX;
+ case AArch64::LDURDi:
+ case AArch64::LDRDui:
+ return AArch64::LDRDroX;
+ case AArch64::STURDi:
+ case AArch64::STRDui:
+ return AArch64::STRDroX;
+ case AArch64::LDURXi:
+ case AArch64::LDRXui:
+ return AArch64::LDRXroX;
+ case AArch64::STURXi:
+ case AArch64::STRXui:
+ return AArch64::STRXroX;
+ case AArch64::LDURWi:
+ case AArch64::LDRWui:
+ return AArch64::LDRWroX;
+ case AArch64::LDURSWi:
+ case AArch64::LDRSWui:
+ return AArch64::LDRSWroX;
+ case AArch64::STURWi:
+ case AArch64::STRWui:
+ return AArch64::STRWroX;
+ case AArch64::LDURHi:
+ case AArch64::LDRHui:
+ return AArch64::LDRHroX;
+ case AArch64::STURHi:
+ case AArch64::STRHui:
+ return AArch64::STRHroX;
+ case AArch64::LDURHHi:
+ case AArch64::LDRHHui:
+ return AArch64::LDRHHroX;
+ case AArch64::STURHHi:
+ case AArch64::STRHHui:
+ return AArch64::STRHHroX;
+ case AArch64::LDURSHXi:
+ case AArch64::LDRSHXui:
+ return AArch64::LDRSHXroX;
+ case AArch64::LDURSHWi:
+ case AArch64::LDRSHWui:
+ return AArch64::LDRSHWroX;
+ case AArch64::LDURBi:
+ case AArch64::LDRBui:
+ return AArch64::LDRBroX;
+ case AArch64::LDURBBi:
+ case AArch64::LDRBBui:
+ return AArch64::LDRBBroX;
+ case AArch64::LDURSBXi:
+ case AArch64::LDRSBXui:
+ return AArch64::LDRSBXroX;
+ case AArch64::LDURSBWi:
+ case AArch64::LDRSBWui:
+ return AArch64::LDRSBWroX;
+ case AArch64::STURBi:
+ case AArch64::STRBui:
+ return AArch64::STRBroX;
+ case AArch64::STURBBi:
+ case AArch64::STRBBui:
+ return AArch64::STRBBroX;
}
}
- // Handle memory instructions with a [Reg, #Imm] addressing mode.
+ // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
+ // return the opcode of an instruction performing the same operation, but
+ // using the [Reg, #Imm] addressing mode with scaled offset.
+ unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Address folding not implemented for instruction");
- // Check we are not breaking a potential conversion to an LDP.
- auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
- int64_t NewOffset) -> bool {
- int64_t MinOffset, MaxOffset;
- switch (NumBytes) {
+ case AArch64::LDURQi:
+ Scale = 16;
+ return AArch64::LDRQui;
+ case AArch64::STURQi:
+ Scale = 16;
+ return AArch64::STRQui;
+ case AArch64::LDURDi:
+ Scale = 8;
+ return AArch64::LDRDui;
+ case AArch64::STURDi:
+ Scale = 8;
+ return AArch64::STRDui;
+ case AArch64::LDURXi:
+ Scale = 8;
+ return AArch64::LDRXui;
+ case AArch64::STURXi:
+ Scale = 8;
+ return AArch64::STRXui;
+ case AArch64::LDURWi:
+ Scale = 4;
+ return AArch64::LDRWui;
+ case AArch64::LDURSWi:
+ Scale = 4;
+ return AArch64::LDRSWui;
+ case AArch64::STURWi:
+ Scale = 4;
+ return AArch64::STRWui;
+ case AArch64::LDURHi:
+ Scale = 2;
+ return AArch64::LDRHui;
+ case AArch64::STURHi:
+ Scale = 2;
+ return AArch64::STRHui;
+ case AArch64::LDURHHi:
+ Scale = 2;
+ return AArch64::LDRHHui;
+ case AArch64::STURHHi:
+ Scale = 2;
+ return AArch64::STRHHui;
+ case AArch64::LDURSHXi:
+ Scale = 2;
+ return AArch64::LDRSHXui;
+ case AArch64::LDURSHWi:
+ Scale = 2;
+ return AArch64::LDRSHWui;
+ case AArch64::LDURBi:
+ Scale = 1;
+ return AArch64::LDRBui;
+ case AArch64::LDURBBi:
+ Scale = 1;
+ return AArch64::LDRBBui;
+ case AArch64::LDURSBXi:
+ Scale = 1;
+ return AArch64::LDRSBXui;
+ case AArch64::LDURSBWi:
+ Scale = 1;
+ return AArch64::LDRSBWui;
+ case AArch64::STURBi:
+ Scale = 1;
+ return AArch64::STRBui;
+ case AArch64::STURBBi:
+ Scale = 1;
+ return AArch64::STRBBui;
+ case AArch64::LDRQui:
+ case AArch64::STRQui:
+ Scale = 16;
+ return Opcode;
+ case AArch64::LDRDui:
+ case AArch64::STRDui:
+ case AArch64::LDRXui:
+ case AArch64::STRXui:
+ Scale = 8;
+ return Opcode;
+ case AArch64::LDRWui:
+ case AArch64::LDRSWui:
+ case AArch64::STRWui:
+ Scale = 4;
+ return Opcode;
+ case AArch64::LDRHui:
+ case AArch64::STRHui:
+ case AArch64::LDRHHui:
+ case AArch64::STRHHui:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSHWui:
+ Scale = 2;
+ return Opcode;
+ case AArch64::LDRBui:
+ case AArch64::LDRBBui:
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSBWui:
+ case AArch64::STRBui:
+ case AArch64::STRBBui:
+ Scale = 1;
+ return Opcode;
+ }
+ }
+
+ // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
+ // return the opcode of an instruction performing the same operation, but
+ // using the [Reg, #Imm] addressing mode with unscaled offset.
+ unsigned unscaledOffsetOpcode(unsigned Opcode) {
+ switch (Opcode) {
default:
- return true;
- case 4:
- MinOffset = -256;
- MaxOffset = 252;
- break;
- case 8:
- MinOffset = -512;
- MaxOffset = 504;
- break;
- case 16:
- MinOffset = -1024;
- MaxOffset = 1008;
- break;
+ llvm_unreachable("Address folding not implemented for instruction");
+
+ case AArch64::LDURQi:
+ case AArch64::STURQi:
+ case AArch64::LDURDi:
+ case AArch64::STURDi:
+ case AArch64::LDURXi:
+ case AArch64::STURXi:
+ case AArch64::LDURWi:
+ case AArch64::LDURSWi:
+ case AArch64::STURWi:
+ case AArch64::LDURHi:
+ case AArch64::STURHi:
+ case AArch64::LDURHHi:
+ case AArch64::STURHHi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSHWi:
+ case AArch64::LDURBi:
+ case AArch64::STURBi:
+ case AArch64::LDURBBi:
+ case AArch64::STURBBi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSBXi:
+ return Opcode;
+ case AArch64::LDRQui:
+ return AArch64::LDURQi;
+ case AArch64::STRQui:
+ return AArch64::STURQi;
+ case AArch64::LDRDui:
+ return AArch64::LDURDi;
+ case AArch64::STRDui:
+ return AArch64::STURDi;
+ case AArch64::LDRXui:
+ return AArch64::LDURXi;
+ case AArch64::STRXui:
+ return AArch64::STURXi;
+ case AArch64::LDRWui:
+ return AArch64::LDURWi;
+ case AArch64::LDRSWui:
+ return AArch64::LDURSWi;
+ case AArch64::STRWui:
+ return AArch64::STURWi;
+ case AArch64::LDRHui:
+ return AArch64::LDURHi;
+ case AArch64::STRHui:
+ return AArch64::STURHi;
+ case AArch64::LDRHHui:
+ return AArch64::LDURHHi;
+ case AArch64::STRHHui:
+ return AArch64::STURHHi;
+ case AArch64::LDRSHXui:
+ return AArch64::LDURSHXi;
+ case AArch64::LDRSHWui:
+ return AArch64::LDURSHWi;
+ case AArch64::LDRBBui:
+ return AArch64::LDURBBi;
+ case AArch64::LDRBui:
+ return AArch64::LDURBi;
+ case AArch64::STRBBui:
+ return AArch64::STURBBi;
+ case AArch64::STRBui:
+ return AArch64::STURBi;
+ case AArch64::LDRSBWui:
+ return AArch64::LDURSBWi;
+ case AArch64::LDRSBXui:
+ return AArch64::LDURSBXi;
}
- return OldOffset < MinOffset || OldOffset > MaxOffset ||
- (NewOffset >= MinOffset && NewOffset <= MaxOffset);
- };
- auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
- int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
- int64_t NewOffset = OldOffset + Disp;
- if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
- return false;
- // If the old offset would fit into an LDP, but the new offset wouldn't,
- // bail out.
- if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
- return false;
- AM.BaseReg = AddrI.getOperand(1).getReg();
- AM.ScaledReg = 0;
- AM.Scale = 0;
- AM.Displacement = NewOffset;
- AM.Form = ExtAddrMode::Formula::Basic;
- return true;
- };
+ }
+
+ // Given the opcode of a memory load/store instruction, return the opcode of
+ // an instruction performing the same operation, but using the [Reg, Reg,
+ // {s,u}xtw #N] addressing mode with sign-/zero-extend of the offset register.
+ static unsigned offsetExtendOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Address folding not implemented for instruction");
- auto canFoldAddRegIntoAddrMode =
- [&](int64_t Scale,
- ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
- if (MemI.getOperand(2).getImm() != 0)
+ case AArch64::LDRQroX:
+ case AArch64::LDURQi:
+ case AArch64::LDRQui:
+ return AArch64::LDRQroW;
+ case AArch64::STRQroX:
+ case AArch64::STURQi:
+ case AArch64::STRQui:
+ return AArch64::STRQroW;
+ case AArch64::LDRDroX:
+ case AArch64::LDURDi:
+ case AArch64::LDRDui:
+ return AArch64::LDRDroW;
+ case AArch64::STRDroX:
+ case AArch64::STURDi:
+ case AArch64::STRDui:
+ return AArch64::STRDroW;
+ case AArch64::LDRXroX:
+ case AArch64::LDURXi:
+ case AArch64::LDRXui:
+ return AArch64::LDRXroW;
+ case AArch64::STRXroX:
+ case AArch64::STURXi:
+ case AArch64::STRXui:
+ return AArch64::STRXroW;
+ case AArch64::LDRWroX:
+ case AArch64::LDURWi:
+ case AArch64::LDRWui:
+ return AArch64::LDRWroW;
+ case AArch64::LDRSWroX:
+ case AArch64::LDURSWi:
+ case AArch64::LDRSWui:
+ return AArch64::LDRSWroW;
+ case AArch64::STRWroX:
+ case AArch64::STURWi:
+ case AArch64::STRWui:
+ return AArch64::STRWroW;
+ case AArch64::LDRHroX:
+ case AArch64::LDURHi:
+ case AArch64::LDRHui:
+ return AArch64::LDRHroW;
+ case AArch64::STRHroX:
+ case AArch64::STURHi:
+ case AArch64::STRHui:
+ return AArch64::STRHroW;
+ case AArch64::LDRHHroX:
+ case AArch64::LDURHHi:
+ case AArch64::LDRHHui:
+ return AArch64::LDRHHroW;
+ case AArch64::STRHHroX:
+ case AArch64::STURHHi:
+ case AArch64::STRHHui:
+ return AArch64::STRHHroW;
+ case AArch64::LDRSHXroX:
+ case AArch64::LDURSHXi:
+ case AArch64::LDRSHXui:
+ return AArch64::LDRSHXroW;
+ case AArch64::LDRSHWroX:
+ case AArch64::LDURSHWi:
+ case AArch64::LDRSHWui:
+ return AArch64::LDRSHWroW;
+ case AArch64::LDRBroX:
+ case AArch64::LDURBi:
+ case AArch64::LDRBui:
+ return AArch64::LDRBroW;
+ case AArch64::LDRBBroX:
+ case AArch64::LDURBBi:
+ case AArch64::LDRBBui:
+ return AArch64::LDRBBroW;
+ case AArch64::LDRSBXroX:
+ case AArch64::LDURSBXi:
+ case AArch64::LDRSBXui:
+ return AArch64::LDRSBXroW;
+ case AArch64::LDRSBWroX:
+ case AArch64::LDURSBWi:
+ case AArch64::LDRSBWui:
+ return AArch64::LDRSBWroW;
+ case AArch64::STRBroX:
+ case AArch64::STURBi:
+ case AArch64::STRBui:
+ return AArch64::STRBroW;
+ case AArch64::STRBBroX:
+ case AArch64::STURBBi:
+ case AArch64::STRBBui:
+ return AArch64::STRBBroW;
+ }
+ }
+
+ MachineInstr *AArch64InstrInfo::emitLdStWithAddr(
+ MachineInstr & MemI, const ExtAddrMode &AM) const {
+
+ const DebugLoc &DL = MemI.getDebugLoc();
+ MachineBasicBlock &MBB = *MemI.getParent();
+ MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
+
+ if (AM.Form == ExtAddrMode::Formula::Basic) {
+ if (AM.ScaledReg) {
+ // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
+ unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
+ MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
+ auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0)
+ .addReg(AM.BaseReg)
+ .addReg(AM.ScaledReg)
+ .addImm(0)
+ .addImm(AM.Scale > 1)
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+ return B.getInstr();
+ }
+
+ assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
+ "Addressing mode not supported for folding");
+
+ // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
+ unsigned Scale = 1;
+ unsigned Opcode = MemI.getOpcode();
+ if (isInt<9>(AM.Displacement))
+ Opcode = unscaledOffsetOpcode(Opcode);
+ else
+ Opcode = scaledOffsetOpcode(Opcode, Scale);
+
+ auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0)
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement / Scale)
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+ return B.getInstr();
+ }
+
+ if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
+ AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
+ // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw
+ // #N]`.
+ assert(AM.ScaledReg && !AM.Displacement &&
+ "Address offset can be a register or an immediate, but not both");
+ unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
+ MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
+ // Make sure the offset register is in the correct register class.
+ Register OffsetReg = AM.ScaledReg;
+ const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
+ if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
+ OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
+ .addReg(AM.ScaledReg, 0, AArch64::sub_32);
+ }
+ auto B = BuildMI(MBB, MemI, DL, get(Opcode))
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0)
+ .addReg(AM.BaseReg)
+ .addReg(OffsetReg)
+ .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
+ .addImm(AM.Scale != 1)
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+
+ return B.getInstr();
+ }
+
+ llvm_unreachable(
+ "Function must not be called with an addressing mode it can't handle");
+ }
+
+ /// Return true if the opcode is a post-index ld/st instruction, which really
+ /// loads from base+0.
+ static bool isPostIndexLdStOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
return false;
- if ((unsigned)Scale != Scale)
+ case AArch64::LD1Fourv16b_POST:
+ case AArch64::LD1Fourv1d_POST:
+ case AArch64::LD1Fourv2d_POST:
+ case AArch64::LD1Fourv2s_POST:
+ case AArch64::LD1Fourv4h_POST:
+ case AArch64::LD1Fourv4s_POST:
+ case AArch64::LD1Fourv8b_POST:
+ case AArch64::LD1Fourv8h_POST:
+ case AArch64::LD1Onev16b_POST:
+ case AArch64::LD1Onev1d_POST:
+ case AArch64::LD1Onev2d_POST:
+ case AArch64::LD1Onev2s_POST:
+ case AArch64::LD1Onev4h_POST:
+ case AArch64::LD1Onev4s_POST:
+ case AArch64::LD1Onev8b_POST:
+ case AArch64::LD1Onev8h_POST:
+ case AArch64::LD1Rv16b_POST:
+ case AArch64::LD1Rv1d_POST:
+ case AArch64::LD1Rv2d_POST:
+ case AArch64::LD1Rv2s_POST:
+ case AArch64::LD1Rv4h_POST:
+ case AArch64::LD1Rv4s_POST:
+ case AArch64::LD1Rv8b_POST:
+ case AArch64::LD1Rv8h_POST:
+ case AArch64::LD1Threev16b_POST:
+ case AArch64::LD1Threev1d_POST:
+ case AArch64::LD1Threev2d_POST:
+ case AArch64::LD1Threev2s_POST:
+ case AArch64::LD1Threev4h_POST:
+ case AArch64::LD1Threev4s_POST:
+ case AArch64::LD1Threev8b_POST:
+ case AArch64::LD1Threev8h_POST:
+ case AArch64::LD1Twov16b_POST:
+ case AArch64::LD1Twov1d_POST:
+ case AArch64::LD1Twov2d_POST:
+ case AArch64::LD1Twov2s_POST:
+ case AArch64::LD1Twov4h_POST:
+ case AArch64::LD1Twov4s_POST:
+ case AArch64::LD1Twov8b_POST:
+ case AArch64::LD1Twov8h_POST:
+ case AArch64::LD1i16_POST:
+ case AArch64::LD1i32_POST:
+ case AArch64::LD1i64_POST:
+ case AArch64::LD1i8_POST:
+ case AArch64::LD2Rv16b_POST:
+ case AArch64::LD2Rv1d_POST:
+ case AArch64::LD2Rv2d_POST:
+ case AArch64::LD2Rv2s_POST:
+ case AArch64::LD2Rv4h_POST:
+ case AArch64::LD2Rv4s_POST:
+ case AArch64::LD2Rv8b_POST:
+ case AArch64::LD2Rv8h_POST:
+ case AArch64::LD2Twov16b_POST:
+ case AArch64::LD2Twov2d_POST:
+ case AArch64::LD2Twov2s_POST:
+ case AArch64::LD2Twov4h_POST:
+ case AArch64::LD2Twov4s_POST:
+ case AArch64::LD2Twov8b_POST:
+ case AArch64::LD2Twov8h_POST:
+ case AArch64::LD2i16_POST:
+ case AArch64::LD2i32_POST:
+ case AArch64::LD2i64_POST:
+ case AArch64::LD2i8_POST:
+ case AArch64::LD3Rv16b_POST:
+ case AArch64::LD3Rv1d_POST:
+ case AArch64::LD3Rv2d_POST:
+ case AArch64::LD3Rv2s_POST:
+ case AArch64::LD3Rv4h_POST:
+ case AArch64::LD3Rv4s_POST:
+ case AArch64::LD3Rv8b_POST:
+ case AArch64::LD3Rv8h_POST:
+ case AArch64::LD3Threev16b_POST:
+ case AArch64::LD3Threev2d_POST:
+ case AArch64::LD3Threev2s_POST:
+ case AArch64::LD3Threev4h_POST:
+ case AArch64::LD3Threev4s_POST:
+ case AArch64::LD3Threev8b_POST:
+ case AArch64::LD3Threev8h_POST:
+ case AArch64::LD3i16_POST:
+ case AArch64::LD3i32_POST:
+ case AArch64::LD3i64_POST:
+ case AArch64::LD3i8_POST:
+ case AArch64::LD4Fourv16b_POST:
+ case AArch64::LD4Fourv2d_POST:
+ case AArch64::LD4Fourv2s_POST:
+ case AArch64::LD4Fourv4h_POST:
+ case AArch64::LD4Fourv4s_POST:
+ case AArch64::LD4Fourv8b_POST:
+ case AArch64::LD4Fourv8h_POST:
+ case AArch64::LD4Rv16b_POST:
+ case AArch64::LD4Rv1d_POST:
+ case AArch64::LD4Rv2d_POST:
+ case AArch64::LD4Rv2s_POST:
+ case AArch64::LD4Rv4h_POST:
+ case AArch64::LD4Rv4s_POST:
+ case AArch64::LD4Rv8b_POST:
+ case AArch64::LD4Rv8h_POST:
+ case AArch64::LD4i16_POST:
+ case AArch64::LD4i32_POST:
+ case AArch64::LD4i64_POST:
+ case AArch64::LD4i8_POST:
+ case AArch64::LDAPRWpost:
+ case AArch64::LDAPRXpost:
+ case AArch64::LDIAPPWpost:
+ case AArch64::LDIAPPXpost:
+ case AArch64::LDPDpost:
+ case AArch64::LDPQpost:
+ case AArch64::LDPSWpost:
+ case AArch64::LDPSpost:
+ case AArch64::LDPWpost:
+ case AArch64::LDPXpost:
+ case AArch64::LDRBBpost:
+ case AArch64::LDRBpost:
+ case AArch64::LDRDpost:
+ case AArch64::LDRHHpost:
+ case AArch64::LDRHpost:
+ case AArch64::LDRQpost:
+ case AArch64::LDRSBWpost:
+ case AArch64::LDRSBXpost:
+ case AArch64::LDRSHWpost:
+ case AArch64::LDRSHXpost:
+ case AArch64::LDRSWpost:
+ case AArch64::LDRSpost:
+ case AArch64::LDRWpost:
+ case AArch64::LDRXpost:
+ case AArch64::ST1Fourv16b_POST:
+ case AArch64::ST1Fourv1d_POST:
+ case AArch64::ST1Fourv2d_POST:
+ case AArch64::ST1Fourv2s_POST:
+ case AArch64::ST1Fourv4h_POST:
+ case AArch64::ST1Fourv4s_POST:
+ case AArch64::ST1Fourv8b_POST:
+ case AArch64::ST1Fourv8h_POST:
+ case AArch64::ST1Onev16b_POST:
+ case AArch64::ST1Onev1d_POST:
+ case AArch64::ST1Onev2d_POST:
+ case AArch64::ST1Onev2s_POST:
+ case AArch64::ST1Onev4h_POST:
+ case AArch64::ST1Onev4s_POST:
+ case AArch64::ST1Onev8b_POST:
+ case AArch64::ST1Onev8h_POST:
+ case AArch64::ST1Threev16b_POST:
+ case AArch64::ST1Threev1d_POST:
+ case AArch64::ST1Threev2d_POST:
+ case AArch64::ST1Threev2s_POST:
+ case AArch64::ST1Threev4h_POST:
+ case AArch64::ST1Threev4s_POST:
+ case AArch64::ST1Threev8b_POST:
+ case AArch64::ST1Threev8h_POST:
+ case AArch64::ST1Twov16b_POST:
+ case AArch64::ST1Twov1d_POST:
+ case AArch64::ST1Twov2d_POST:
+ case AArch64::ST1Twov2s_POST:
+ case AArch64::ST1Twov4h_POST:
+ case AArch64::ST1Twov4s_POST:
+ case AArch64::ST1Twov8b_POST:
+ case AArch64::ST1Twov8h_POST:
+ case AArch64::ST1i16_POST:
+ case AArch64::ST1i32_POST:
+ case AArch64::ST1i64_POST:
+ case AArch64::ST1i8_POST:
+ case AArch64::ST2GPostIndex:
+ case AArch64::ST2Twov16b_POST:
+ case AArch64::ST2Twov2d_POST:
+ case AArch64::ST2Twov2s_POST:
+ case AArch64::ST2Twov4h_POST:
+ case AArch64::ST2Twov4s_POST:
+ case AArch64::ST2Twov8b_POST:
+ case AArch64::ST2Twov8h_POST:
+ case AArch64::ST2i16_POST:
+ case AArch64::ST2i32_POST:
+ case AArch64::ST2i64_POST:
+ case AArch64::ST2i8_POST:
+ case AArch64::ST3Threev16b_POST:
+ case AArch64::ST3Threev2d_POST:
+ case AArch64::ST3Threev2s_POST:
+ case AArch64::ST3Threev4h_POST:
+ case AArch64::ST3Threev4s_POST:
+ case AArch64::ST3Threev8b_POST:
+ case AArch64::ST3Threev8h_POST:
+ case AArch64::ST3i16_POST:
+ case AArch64::ST3i32_POST:
+ case AArch64::ST3i64_POST:
+ case AArch64::ST3i8_POST:
+ case AArch64::ST4Fourv16b_POST:
+ case AArch64::ST4Fourv2d_POST:
+ case AArch64::ST4Fourv2s_POST:
+ case AArch64::ST4Fourv4h_POST:
+ case AArch64::ST4Fourv4s_POST:
+ case AArch64::ST4Fourv8b_POST:
+ case AArch64::ST4Fourv8h_POST:
+ case AArch64::ST4i16_POST:
+ case AArch64::ST4i32_POST:
+ case AArch64::ST4i64_POST:
+ case AArch64::ST4i8_POST:
+ case AArch64::STGPostIndex:
+ case AArch64::STGPpost:
+ case AArch64::STPDpost:
+ case AArch64::STPQpost:
+ case AArch64::STPSpost:
+ case AArch64::STPWpost:
+ case AArch64::STPXpost:
+ case AArch64::STRBBpost:
+ case AArch64::STRBpost:
+ case AArch64::STRDpost:
+ case AArch64::STRHHpost:
+ case AArch64::STRHpost:
+ case AArch64::STRQpost:
+ case AArch64::STRSpost:
+ case AArch64::STRWpost:
+ case AArch64::STRXpost:
+ case AArch64::STZ2GPostIndex:
+ case AArch64::STZGPostIndex:
+ return true;
+ }
+ }
+
+ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
+ const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
+ bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI)
+ const {
+ assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+ // Handle only loads/stores with base register followed by immediate offset.
+ if (LdSt.getNumExplicitOperands() == 3) {
+ // Non-paired instruction (e.g., ldr x1, [x0, #8]).
+ if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
+ !LdSt.getOperand(2).isImm())
+ return false;
+ } else if (LdSt.getNumExplicitOperands() == 4) {
+ // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
+ if (!LdSt.getOperand(1).isReg() ||
+ (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
+ !LdSt.getOperand(3).isImm())
+ return false;
+ } else
return false;
- if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
+
+ // Get the scaling factor for the instruction and set the width for the
+ // instruction.
+ TypeSize Scale(0U, false);
+ int64_t Dummy1, Dummy2;
+
+ // If this returns false, then it's an instruction we don't want to handle.
+ if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
return false;
- AM.BaseReg = AddrI.getOperand(1).getReg();
- AM.ScaledReg = AddrI.getOperand(2).getReg();
- AM.Scale = Scale;
- AM.Displacement = 0;
- AM.Form = Form;
- return true;
- };
- auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
- unsigned Opcode = MemI.getOpcode();
- return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
- Subtarget.isSTRQroSlow();
- };
+ // Compute the offset. Offset is calculated as the immediate operand
+ // multiplied by the scaling factor. Unscaled instructions have scaling
+ // factor set to 1. Postindex are a special case which have an offset of 0.
+ if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
+ BaseOp = &LdSt.getOperand(2);
+ Offset = 0;
+ } else if (LdSt.getNumExplicitOperands() == 3) {
+ BaseOp = &LdSt.getOperand(1);
+ Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
+ } else {
+ assert(LdSt.getNumExplicitOperands() == 4 &&
+ "invalid number of operands");
+ BaseOp = &LdSt.getOperand(2);
+ Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
+ }
+ OffsetIsScalable = Scale.isScalable();
+
+ return BaseOp->isReg() || BaseOp->isFI();
+ }
+
+ MachineOperand &AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(
+ MachineInstr & LdSt) const {
+ assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+ MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
+ assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
+ return OfsOp;
+ }
+
+ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
+ TypeSize &Width, int64_t &MinOffset,
+ int64_t &MaxOffset) {
+ switch (Opcode) {
+ // Not a memory operation or something we want to handle.
+ default:
+ Scale = TypeSize::getFixed(0);
+ Width = TypeSize::getFixed(0);
+ MinOffset = MaxOffset = 0;
+ return false;
+ // LDR / STR
+ case AArch64::LDRQui:
+ case AArch64::STRQui:
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(16);
+ MinOffset = 0;
+ MaxOffset = 4095;
+ break;
+ case AArch64::LDRXui:
+ case AArch64::LDRDui:
+ case AArch64::STRXui:
+ case AArch64::STRDui:
+ case AArch64::PRFMui:
+ Scale = TypeSize::getFixed(8);
+ Width = TypeSize::getFixed(8);
+ MinOffset = 0;
+ MaxOffset = 4095;
+ break;
+ case AArch64::LDRWui:
+ case AArch64::LDRSui:
+ case AArch64::LDRSWui:
+ case AArch64::STRWui:
+ case AArch64::STRSui:
+ Scale = TypeSize::getFixed(4);
+ Width = TypeSize::getFixed(4);
+ MinOffset = 0;
+ MaxOffset = 4095;
+ break;
+ case AArch64::LDRHui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRSHWui:
+ case AArch64::LDRSHXui:
+ case AArch64::STRHui:
+ case AArch64::STRHHui:
+ Scale = TypeSize::getFixed(2);
+ Width = TypeSize::getFixed(2);
+ MinOffset = 0;
+ MaxOffset = 4095;
+ break;
+ case AArch64::LDRBui:
+ case AArch64::LDRBBui:
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSBXui:
+ case AArch64::STRBui:
+ case AArch64::STRBBui:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(1);
+ MinOffset = 0;
+ MaxOffset = 4095;
+ break;
+ // post/pre inc
+ case AArch64::STRQpre:
+ case AArch64::LDRQpost:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(16);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::LDRDpost:
+ case AArch64::LDRDpre:
+ case AArch64::LDRXpost:
+ case AArch64::LDRXpre:
+ case AArch64::STRDpost:
+ case AArch64::STRDpre:
+ case AArch64::STRXpost:
+ case AArch64::STRXpre:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(8);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::STRWpost:
+ case AArch64::STRWpre:
+ case AArch64::LDRWpost:
+ case AArch64::LDRWpre:
+ case AArch64::STRSpost:
+ case AArch64::STRSpre:
+ case AArch64::LDRSpost:
+ case AArch64::LDRSpre:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(4);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::LDRHpost:
+ case AArch64::LDRHpre:
+ case AArch64::STRHpost:
+ case AArch64::STRHpre:
+ case AArch64::LDRHHpost:
+ case AArch64::LDRHHpre:
+ case AArch64::STRHHpost:
+ case AArch64::STRHHpre:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(2);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::LDRBpost:
+ case AArch64::LDRBpre:
+ case AArch64::STRBpost:
+ case AArch64::STRBpre:
+ case AArch64::LDRBBpost:
+ case AArch64::LDRBBpre:
+ case AArch64::STRBBpost:
+ case AArch64::STRBBpre:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(1);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ // Unscaled
+ case AArch64::LDURQi:
+ case AArch64::STURQi:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(16);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::LDURXi:
+ case AArch64::LDURDi:
+ case AArch64::LDAPURXi:
+ case AArch64::STURXi:
+ case AArch64::STURDi:
+ case AArch64::STLURXi:
+ case AArch64::PRFUMi:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(8);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::LDURWi:
+ case AArch64::LDURSi:
+ case AArch64::LDURSWi:
+ case AArch64::LDAPURi:
+ case AArch64::LDAPURSWi:
+ case AArch64::STURWi:
+ case AArch64::STURSi:
+ case AArch64::STLURWi:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(4);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::LDURHi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSHWi:
+ case AArch64::LDAPURHi:
+ case AArch64::LDAPURSHWi:
+ case AArch64::LDAPURSHXi:
+ case AArch64::STURHi:
+ case AArch64::STURHHi:
+ case AArch64::STLURHi:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(2);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::LDURBi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDAPURBi:
+ case AArch64::LDAPURSBWi:
+ case AArch64::LDAPURSBXi:
+ case AArch64::STURBi:
+ case AArch64::STURBBi:
+ case AArch64::STLURBi:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(1);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ // LDP / STP (including pre/post inc)
+ case AArch64::LDPQi:
+ case AArch64::LDNPQi:
+ case AArch64::STPQi:
+ case AArch64::STNPQi:
+ case AArch64::LDPQpost:
+ case AArch64::LDPQpre:
+ case AArch64::STPQpost:
+ case AArch64::STPQpre:
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(16 * 2);
+ MinOffset = -64;
+ MaxOffset = 63;
+ break;
+ case AArch64::LDPXi:
+ case AArch64::LDPDi:
+ case AArch64::LDNPXi:
+ case AArch64::LDNPDi:
+ case AArch64::STPXi:
+ case AArch64::STPDi:
+ case AArch64::STNPXi:
+ case AArch64::STNPDi:
+ case AArch64::LDPDpost:
+ case AArch64::LDPDpre:
+ case AArch64::LDPXpost:
+ case AArch64::LDPXpre:
+ case AArch64::STPDpost:
+ case AArch64::STPDpre:
+ case AArch64::STPXpost:
+ case AArch64::STPXpre:
+ Scale = TypeSize::getFixed(8);
+ Width = TypeSize::getFixed(8 * 2);
+ MinOffset = -64;
+ MaxOffset = 63;
+ break;
+ case AArch64::LDPWi:
+ case AArch64::LDPSi:
+ case AArch64::LDNPWi:
+ case AArch64::LDNPSi:
+ case AArch64::STPWi:
+ case AArch64::STPSi:
+ case AArch64::STNPWi:
+ case AArch64::STNPSi:
+ case AArch64::LDPSpost:
+ case AArch64::LDPSpre:
+ case AArch64::LDPWpost:
+ case AArch64::LDPWpre:
+ case AArch64::STPSpost:
+ case AArch64::STPSpre:
+ case AArch64::STPWpost:
+ case AArch64::STPWpre:
+ Scale = TypeSize::getFixed(4);
+ Width = TypeSize::getFixed(4 * 2);
+ MinOffset = -64;
+ MaxOffset = 63;
+ break;
+ case AArch64::StoreSwiftAsyncContext:
+ // Store is an STRXui, but there might be an ADDXri in the expansion too.
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(8);
+ MinOffset = 0;
+ MaxOffset = 4095;
+ break;
+ case AArch64::ADDG:
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(0);
+ MinOffset = 0;
+ MaxOffset = 63;
+ break;
+ case AArch64::TAGPstack:
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(0);
+ // TAGP with a negative offset turns into SUBP, which has a maximum offset
+ // of 63 (not 64!).
+ MinOffset = -63;
+ MaxOffset = 63;
+ break;
+ case AArch64::LDG:
+ case AArch64::STGi:
+ case AArch64::STGPreIndex:
+ case AArch64::STGPostIndex:
+ case AArch64::STZGi:
+ case AArch64::STZGPreIndex:
+ case AArch64::STZGPostIndex:
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(16);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ // SVE
+ case AArch64::STR_ZZZZXI:
+ case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
+ case AArch64::LDR_ZZZZXI:
+ case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
+ Scale = TypeSize::getScalable(16);
+ Width = TypeSize::getScalable(16 * 4);
+ MinOffset = -256;
+ MaxOffset = 252;
+ break;
+ case AArch64::STR_ZZZXI:
+ case AArch64::LDR_ZZZXI:
+ Scale = TypeSize::getScalable(16);
+ Width = TypeSize::getScalable(16 * 3);
+ MinOffset = -256;
+ MaxOffset = 253;
+ break;
+ case AArch64::STR_ZZXI:
+ case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
+ case AArch64::LDR_ZZXI:
+ case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
+ Scale = TypeSize::getScalable(16);
+ Width = TypeSize::getScalable(16 * 2);
+ MinOffset = -256;
+ MaxOffset = 254;
+ break;
+ case AArch64::LDR_PXI:
+ case AArch64::STR_PXI:
+ Scale = TypeSize::getScalable(2);
+ Width = TypeSize::getScalable(2);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::LDR_PPXI:
+ case AArch64::STR_PPXI:
+ Scale = TypeSize::getScalable(2);
+ Width = TypeSize::getScalable(2 * 2);
+ MinOffset = -256;
+ MaxOffset = 254;
+ break;
+ case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
+ case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
+ case AArch64::LDR_ZXI:
+ case AArch64::STR_ZXI:
+ Scale = TypeSize::getScalable(16);
+ Width = TypeSize::getScalable(16);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::LD1B_IMM:
+ case AArch64::LD1H_IMM:
+ case AArch64::LD1W_IMM:
+ case AArch64::LD1D_IMM:
+ case AArch64::LDNT1B_ZRI:
+ case AArch64::LDNT1H_ZRI:
+ case AArch64::LDNT1W_ZRI:
+ case AArch64::LDNT1D_ZRI:
+ case AArch64::ST1B_IMM:
+ case AArch64::ST1H_IMM:
+ case AArch64::ST1W_IMM:
+ case AArch64::ST1D_IMM:
+ case AArch64::STNT1B_ZRI:
+ case AArch64::STNT1H_ZRI:
+ case AArch64::STNT1W_ZRI:
+ case AArch64::STNT1D_ZRI:
+ case AArch64::LDNF1B_IMM:
+ case AArch64::LDNF1H_IMM:
+ case AArch64::LDNF1W_IMM:
+ case AArch64::LDNF1D_IMM:
+ // A full vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::getScalable(16);
+ Width = TypeSize::getScalable(16);
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD2B_IMM:
+ case AArch64::LD2H_IMM:
+ case AArch64::LD2W_IMM:
+ case AArch64::LD2D_IMM:
+ case AArch64::ST2B_IMM:
+ case AArch64::ST2H_IMM:
+ case AArch64::ST2W_IMM:
+ case AArch64::ST2D_IMM:
+ Scale = TypeSize::getScalable(32);
+ Width = TypeSize::getScalable(16 * 2);
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD3B_IMM:
+ case AArch64::LD3H_IMM:
+ case AArch64::LD3W_IMM:
+ case AArch64::LD3D_IMM:
+ case AArch64::ST3B_IMM:
+ case AArch64::ST3H_IMM:
+ case AArch64::ST3W_IMM:
+ case AArch64::ST3D_IMM:
+ Scale = TypeSize::getScalable(48);
+ Width = TypeSize::getScalable(16 * 3);
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD4B_IMM:
+ case AArch64::LD4H_IMM:
+ case AArch64::LD4W_IMM:
+ case AArch64::LD4D_IMM:
+ case AArch64::ST4B_IMM:
+ case AArch64::ST4H_IMM:
+ case AArch64::ST4W_IMM:
+ case AArch64::ST4D_IMM:
+ Scale = TypeSize::getScalable(64);
+ Width = TypeSize::getScalable(16 * 4);
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD1B_H_IMM:
+ case AArch64::LD1SB_H_IMM:
+ case AArch64::LD1H_S_IMM:
+ case AArch64::LD1SH_S_IMM:
+ case AArch64::LD1W_D_IMM:
+ case AArch64::LD1SW_D_IMM:
+ case AArch64::ST1B_H_IMM:
+ case AArch64::ST1H_S_IMM:
+ case AArch64::ST1W_D_IMM:
+ case AArch64::LDNF1B_H_IMM:
+ case AArch64::LDNF1SB_H_IMM:
+ case AArch64::LDNF1H_S_IMM:
+ case AArch64::LDNF1SH_S_IMM:
+ case AArch64::LDNF1W_D_IMM:
+ case AArch64::LDNF1SW_D_IMM:
+ // A half vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::getScalable(8);
+ Width = TypeSize::getScalable(8);
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD1B_S_IMM:
+ case AArch64::LD1SB_S_IMM:
+ case AArch64::LD1H_D_IMM:
+ case AArch64::LD1SH_D_IMM:
+ case AArch64::ST1B_S_IMM:
+ case AArch64::ST1H_D_IMM:
+ case AArch64::LDNF1B_S_IMM:
+ case AArch64::LDNF1SB_S_IMM:
+ case AArch64::LDNF1H_D_IMM:
+ case AArch64::LDNF1SH_D_IMM:
+ // A quarter vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::getScalable(4);
+ Width = TypeSize::getScalable(4);
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD1B_D_IMM:
+ case AArch64::LD1SB_D_IMM:
+ case AArch64::ST1B_D_IMM:
+ case AArch64::LDNF1B_D_IMM:
+ case AArch64::LDNF1SB_D_IMM:
+ // An eighth vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::getScalable(2);
+ Width = TypeSize::getScalable(2);
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::ST2Gi:
+ case AArch64::ST2GPreIndex:
+ case AArch64::ST2GPostIndex:
+ case AArch64::STZ2Gi:
+ case AArch64::STZ2GPreIndex:
+ case AArch64::STZ2GPostIndex:
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(32);
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::STGPi:
+ case AArch64::STGPpost:
+ case AArch64::STGPpre:
+ Scale = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(16);
+ MinOffset = -64;
+ MaxOffset = 63;
+ break;
+ case AArch64::LD1RB_IMM:
+ case AArch64::LD1RB_H_IMM:
+ case AArch64::LD1RB_S_IMM:
+ case AArch64::LD1RB_D_IMM:
+ case AArch64::LD1RSB_H_IMM:
+ case AArch64::LD1RSB_S_IMM:
+ case AArch64::LD1RSB_D_IMM:
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(1);
+ MinOffset = 0;
+ MaxOffset = 63;
+ break;
+ case AArch64::LD1RH_IMM:
+ case AArch64::LD1RH_S_IMM:
+ case AArch64::LD1RH_D_IMM:
+ case AArch64::LD1RSH_S_IMM:
+ case AArch64::LD1RSH_D_IMM:
+ Scale = TypeSize::getFixed(2);
+ Width = TypeSize::getFixed(2);
+ MinOffset = 0;
+ MaxOffset = 63;
+ break;
+ case AArch64::LD1RW_IMM:
+ case AArch64::LD1RW_D_IMM:
+ case AArch64::LD1RSW_IMM:
+ Scale = TypeSize::getFixed(4);
+ Width = TypeSize::getFixed(4);
+ MinOffset = 0;
+ MaxOffset = 63;
+ break;
+ case AArch64::LD1RD_IMM:
+ Scale = TypeSize::getFixed(8);
+ Width = TypeSize::getFixed(8);
+ MinOffset = 0;
+ MaxOffset = 63;
+ break;
+ }
- int64_t Disp = 0;
- const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
- switch (AddrI.getOpcode()) {
- default:
- return false;
+ return true;
+ }
- case AArch64::ADDXri:
- // add Xa, Xn, #N
- // ldr Xd, [Xa, #M]
- // ->
- // ldr Xd, [Xn, #N'+M]
- Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
- return canFoldAddSubImmIntoAddrMode(Disp);
+ // Scaling factor for unscaled load or store.
+ int AArch64InstrInfo::getMemScale(unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Opcode has unknown scale!");
+ case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
+ case AArch64::LDRSBWui:
+ case AArch64::LDURSBWi:
+ case AArch64::STRBBui:
+ case AArch64::STURBBi:
+ return 1;
+ case AArch64::LDRHHui:
+ case AArch64::LDURHHi:
+ case AArch64::LDRSHWui:
+ case AArch64::LDURSHWi:
+ case AArch64::STRHHui:
+ case AArch64::STURHHi:
+ return 2;
+ case AArch64::LDRSui:
+ case AArch64::LDURSi:
+ case AArch64::LDRSpre:
+ case AArch64::LDRSWui:
+ case AArch64::LDURSWi:
+ case AArch64::LDRSWpre:
+ case AArch64::LDRWpre:
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
+ case AArch64::STRSui:
+ case AArch64::STURSi:
+ case AArch64::STRSpre:
+ case AArch64::STRWui:
+ case AArch64::STURWi:
+ case AArch64::STRWpre:
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPWi:
+ case AArch64::STPSi:
+ case AArch64::STPWi:
+ return 4;
+ case AArch64::LDRDui:
+ case AArch64::LDURDi:
+ case AArch64::LDRDpre:
+ case AArch64::LDRXui:
+ case AArch64::LDURXi:
+ case AArch64::LDRXpre:
+ case AArch64::STRDui:
+ case AArch64::STURDi:
+ case AArch64::STRDpre:
+ case AArch64::STRXui:
+ case AArch64::STURXi:
+ case AArch64::STRXpre:
+ case AArch64::LDPDi:
+ case AArch64::LDPXi:
+ case AArch64::STPDi:
+ case AArch64::STPXi:
+ return 8;
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
+ case AArch64::STRQui:
+ case AArch64::STURQi:
+ case AArch64::STRQpre:
+ case AArch64::LDPQi:
+ case AArch64::LDRQpre:
+ case AArch64::STPQi:
+ case AArch64::STGi:
+ case AArch64::STZGi:
+ case AArch64::ST2Gi:
+ case AArch64::STZ2Gi:
+ case AArch64::STGPi:
+ return 16;
+ }
+ }
- case AArch64::SUBXri:
- // sub Xa, Xn, #N
- // ldr Xd, [Xa, #M]
- // ->
- // ldr Xd, [Xn, #N'+M]
- Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
- return canFoldAddSubImmIntoAddrMode(-Disp);
-
- case AArch64::ADDXrs: {
- // add Xa, Xn, Xm, lsl #N
- // ldr Xd, [Xa]
- // ->
- // ldr Xd, [Xn, Xm, lsl #N]
-
- // Don't fold the add if the result would be slower, unless optimising for
- // size.
- unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
- if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
+ bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
return false;
- Shift = AArch64_AM::getShiftValue(Shift);
- if (!OptSize) {
- if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
- return false;
- if (avoidSlowSTRQ(MemI))
- return false;
+ case AArch64::LDRWpre:
+ case AArch64::LDRXpre:
+ case AArch64::LDRSWpre:
+ case AArch64::LDRSpre:
+ case AArch64::LDRDpre:
+ case AArch64::LDRQpre:
+ return true;
}
- return canFoldAddRegIntoAddrMode(1ULL << Shift);
}
- case AArch64::ADDXrr:
- // add Xa, Xn, Xm
- // ldr Xd, [Xa]
- // ->
- // ldr Xd, [Xn, Xm, lsl #0]
-
- // Don't fold the add if the result would be slower, unless optimising for
- // size.
- if (!OptSize && avoidSlowSTRQ(MemI))
+ bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
return false;
- return canFoldAddRegIntoAddrMode(1);
+ case AArch64::STRWpre:
+ case AArch64::STRXpre:
+ case AArch64::STRSpre:
+ case AArch64::STRDpre:
+ case AArch64::STRQpre:
+ return true;
+ }
+ }
- case AArch64::ADDXrx:
- // add Xa, Xn, Wm, {s,u}xtw #N
- // ldr Xd, [Xa]
- // ->
- // ldr Xd, [Xn, Wm, {s,u}xtw #N]
-
- // Don't fold the add if the result would be slower, unless optimising for
- // size.
- if (!OptSize && avoidSlowSTRQ(MemI))
- return false;
+ bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
+ return isPreLd(MI) || isPreSt(MI);
+ }
- // Can fold only sign-/zero-extend of a word.
- unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
- AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
- if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
+ bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
return false;
-
- return canFoldAddRegIntoAddrMode(
- 1ULL << AArch64_AM::getArithShiftValue(Imm),
- (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
- : ExtAddrMode::Formula::ZExtScaledReg);
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+ case AArch64::LDPWi:
+ case AArch64::LDPXi:
+ case AArch64::STPSi:
+ case AArch64::STPDi:
+ case AArch64::STPQi:
+ case AArch64::STPWi:
+ case AArch64::STPXi:
+ case AArch64::STGPi:
+ return true;
+ }
}
-}
-
-// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
-// return the opcode of an instruction performing the same operation, but using
-// the [Reg, Reg] addressing mode.
-static unsigned regOffsetOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- llvm_unreachable("Address folding not implemented for instruction");
-
- case AArch64::LDURQi:
- case AArch64::LDRQui:
- return AArch64::LDRQroX;
- case AArch64::STURQi:
- case AArch64::STRQui:
- return AArch64::STRQroX;
- case AArch64::LDURDi:
- case AArch64::LDRDui:
- return AArch64::LDRDroX;
- case AArch64::STURDi:
- case AArch64::STRDui:
- return AArch64::STRDroX;
- case AArch64::LDURXi:
- case AArch64::LDRXui:
- return AArch64::LDRXroX;
- case AArch64::STURXi:
- case AArch64::STRXui:
- return AArch64::STRXroX;
- case AArch64::LDURWi:
- case AArch64::LDRWui:
- return AArch64::LDRWroX;
- case AArch64::LDURSWi:
- case AArch64::LDRSWui:
- return AArch64::LDRSWroX;
- case AArch64::STURWi:
- case AArch64::STRWui:
- return AArch64::STRWroX;
- case AArch64::LDURHi:
- case AArch64::LDRHui:
- return AArch64::LDRHroX;
- case AArch64::STURHi:
- case AArch64::STRHui:
- return AArch64::STRHroX;
- case AArch64::LDURHHi:
- case AArch64::LDRHHui:
- return AArch64::LDRHHroX;
- case AArch64::STURHHi:
- case AArch64::STRHHui:
- return AArch64::STRHHroX;
- case AArch64::LDURSHXi:
- case AArch64::LDRSHXui:
- return AArch64::LDRSHXroX;
- case AArch64::LDURSHWi:
- case AArch64::LDRSHWui:
- return AArch64::LDRSHWroX;
- case AArch64::LDURBi:
- case AArch64::LDRBui:
- return AArch64::LDRBroX;
- case AArch64::LDURBBi:
- case AArch64::LDRBBui:
- return AArch64::LDRBBroX;
- case AArch64::LDURSBXi:
- case AArch64::LDRSBXui:
- return AArch64::LDRSBXroX;
- case AArch64::LDURSBWi:
- case AArch64::LDRSBWui:
- return AArch64::LDRSBWroX;
- case AArch64::STURBi:
- case AArch64::STRBui:
- return AArch64::STRBroX;
- case AArch64::STURBBi:
- case AArch64::STRBBui:
- return AArch64::STRBBroX;
- }
-}
-
-// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
-// the opcode of an instruction performing the same operation, but using the
-// [Reg, #Imm] addressing mode with scaled offset.
-unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
- switch (Opcode) {
- default:
- llvm_unreachable("Address folding not implemented for instruction");
-
- case AArch64::LDURQi:
- Scale = 16;
- return AArch64::LDRQui;
- case AArch64::STURQi:
- Scale = 16;
- return AArch64::STRQui;
- case AArch64::LDURDi:
- Scale = 8;
- return AArch64::LDRDui;
- case AArch64::STURDi:
- Scale = 8;
- return AArch64::STRDui;
- case AArch64::LDURXi:
- Scale = 8;
- return AArch64::LDRXui;
- case AArch64::STURXi:
- Scale = 8;
- return AArch64::STRXui;
- case AArch64::LDURWi:
- Scale = 4;
- return AArch64::LDRWui;
- case AArch64::LDURSWi:
- Scale = 4;
- return AArch64::LDRSWui;
- case AArch64::STURWi:
- Scale = 4;
- return AArch64::STRWui;
- case AArch64::LDURHi:
- Scale = 2;
- return AArch64::LDRHui;
- case AArch64::STURHi:
- Scale = 2;
- return AArch64::STRHui;
- case AArch64::LDURHHi:
- Scale = 2;
- return AArch64::LDRHHui;
- case AArch64::STURHHi:
- Scale = 2;
- return AArch64::STRHHui;
- case AArch64::LDURSHXi:
- Scale = 2;
- return AArch64::LDRSHXui;
- case AArch64::LDURSHWi:
- Scale = 2;
- return AArch64::LDRSHWui;
- case AArch64::LDURBi:
- Scale = 1;
- return AArch64::LDRBui;
- case AArch64::LDURBBi:
- Scale = 1;
- return AArch64::LDRBBui;
- case AArch64::LDURSBXi:
- Scale = 1;
- return AArch64::LDRSBXui;
- case AArch64::LDURSBWi:
- Scale = 1;
- return AArch64::LDRSBWui;
- case AArch64::STURBi:
- Scale = 1;
- return AArch64::STRBui;
- case AArch64::STURBBi:
- Scale = 1;
- return AArch64::STRBBui;
- case AArch64::LDRQui:
- case AArch64::STRQui:
- Scale = 16;
- return Opcode;
- case AArch64::LDRDui:
- case AArch64::STRDui:
- case AArch64::LDRXui:
- case AArch64::STRXui:
- Scale = 8;
- return Opcode;
- case AArch64::LDRWui:
- case AArch64::LDRSWui:
- case AArch64::STRWui:
- Scale = 4;
- return Opcode;
- case AArch64::LDRHui:
- case AArch64::STRHui:
- case AArch64::LDRHHui:
- case AArch64::STRHHui:
- case AArch64::LDRSHXui:
- case AArch64::LDRSHWui:
- Scale = 2;
- return Opcode;
- case AArch64::LDRBui:
- case AArch64::LDRBBui:
- case AArch64::LDRSBXui:
- case AArch64::LDRSBWui:
- case AArch64::STRBui:
- case AArch64::STRBBui:
- Scale = 1;
- return Opcode;
- }
-}
-
-// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
-// the opcode of an instruction performing the same operation, but using the
-// [Reg, #Imm] addressing mode with unscaled offset.
-unsigned unscaledOffsetOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- llvm_unreachable("Address folding not implemented for instruction");
-
- case AArch64::LDURQi:
- case AArch64::STURQi:
- case AArch64::LDURDi:
- case AArch64::STURDi:
- case AArch64::LDURXi:
- case AArch64::STURXi:
- case AArch64::LDURWi:
- case AArch64::LDURSWi:
- case AArch64::STURWi:
- case AArch64::LDURHi:
- case AArch64::STURHi:
- case AArch64::LDURHHi:
- case AArch64::STURHHi:
- case AArch64::LDURSHXi:
- case AArch64::LDURSHWi:
- case AArch64::LDURBi:
- case AArch64::STURBi:
- case AArch64::LDURBBi:
- case AArch64::STURBBi:
- case AArch64::LDURSBWi:
- case AArch64::LDURSBXi:
- return Opcode;
- case AArch64::LDRQui:
- return AArch64::LDURQi;
- case AArch64::STRQui:
- return AArch64::STURQi;
- case AArch64::LDRDui:
- return AArch64::LDURDi;
- case AArch64::STRDui:
- return AArch64::STURDi;
- case AArch64::LDRXui:
- return AArch64::LDURXi;
- case AArch64::STRXui:
- return AArch64::STURXi;
- case AArch64::LDRWui:
- return AArch64::LDURWi;
- case AArch64::LDRSWui:
- return AArch64::LDURSWi;
- case AArch64::STRWui:
- return AArch64::STURWi;
- case AArch64::LDRHui:
- return AArch64::LDURHi;
- case AArch64::STRHui:
- return AArch64::STURHi;
- case AArch64::LDRHHui:
- return AArch64::LDURHHi;
- case AArch64::STRHHui:
- return AArch64::STURHHi;
- case AArch64::LDRSHXui:
- return AArch64::LDURSHXi;
- case AArch64::LDRSHWui:
- return AArch64::LDURSHWi;
- case AArch64::LDRBBui:
- return AArch64::LDURBBi;
- case AArch64::LDRBui:
- return AArch64::LDURBi;
- case AArch64::STRBBui:
- return AArch64::STURBBi;
- case AArch64::STRBui:
- return AArch64::STURBi;
- case AArch64::LDRSBWui:
- return AArch64::LDURSBWi;
- case AArch64::LDRSBXui:
- return AArch64::LDURSBXi;
- }
-}
-
-// Given the opcode of a memory load/store instruction, return the opcode of an
-// instruction performing the same operation, but using
-// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
-// offset register.
-static unsigned offsetExtendOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- llvm_unreachable("Address folding not implemented for instruction");
- case AArch64::LDRQroX:
- case AArch64::LDURQi:
- case AArch64::LDRQui:
- return AArch64::LDRQroW;
- case AArch64::STRQroX:
- case AArch64::STURQi:
- case AArch64::STRQui:
- return AArch64::STRQroW;
- case AArch64::LDRDroX:
- case AArch64::LDURDi:
- case AArch64::LDRDui:
- return AArch64::LDRDroW;
- case AArch64::STRDroX:
- case AArch64::STURDi:
- case AArch64::STRDui:
- return AArch64::STRDroW;
- case AArch64::LDRXroX:
- case AArch64::LDURXi:
- case AArch64::LDRXui:
- return AArch64::LDRXroW;
- case AArch64::STRXroX:
- case AArch64::STURXi:
- case AArch64::STRXui:
- return AArch64::STRXroW;
- case AArch64::LDRWroX:
- case AArch64::LDURWi:
- case AArch64::LDRWui:
- return AArch64::LDRWroW;
- case AArch64::LDRSWroX:
- case AArch64::LDURSWi:
- case AArch64::LDRSWui:
- return AArch64::LDRSWroW;
- case AArch64::STRWroX:
- case AArch64::STURWi:
- case AArch64::STRWui:
- return AArch64::STRWroW;
- case AArch64::LDRHroX:
- case AArch64::LDURHi:
- case AArch64::LDRHui:
- return AArch64::LDRHroW;
- case AArch64::STRHroX:
- case AArch64::STURHi:
- case AArch64::STRHui:
- return AArch64::STRHroW;
- case AArch64::LDRHHroX:
- case AArch64::LDURHHi:
- case AArch64::LDRHHui:
- return AArch64::LDRHHroW;
- case AArch64::STRHHroX:
- case AArch64::STURHHi:
- case AArch64::STRHHui:
- return AArch64::STRHHroW;
- case AArch64::LDRSHXroX:
- case AArch64::LDURSHXi:
- case AArch64::LDRSHXui:
- return AArch64::LDRSHXroW;
- case AArch64::LDRSHWroX:
- case AArch64::LDURSHWi:
- case AArch64::LDRSHWui:
- return AArch64::LDRSHWroW;
- case AArch64::LDRBroX:
- case AArch64::LDURBi:
- case AArch64::LDRBui:
- return AArch64::LDRBroW;
- case AArch64::LDRBBroX:
- case AArch64::LDURBBi:
- case AArch64::LDRBBui:
- return AArch64::LDRBBroW;
- case AArch64::LDRSBXroX:
- case AArch64::LDURSBXi:
- case AArch64::LDRSBXui:
- return AArch64::LDRSBXroW;
- case AArch64::LDRSBWroX:
- case AArch64::LDURSBWi:
- case AArch64::LDRSBWui:
- return AArch64::LDRSBWroW;
- case AArch64::STRBroX:
- case AArch64::STURBi:
- case AArch64::STRBui:
- return AArch64::STRBroW;
- case AArch64::STRBBroX:
- case AArch64::STURBBi:
- case AArch64::STRBBui:
- return AArch64::STRBBroW;
+ const MachineOperand &AArch64InstrInfo::getLdStBaseOp(
+ const MachineInstr &MI) {
+ assert(MI.mayLoadOrStore() && "Load or store instruction expected");
+ unsigned Idx =
+ AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI)
+ ? 2
+ : 1;
+ return MI.getOperand(Idx);
}
-}
-
-MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
- const ExtAddrMode &AM) const {
- const DebugLoc &DL = MemI.getDebugLoc();
- MachineBasicBlock &MBB = *MemI.getParent();
- MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
+ const MachineOperand &AArch64InstrInfo::getLdStOffsetOp(
+ const MachineInstr &MI) {
+ assert(MI.mayLoadOrStore() && "Load or store instruction expected");
+ unsigned Idx =
+ AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI)
+ ? 3
+ : 2;
+ return MI.getOperand(Idx);
+ }
- if (AM.Form == ExtAddrMode::Formula::Basic) {
- if (AM.ScaledReg) {
- // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
- unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
- MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
- auto B = BuildMI(MBB, MemI, DL, get(Opcode))
- .addReg(MemI.getOperand(0).getReg(),
- MemI.mayLoad() ? RegState::Define : 0)
- .addReg(AM.BaseReg)
- .addReg(AM.ScaledReg)
- .addImm(0)
- .addImm(AM.Scale > 1)
- .setMemRefs(MemI.memoperands())
- .setMIFlags(MemI.getFlags());
- return B.getInstr();
+ const MachineOperand &AArch64InstrInfo::getLdStAmountOp(
+ const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case AArch64::LDRBroX:
+ case AArch64::LDRBBroX:
+ case AArch64::LDRSBXroX:
+ case AArch64::LDRSBWroX:
+ case AArch64::LDRHroX:
+ case AArch64::LDRHHroX:
+ case AArch64::LDRSHXroX:
+ case AArch64::LDRSHWroX:
+ case AArch64::LDRWroX:
+ case AArch64::LDRSroX:
+ case AArch64::LDRSWroX:
+ case AArch64::LDRDroX:
+ case AArch64::LDRXroX:
+ case AArch64::LDRQroX:
+ return MI.getOperand(4);
}
-
- assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
- "Addressing mode not supported for folding");
-
- // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
- unsigned Scale = 1;
- unsigned Opcode = MemI.getOpcode();
- if (isInt<9>(AM.Displacement))
- Opcode = unscaledOffsetOpcode(Opcode);
- else
- Opcode = scaledOffsetOpcode(Opcode, Scale);
-
- auto B = BuildMI(MBB, MemI, DL, get(Opcode))
- .addReg(MemI.getOperand(0).getReg(),
- MemI.mayLoad() ? RegState::Define : 0)
- .addReg(AM.BaseReg)
- .addImm(AM.Displacement / Scale)
- .setMemRefs(MemI.memoperands())
- .setMIFlags(MemI.getFlags());
- return B.getInstr();
- }
-
- if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
- AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
- // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
- assert(AM.ScaledReg && !AM.Displacement &&
- "Address offset can be a register or an immediate, but not both");
- unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
- MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
- // Make sure the offset register is in the correct register class.
- Register OffsetReg = AM.ScaledReg;
- const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
- if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
- OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
- .addReg(AM.ScaledReg, 0, AArch64::sub_32);
- }
- auto B = BuildMI(MBB, MemI, DL, get(Opcode))
- .addReg(MemI.getOperand(0).getReg(),
- MemI.mayLoad() ? RegState::Define : 0)
- .addReg(AM.BaseReg)
- .addReg(OffsetReg)
- .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
- .addImm(AM.Scale != 1)
- .setMemRefs(MemI.memoperands())
- .setMIFlags(MemI.getFlags());
-
- return B.getInstr();
- }
-
- llvm_unreachable(
- "Function must not be called with an addressing mode it can't handle");
-}
-
-/// Return true if the opcode is a post-index ld/st instruction, which really
-/// loads from base+0.
-static bool isPostIndexLdStOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case AArch64::LD1Fourv16b_POST:
- case AArch64::LD1Fourv1d_POST:
- case AArch64::LD1Fourv2d_POST:
- case AArch64::LD1Fourv2s_POST:
- case AArch64::LD1Fourv4h_POST:
- case AArch64::LD1Fourv4s_POST:
- case AArch64::LD1Fourv8b_POST:
- case AArch64::LD1Fourv8h_POST:
- case AArch64::LD1Onev16b_POST:
- case AArch64::LD1Onev1d_POST:
- case AArch64::LD1Onev2d_POST:
- case AArch64::LD1Onev2s_POST:
- case AArch64::LD1Onev4h_POST:
- case AArch64::LD1Onev4s_POST:
- case AArch64::LD1Onev8b_POST:
- case AArch64::LD1Onev8h_POST:
- case AArch64::LD1Rv16b_POST:
- case AArch64::LD1Rv1d_POST:
- case AArch64::LD1Rv2d_POST:
- case AArch64::LD1Rv2s_POST:
- case AArch64::LD1Rv4h_POST:
- case AArch64::LD1Rv4s_POST:
- case AArch64::LD1Rv8b_POST:
- case AArch64::LD1Rv8h_POST:
- case AArch64::LD1Threev16b_POST:
- case AArch64::LD1Threev1d_POST:
- case AArch64::LD1Threev2d_POST:
- case AArch64::LD1Threev2s_POST:
- case AArch64::LD1Threev4h_POST:
- case AArch64::LD1Threev4s_POST:
- case AArch64::LD1Threev8b_POST:
- case AArch64::LD1Threev8h_POST:
- case AArch64::LD1Twov16b_POST:
- case AArch64::LD1Twov1d_POST:
- case AArch64::LD1Twov2d_POST:
- case AArch64::LD1Twov2s_POST:
- case AArch64::LD1Twov4h_POST:
- case AArch64::LD1Twov4s_POST:
- case AArch64::LD1Twov8b_POST:
- case AArch64::LD1Twov8h_POST:
- case AArch64::LD1i16_POST:
- case AArch64::LD1i32_POST:
- case AArch64::LD1i64_POST:
- case AArch64::LD1i8_POST:
- case AArch64::LD2Rv16b_POST:
- case AArch64::LD2Rv1d_POST:
- case AArch64::LD2Rv2d_POST:
- case AArch64::LD2Rv2s_POST:
- case AArch64::LD2Rv4h_POST:
- case AArch64::LD2Rv4s_POST:
- case AArch64::LD2Rv8b_POST:
- case AArch64::LD2Rv8h_POST:
- case AArch64::LD2Twov16b_POST:
- case AArch64::LD2Twov2d_POST:
- case AArch64::LD2Twov2s_POST:
- case AArch64::LD2Twov4h_POST:
- case AArch64::LD2Twov4s_POST:
- case AArch64::LD2Twov8b_POST:
- case AArch64::LD2Twov8h_POST:
- case AArch64::LD2i16_POST:
- case AArch64::LD2i32_POST:
- case AArch64::LD2i64_POST:
- case AArch64::LD2i8_POST:
- case AArch64::LD3Rv16b_POST:
- case AArch64::LD3Rv1d_POST:
- case AArch64::LD3Rv2d_POST:
- case AArch64::LD3Rv2s_POST:
- case AArch64::LD3Rv4h_POST:
- case AArch64::LD3Rv4s_POST:
- case AArch64::LD3Rv8b_POST:
- case AArch64::LD3Rv8h_POST:
- case AArch64::LD3Threev16b_POST:
- case AArch64::LD3Threev2d_POST:
- case AArch64::LD3Threev2s_POST:
- case AArch64::LD3Threev4h_POST:
- case AArch64::LD3Threev4s_POST:
- case AArch64::LD3Threev8b_POST:
- case AArch64::LD3Threev8h_POST:
- case AArch64::LD3i16_POST:
- case AArch64::LD3i32_POST:
- case AArch64::LD3i64_POST:
- case AArch64::LD3i8_POST:
- case AArch64::LD4Fourv16b_POST:
- case AArch64::LD4Fourv2d_POST:
- case AArch64::LD4Fourv2s_POST:
- case AArch64::LD4Fourv4h_POST:
- case AArch64::LD4Fourv4s_POST:
- case AArch64::LD4Fourv8b_POST:
- case AArch64::LD4Fourv8h_POST:
- case AArch64::LD4Rv16b_POST:
- case AArch64::LD4Rv1d_POST:
- case AArch64::LD4Rv2d_POST:
- case AArch64::LD4Rv2s_POST:
- case AArch64::LD4Rv4h_POST:
- case AArch64::LD4Rv4s_POST:
- case AArch64::LD4Rv8b_POST:
- case AArch64::LD4Rv8h_POST:
- case AArch64::LD4i16_POST:
- case AArch64::LD4i32_POST:
- case AArch64::LD4i64_POST:
- case AArch64::LD4i8_POST:
- case AArch64::LDAPRWpost:
- case AArch64::LDAPRXpost:
- case AArch64::LDIAPPWpost:
- case AArch64::LDIAPPXpost:
- case AArch64::LDPDpost:
- case AArch64::LDPQpost:
- case AArch64::LDPSWpost:
- case AArch64::LDPSpost:
- case AArch64::LDPWpost:
- case AArch64::LDPXpost:
- case AArch64::LDRBBpost:
- case AArch64::LDRBpost:
- case AArch64::LDRDpost:
- case AArch64::LDRHHpost:
- case AArch64::LDRHpost:
- case AArch64::LDRQpost:
- case AArch64::LDRSBWpost:
- case AArch64::LDRSBXpost:
- case AArch64::LDRSHWpost:
- case AArch64::LDRSHXpost:
- case AArch64::LDRSWpost:
- case AArch64::LDRSpost:
- case AArch64::LDRWpost:
- case AArch64::LDRXpost:
- case AArch64::ST1Fourv16b_POST:
- case AArch64::ST1Fourv1d_POST:
- case AArch64::ST1Fourv2d_POST:
- case AArch64::ST1Fourv2s_POST:
- case AArch64::ST1Fourv4h_POST:
- case AArch64::ST1Fourv4s_POST:
- case AArch64::ST1Fourv8b_POST:
- case AArch64::ST1Fourv8h_POST:
- case AArch64::ST1Onev16b_POST:
- case AArch64::ST1Onev1d_POST:
- case AArch64::ST1Onev2d_POST:
- case AArch64::ST1Onev2s_POST:
- case AArch64::ST1Onev4h_POST:
- case AArch64::ST1Onev4s_POST:
- case AArch64::ST1Onev8b_POST:
- case AArch64::ST1Onev8h_POST:
- case AArch64::ST1Threev16b_POST:
- case AArch64::ST1Threev1d_POST:
- case AArch64::ST1Threev2d_POST:
- case AArch64::ST1Threev2s_POST:
- case AArch64::ST1Threev4h_POST:
- case AArch64::ST1Threev4s_POST:
- case AArch64::ST1Threev8b_POST:
- case AArch64::ST1Threev8h_POST:
- case AArch64::ST1Twov16b_POST:
- case AArch64::ST1Twov1d_POST:
- case AArch64::ST1Twov2d_POST:
- case AArch64::ST1Twov2s_POST:
- case AArch64::ST1Twov4h_POST:
- case AArch64::ST1Twov4s_POST:
- case AArch64::ST1Twov8b_POST:
- case AArch64::ST1Twov8h_POST:
- case AArch64::ST1i16_POST:
- case AArch64::ST1i32_POST:
- case AArch64::ST1i64_POST:
- case AArch64::ST1i8_POST:
- case AArch64::ST2GPostIndex:
- case AArch64::ST2Twov16b_POST:
- case AArch64::ST2Twov2d_POST:
- case AArch64::ST2Twov2s_POST:
- case AArch64::ST2Twov4h_POST:
- case AArch64::ST2Twov4s_POST:
- case AArch64::ST2Twov8b_POST:
- case AArch64::ST2Twov8h_POST:
- case AArch64::ST2i16_POST:
- case AArch64::ST2i32_POST:
- case AArch64::ST2i64_POST:
- case AArch64::ST2i8_POST:
- case AArch64::ST3Threev16b_POST:
- case AArch64::ST3Threev2d_POST:
- case AArch64::ST3Threev2s_POST:
- case AArch64::ST3Threev4h_POST:
- case AArch64::ST3Threev4s_POST:
- case AArch64::ST3Threev8b_POST:
- case AArch64::ST3Threev8h_POST:
- case AArch64::ST3i16_POST:
- case AArch64::ST3i32_POST:
- case AArch64::ST3i64_POST:
- case AArch64::ST3i8_POST:
- case AArch64::ST4Fourv16b_POST:
- case AArch64::ST4Fourv2d_POST:
- case AArch64::ST4Fourv2s_POST:
- case AArch64::ST4Fourv4h_POST:
- case AArch64::ST4Fourv4s_POST:
- case AArch64::ST4Fourv8b_POST:
- case AArch64::ST4Fourv8h_POST:
- case AArch64::ST4i16_POST:
- case AArch64::ST4i32_POST:
- case AArch64::ST4i64_POST:
- case AArch64::ST4i8_POST:
- case AArch64::STGPostIndex:
- case AArch64::STGPpost:
- case AArch64::STPDpost:
- case AArch64::STPQpost:
- case AArch64::STPSpost:
- case AArch64::STPWpost:
- case AArch64::STPXpost:
- case AArch64::STRBBpost:
- case AArch64::STRBpost:
- case AArch64::STRDpost:
- case AArch64::STRHHpost:
- case AArch64::STRHpost:
- case AArch64::STRQpost:
- case AArch64::STRSpost:
- case AArch64::STRWpost:
- case AArch64::STRXpost:
- case AArch64::STZ2GPostIndex:
- case AArch64::STZGPostIndex:
- return true;
}
-}
-
-bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
- const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
- bool &OffsetIsScalable, TypeSize &Width,
- const TargetRegisterInfo *TRI) const {
- assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
- // Handle only loads/stores with base register followed by immediate offset.
- if (LdSt.getNumExplicitOperands() == 3) {
- // Non-paired instruction (e.g., ldr x1, [x0, #8]).
- if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
- !LdSt.getOperand(2).isImm())
- return false;
- } else if (LdSt.getNumExplicitOperands() == 4) {
- // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
- if (!LdSt.getOperand(1).isReg() ||
- (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
- !LdSt.getOperand(3).isImm())
- return false;
- } else
- return false;
-
- // Get the scaling factor for the instruction and set the width for the
- // instruction.
- TypeSize Scale(0U, false);
- int64_t Dummy1, Dummy2;
-
- // If this returns false, then it's an instruction we don't want to handle.
- if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
- return false;
- // Compute the offset. Offset is calculated as the immediate operand
- // multiplied by the scaling factor. Unscaled instructions have scaling factor
- // set to 1. Postindex are a special case which have an offset of 0.
- if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
- BaseOp = &LdSt.getOperand(2);
- Offset = 0;
- } else if (LdSt.getNumExplicitOperands() == 3) {
- BaseOp = &LdSt.getOperand(1);
- Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
- } else {
- assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
- BaseOp = &LdSt.getOperand(2);
- Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
+ static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
+ Register Reg) {
+ if (MI.getParent() == nullptr)
+ return nullptr;
+ const MachineFunction *MF = MI.getParent()->getParent();
+ return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
}
- OffsetIsScalable = Scale.isScalable();
-
- return BaseOp->isReg() || BaseOp->isFI();
-}
-MachineOperand &
-AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
- assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
- MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
- assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
- return OfsOp;
-}
-
-bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
- TypeSize &Width, int64_t &MinOffset,
- int64_t &MaxOffset) {
- switch (Opcode) {
- // Not a memory operation or something we want to handle.
- default:
- Scale = TypeSize::getFixed(0);
- Width = TypeSize::getFixed(0);
- MinOffset = MaxOffset = 0;
- return false;
- // LDR / STR
- case AArch64::LDRQui:
- case AArch64::STRQui:
- Scale = TypeSize::getFixed(16);
- Width = TypeSize::getFixed(16);
- MinOffset = 0;
- MaxOffset = 4095;
- break;
- case AArch64::LDRXui:
- case AArch64::LDRDui:
- case AArch64::STRXui:
- case AArch64::STRDui:
- case AArch64::PRFMui:
- Scale = TypeSize::getFixed(8);
- Width = TypeSize::getFixed(8);
- MinOffset = 0;
- MaxOffset = 4095;
- break;
- case AArch64::LDRWui:
- case AArch64::LDRSui:
- case AArch64::LDRSWui:
- case AArch64::STRWui:
- case AArch64::STRSui:
- Scale = TypeSize::getFixed(4);
- Width = TypeSize::getFixed(4);
- MinOffset = 0;
- MaxOffset = 4095;
- break;
- case AArch64::LDRHui:
- case AArch64::LDRHHui:
- case AArch64::LDRSHWui:
- case AArch64::LDRSHXui:
- case AArch64::STRHui:
- case AArch64::STRHHui:
- Scale = TypeSize::getFixed(2);
- Width = TypeSize::getFixed(2);
- MinOffset = 0;
- MaxOffset = 4095;
- break;
- case AArch64::LDRBui:
- case AArch64::LDRBBui:
- case AArch64::LDRSBWui:
- case AArch64::LDRSBXui:
- case AArch64::STRBui:
- case AArch64::STRBBui:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(1);
- MinOffset = 0;
- MaxOffset = 4095;
- break;
- // post/pre inc
- case AArch64::STRQpre:
- case AArch64::LDRQpost:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(16);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- case AArch64::LDRDpost:
- case AArch64::LDRDpre:
- case AArch64::LDRXpost:
- case AArch64::LDRXpre:
- case AArch64::STRDpost:
- case AArch64::STRDpre:
- case AArch64::STRXpost:
- case AArch64::STRXpre:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(8);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- case AArch64::STRWpost:
- case AArch64::STRWpre:
- case AArch64::LDRWpost:
- case AArch64::LDRWpre:
- case AArch64::STRSpost:
- case AArch64::STRSpre:
- case AArch64::LDRSpost:
- case AArch64::LDRSpre:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(4);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- case AArch64::LDRHpost:
- case AArch64::LDRHpre:
- case AArch64::STRHpost:
- case AArch64::STRHpre:
- case AArch64::LDRHHpost:
- case AArch64::LDRHHpre:
- case AArch64::STRHHpost:
- case AArch64::STRHHpre:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(2);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- case AArch64::LDRBpost:
- case AArch64::LDRBpre:
- case AArch64::STRBpost:
- case AArch64::STRBpre:
- case AArch64::LDRBBpost:
- case AArch64::LDRBBpre:
- case AArch64::STRBBpost:
- case AArch64::STRBBpre:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(1);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- // Unscaled
- case AArch64::LDURQi:
- case AArch64::STURQi:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(16);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- case AArch64::LDURXi:
- case AArch64::LDURDi:
- case AArch64::LDAPURXi:
- case AArch64::STURXi:
- case AArch64::STURDi:
- case AArch64::STLURXi:
- case AArch64::PRFUMi:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(8);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- case AArch64::LDURWi:
- case AArch64::LDURSi:
- case AArch64::LDURSWi:
- case AArch64::LDAPURi:
- case AArch64::LDAPURSWi:
- case AArch64::STURWi:
- case AArch64::STURSi:
- case AArch64::STLURWi:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(4);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- case AArch64::LDURHi:
- case AArch64::LDURHHi:
- case AArch64::LDURSHXi:
- case AArch64::LDURSHWi:
- case AArch64::LDAPURHi:
- case AArch64::LDAPURSHWi:
- case AArch64::LDAPURSHXi:
- case AArch64::STURHi:
- case AArch64::STURHHi:
- case AArch64::STLURHi:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(2);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- case AArch64::LDURBi:
- case AArch64::LDURBBi:
- case AArch64::LDURSBXi:
- case AArch64::LDURSBWi:
- case AArch64::LDAPURBi:
- case AArch64::LDAPURSBWi:
- case AArch64::LDAPURSBXi:
- case AArch64::STURBi:
- case AArch64::STURBBi:
- case AArch64::STLURBi:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(1);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- // LDP / STP (including pre/post inc)
- case AArch64::LDPQi:
- case AArch64::LDNPQi:
- case AArch64::STPQi:
- case AArch64::STNPQi:
- case AArch64::LDPQpost:
- case AArch64::LDPQpre:
- case AArch64::STPQpost:
- case AArch64::STPQpre:
- Scale = TypeSize::getFixed(16);
- Width = TypeSize::getFixed(16 * 2);
- MinOffset = -64;
- MaxOffset = 63;
- break;
- case AArch64::LDPXi:
- case AArch64::LDPDi:
- case AArch64::LDNPXi:
- case AArch64::LDNPDi:
- case AArch64::STPXi:
- case AArch64::STPDi:
- case AArch64::STNPXi:
- case AArch64::STNPDi:
- case AArch64::LDPDpost:
- case AArch64::LDPDpre:
- case AArch64::LDPXpost:
- case AArch64::LDPXpre:
- case AArch64::STPDpost:
- case AArch64::STPDpre:
- case AArch64::STPXpost:
- case AArch64::STPXpre:
- Scale = TypeSize::getFixed(8);
- Width = TypeSize::getFixed(8 * 2);
- MinOffset = -64;
- MaxOffset = 63;
- break;
- case AArch64::LDPWi:
- case AArch64::LDPSi:
- case AArch64::LDNPWi:
- case AArch64::LDNPSi:
- case AArch64::STPWi:
- case AArch64::STPSi:
- case AArch64::STNPWi:
- case AArch64::STNPSi:
- case AArch64::LDPSpost:
- case AArch64::LDPSpre:
- case AArch64::LDPWpost:
- case AArch64::LDPWpre:
- case AArch64::STPSpost:
- case AArch64::STPSpre:
- case AArch64::STPWpost:
- case AArch64::STPWpre:
- Scale = TypeSize::getFixed(4);
- Width = TypeSize::getFixed(4 * 2);
- MinOffset = -64;
- MaxOffset = 63;
- break;
- case AArch64::StoreSwiftAsyncContext:
- // Store is an STRXui, but there might be an ADDXri in the expansion too.
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(8);
- MinOffset = 0;
- MaxOffset = 4095;
- break;
- case AArch64::ADDG:
- Scale = TypeSize::getFixed(16);
- Width = TypeSize::getFixed(0);
- MinOffset = 0;
- MaxOffset = 63;
- break;
- case AArch64::TAGPstack:
- Scale = TypeSize::getFixed(16);
- Width = TypeSize::getFixed(0);
- // TAGP with a negative offset turns into SUBP, which has a maximum offset
- // of 63 (not 64!).
- MinOffset = -63;
- MaxOffset = 63;
- break;
- case AArch64::LDG:
- case AArch64::STGi:
- case AArch64::STGPreIndex:
- case AArch64::STGPostIndex:
- case AArch64::STZGi:
- case AArch64::STZGPreIndex:
- case AArch64::STZGPostIndex:
- Scale = TypeSize::getFixed(16);
- Width = TypeSize::getFixed(16);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- // SVE
- case AArch64::STR_ZZZZXI:
- case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
- case AArch64::LDR_ZZZZXI:
- case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
- Scale = TypeSize::getScalable(16);
- Width = TypeSize::getScalable(16 * 4);
- MinOffset = -256;
- MaxOffset = 252;
- break;
- case AArch64::STR_ZZZXI:
- case AArch64::LDR_ZZZXI:
- Scale = TypeSize::getScalable(16);
- Width = TypeSize::getScalable(16 * 3);
- MinOffset = -256;
- MaxOffset = 253;
- break;
- case AArch64::STR_ZZXI:
- case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
- case AArch64::LDR_ZZXI:
- case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
- Scale = TypeSize::getScalable(16);
- Width = TypeSize::getScalable(16 * 2);
- MinOffset = -256;
- MaxOffset = 254;
- break;
- case AArch64::LDR_PXI:
- case AArch64::STR_PXI:
- Scale = TypeSize::getScalable(2);
- Width = TypeSize::getScalable(2);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- case AArch64::LDR_PPXI:
- case AArch64::STR_PPXI:
- Scale = TypeSize::getScalable(2);
- Width = TypeSize::getScalable(2 * 2);
- MinOffset = -256;
- MaxOffset = 254;
- break;
- case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
- case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
- case AArch64::LDR_ZXI:
- case AArch64::STR_ZXI:
- Scale = TypeSize::getScalable(16);
- Width = TypeSize::getScalable(16);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- case AArch64::LD1B_IMM:
- case AArch64::LD1H_IMM:
- case AArch64::LD1W_IMM:
- case AArch64::LD1D_IMM:
- case AArch64::LDNT1B_ZRI:
- case AArch64::LDNT1H_ZRI:
- case AArch64::LDNT1W_ZRI:
- case AArch64::LDNT1D_ZRI:
- case AArch64::ST1B_IMM:
- case AArch64::ST1H_IMM:
- case AArch64::ST1W_IMM:
- case AArch64::ST1D_IMM:
- case AArch64::STNT1B_ZRI:
- case AArch64::STNT1H_ZRI:
- case AArch64::STNT1W_ZRI:
- case AArch64::STNT1D_ZRI:
- case AArch64::LDNF1B_IMM:
- case AArch64::LDNF1H_IMM:
- case AArch64::LDNF1W_IMM:
- case AArch64::LDNF1D_IMM:
- // A full vectors worth of data
- // Width = mbytes * elements
- Scale = TypeSize::getScalable(16);
- Width = TypeSize::getScalable(16);
- MinOffset = -8;
- MaxOffset = 7;
- break;
- case AArch64::LD2B_IMM:
- case AArch64::LD2H_IMM:
- case AArch64::LD2W_IMM:
- case AArch64::LD2D_IMM:
- case AArch64::ST2B_IMM:
- case AArch64::ST2H_IMM:
- case AArch64::ST2W_IMM:
- case AArch64::ST2D_IMM:
- Scale = TypeSize::getScalable(32);
- Width = TypeSize::getScalable(16 * 2);
- MinOffset = -8;
- MaxOffset = 7;
- break;
- case AArch64::LD3B_IMM:
- case AArch64::LD3H_IMM:
- case AArch64::LD3W_IMM:
- case AArch64::LD3D_IMM:
- case AArch64::ST3B_IMM:
- case AArch64::ST3H_IMM:
- case AArch64::ST3W_IMM:
- case AArch64::ST3D_IMM:
- Scale = TypeSize::getScalable(48);
- Width = TypeSize::getScalable(16 * 3);
- MinOffset = -8;
- MaxOffset = 7;
- break;
- case AArch64::LD4B_IMM:
- case AArch64::LD4H_IMM:
- case AArch64::LD4W_IMM:
- case AArch64::LD4D_IMM:
- case AArch64::ST4B_IMM:
- case AArch64::ST4H_IMM:
- case AArch64::ST4W_IMM:
- case AArch64::ST4D_IMM:
- Scale = TypeSize::getScalable(64);
- Width = TypeSize::getScalable(16 * 4);
- MinOffset = -8;
- MaxOffset = 7;
- break;
- case AArch64::LD1B_H_IMM:
- case AArch64::LD1SB_H_IMM:
- case AArch64::LD1H_S_IMM:
- case AArch64::LD1SH_S_IMM:
- case AArch64::LD1W_D_IMM:
- case AArch64::LD1SW_D_IMM:
- case AArch64::ST1B_H_IMM:
- case AArch64::ST1H_S_IMM:
- case AArch64::ST1W_D_IMM:
- case AArch64::LDNF1B_H_IMM:
- case AArch64::LDNF1SB_H_IMM:
- case AArch64::LDNF1H_S_IMM:
- case AArch64::LDNF1SH_S_IMM:
- case AArch64::LDNF1W_D_IMM:
- case AArch64::LDNF1SW_D_IMM:
- // A half vector worth of data
- // Width = mbytes * elements
- Scale = TypeSize::getScalable(8);
- Width = TypeSize::getScalable(8);
- MinOffset = -8;
- MaxOffset = 7;
- break;
- case AArch64::LD1B_S_IMM:
- case AArch64::LD1SB_S_IMM:
- case AArch64::LD1H_D_IMM:
- case AArch64::LD1SH_D_IMM:
- case AArch64::ST1B_S_IMM:
- case AArch64::ST1H_D_IMM:
- case AArch64::LDNF1B_S_IMM:
- case AArch64::LDNF1SB_S_IMM:
- case AArch64::LDNF1H_D_IMM:
- case AArch64::LDNF1SH_D_IMM:
- // A quarter vector worth of data
- // Width = mbytes * elements
- Scale = TypeSize::getScalable(4);
- Width = TypeSize::getScalable(4);
- MinOffset = -8;
- MaxOffset = 7;
- break;
- case AArch64::LD1B_D_IMM:
- case AArch64::LD1SB_D_IMM:
- case AArch64::ST1B_D_IMM:
- case AArch64::LDNF1B_D_IMM:
- case AArch64::LDNF1SB_D_IMM:
- // A eighth vector worth of data
- // Width = mbytes * elements
- Scale = TypeSize::getScalable(2);
- Width = TypeSize::getScalable(2);
- MinOffset = -8;
- MaxOffset = 7;
- break;
- case AArch64::ST2Gi:
- case AArch64::ST2GPreIndex:
- case AArch64::ST2GPostIndex:
- case AArch64::STZ2Gi:
- case AArch64::STZ2GPreIndex:
- case AArch64::STZ2GPostIndex:
- Scale = TypeSize::getFixed(16);
- Width = TypeSize::getFixed(32);
- MinOffset = -256;
- MaxOffset = 255;
- break;
- case AArch64::STGPi:
- case AArch64::STGPpost:
- case AArch64::STGPpre:
- Scale = TypeSize::getFixed(16);
- Width = TypeSize::getFixed(16);
- MinOffset = -64;
- MaxOffset = 63;
- break;
- case AArch64::LD1RB_IMM:
- case AArch64::LD1RB_H_IMM:
- case AArch64::LD1RB_S_IMM:
- case AArch64::LD1RB_D_IMM:
- case AArch64::LD1RSB_H_IMM:
- case AArch64::LD1RSB_S_IMM:
- case AArch64::LD1RSB_D_IMM:
- Scale = TypeSize::getFixed(1);
- Width = TypeSize::getFixed(1);
- MinOffset = 0;
- MaxOffset = 63;
- break;
- case AArch64::LD1RH_IMM:
- case AArch64::LD1RH_S_IMM:
- case AArch64::LD1RH_D_IMM:
- case AArch64::LD1RSH_S_IMM:
- case AArch64::LD1RSH_D_IMM:
- Scale = TypeSize::getFixed(2);
- Width = TypeSize::getFixed(2);
- MinOffset = 0;
- MaxOffset = 63;
- break;
- case AArch64::LD1RW_IMM:
- case AArch64::LD1RW_D_IMM:
- case AArch64::LD1RSW_IMM:
- Scale = TypeSize::getFixed(4);
- Width = TypeSize::getFixed(4);
- MinOffset = 0;
- MaxOffset = 63;
- break;
- case AArch64::LD1RD_IMM:
- Scale = TypeSize::getFixed(8);
- Width = TypeSize::getFixed(8);
- MinOffset = 0;
- MaxOffset = 63;
- break;
+ bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
+ auto IsHFPR = [&](const MachineOperand &Op) {
+ if (!Op.isReg())
+ return false;
+ auto Reg = Op.getReg();
+ if (Reg.isPhysical())
+ return AArch64::FPR16RegClass.contains(Reg);
+ const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+ return TRC == &AArch64::FPR16RegClass ||
+ TRC == &AArch64::FPR16_loRegClass;
+ };
+ return llvm::any_of(MI.operands(), IsHFPR);
}
- return true;
-}
-
-// Scaling factor for unscaled load or store.
-int AArch64InstrInfo::getMemScale(unsigned Opc) {
- switch (Opc) {
- default:
- llvm_unreachable("Opcode has unknown scale!");
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
- case AArch64::LDRSBWui:
- case AArch64::LDURSBWi:
- case AArch64::STRBBui:
- case AArch64::STURBBi:
- return 1;
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- case AArch64::LDRSHWui:
- case AArch64::LDURSHWi:
- case AArch64::STRHHui:
- case AArch64::STURHHi:
- return 2;
- case AArch64::LDRSui:
- case AArch64::LDURSi:
- case AArch64::LDRSpre:
- case AArch64::LDRSWui:
- case AArch64::LDURSWi:
- case AArch64::LDRSWpre:
- case AArch64::LDRWpre:
- case AArch64::LDRWui:
- case AArch64::LDURWi:
- case AArch64::STRSui:
- case AArch64::STURSi:
- case AArch64::STRSpre:
- case AArch64::STRWui:
- case AArch64::STURWi:
- case AArch64::STRWpre:
- case AArch64::LDPSi:
- case AArch64::LDPSWi:
- case AArch64::LDPWi:
- case AArch64::STPSi:
- case AArch64::STPWi:
- return 4;
- case AArch64::LDRDui:
- case AArch64::LDURDi:
- case AArch64::LDRDpre:
- case AArch64::LDRXui:
- case AArch64::LDURXi:
- case AArch64::LDRXpre:
- case AArch64::STRDui:
- case AArch64::STURDi:
- case AArch64::STRDpre:
- case AArch64::STRXui:
- case AArch64::STURXi:
- case AArch64::STRXpre:
- case AArch64::LDPDi:
- case AArch64::LDPXi:
- case AArch64::STPDi:
- case AArch64::STPXi:
- return 8;
- case AArch64::LDRQui:
- case AArch64::LDURQi:
- case AArch64::STRQui:
- case AArch64::STURQi:
- case AArch64::STRQpre:
- case AArch64::LDPQi:
- case AArch64::LDRQpre:
- case AArch64::STPQi:
- case AArch64::STGi:
- case AArch64::STZGi:
- case AArch64::ST2Gi:
- case AArch64::STZ2Gi:
- case AArch64::STGPi:
- return 16;
- }
-}
-
-bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- return false;
- case AArch64::LDRWpre:
- case AArch64::LDRXpre:
- case AArch64::LDRSWpre:
- case AArch64::LDRSpre:
- case AArch64::LDRDpre:
- case AArch64::LDRQpre:
- return true;
+ bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
+ auto IsQFPR = [&](const MachineOperand &Op) {
+ if (!Op.isReg())
+ return false;
+ auto Reg = Op.getReg();
+ if (Reg.isPhysical())
+ return AArch64::FPR128RegClass.contains(Reg);
+ const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+ return TRC == &AArch64::FPR128RegClass ||
+ TRC == &AArch64::FPR128_loRegClass;
+ };
+ return llvm::any_of(MI.operands(), IsQFPR);
}
-}
-bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- return false;
- case AArch64::STRWpre:
- case AArch64::STRXpre:
- case AArch64::STRSpre:
- case AArch64::STRDpre:
- case AArch64::STRQpre:
- return true;
+ bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AArch64::BRK:
+ case AArch64::HLT:
+ case AArch64::PACIASP:
+ case AArch64::PACIBSP:
+ // Implicit BTI behavior.
+ return true;
+ case AArch64::PAUTH_PROLOGUE:
+ // PAUTH_PROLOGUE expands to PACI(A|B)SP.
+ return true;
+ case AArch64::HINT: {
+ unsigned Imm = MI.getOperand(0).getImm();
+ // Explicit BTI instruction.
+ if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
+ return true;
+ // PACI(A|B)SP instructions.
+ if (Imm == 25 || Imm == 27)
+ return true;
+ return false;
+ }
+ default:
+ return false;
+ }
}
-}
-bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
- return isPreLd(MI) || isPreSt(MI);
-}
-
-bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- return false;
- case AArch64::LDPSi:
- case AArch64::LDPSWi:
- case AArch64::LDPDi:
- case AArch64::LDPQi:
- case AArch64::LDPWi:
- case AArch64::LDPXi:
- case AArch64::STPSi:
- case AArch64::STPDi:
- case AArch64::STPQi:
- case AArch64::STPWi:
- case AArch64::STPXi:
- case AArch64::STGPi:
- return true;
+ bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
+ if (Reg == 0)
+ return false;
+ assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
+ return AArch64::FPR128RegClass.contains(Reg) ||
+ AArch64::FPR64RegClass.contains(Reg) ||
+ AArch64::FPR32RegClass.contains(Reg) ||
+ AArch64::FPR16RegClass.contains(Reg) ||
+ AArch64::FPR8RegClass.contains(Reg);
}
-}
-
-const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
- assert(MI.mayLoadOrStore() && "Load or store instruction expected");
- unsigned Idx =
- AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
- : 1;
- return MI.getOperand(Idx);
-}
-
-const MachineOperand &
-AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
- assert(MI.mayLoadOrStore() && "Load or store instruction expected");
- unsigned Idx =
- AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
- : 2;
- return MI.getOperand(Idx);
-}
-const MachineOperand &
-AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode");
- case AArch64::LDRBroX:
- case AArch64::LDRBBroX:
- case AArch64::LDRSBXroX:
- case AArch64::LDRSBWroX:
- case AArch64::LDRHroX:
- case AArch64::LDRHHroX:
- case AArch64::LDRSHXroX:
- case AArch64::LDRSHWroX:
- case AArch64::LDRWroX:
- case AArch64::LDRSroX:
- case AArch64::LDRSWroX:
- case AArch64::LDRDroX:
- case AArch64::LDRXroX:
- case AArch64::LDRQroX:
- return MI.getOperand(4);
+ bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
+ auto IsFPR = [&](const MachineOperand &Op) {
+ if (!Op.isReg())
+ return false;
+ auto Reg = Op.getReg();
+ if (Reg.isPhysical())
+ return isFpOrNEON(Reg);
+
+ const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+ return TRC == &AArch64::FPR128RegClass ||
+ TRC == &AArch64::FPR128_loRegClass ||
+ TRC == &AArch64::FPR64RegClass ||
+ TRC == &AArch64::FPR64_loRegClass ||
+ TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
+ TRC == &AArch64::FPR8RegClass;
+ };
+ return llvm::any_of(MI.operands(), IsFPR);
}
-}
-
-static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
- Register Reg) {
- if (MI.getParent() == nullptr)
- return nullptr;
- const MachineFunction *MF = MI.getParent()->getParent();
- return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
-}
-bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
- auto IsHFPR = [&](const MachineOperand &Op) {
- if (!Op.isReg())
- return false;
- auto Reg = Op.getReg();
- if (Reg.isPhysical())
- return AArch64::FPR16RegClass.contains(Reg);
- const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
- return TRC == &AArch64::FPR16RegClass ||
- TRC == &AArch64::FPR16_loRegClass;
- };
- return llvm::any_of(MI.operands(), IsHFPR);
-}
+ // Scale the unscaled offsets. Returns false if the unscaled offset can't be
+ // scaled.
+ static bool scaleOffset(unsigned Opc, int64_t &Offset) {
+ int Scale = AArch64InstrInfo::getMemScale(Opc);
-bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
- auto IsQFPR = [&](const MachineOperand &Op) {
- if (!Op.isReg())
+ // If the byte-offset isn't a multiple of the stride, we can't scale this
+ // offset.
+ if (Offset % Scale != 0)
return false;
- auto Reg = Op.getReg();
- if (Reg.isPhysical())
- return AArch64::FPR128RegClass.contains(Reg);
- const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
- return TRC == &AArch64::FPR128RegClass ||
- TRC == &AArch64::FPR128_loRegClass;
- };
- return llvm::any_of(MI.operands(), IsQFPR);
-}
-bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case AArch64::BRK:
- case AArch64::HLT:
- case AArch64::PACIASP:
- case AArch64::PACIBSP:
- // Implicit BTI behavior.
+ // Convert the byte-offset used by unscaled into an "element" offset used
+ // by the scaled pair load/store instructions.
+ Offset /= Scale;
return true;
- case AArch64::PAUTH_PROLOGUE:
- // PAUTH_PROLOGUE expands to PACI(A|B)SP.
- return true;
- case AArch64::HINT: {
- unsigned Imm = MI.getOperand(0).getImm();
- // Explicit BTI instruction.
- if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
- return true;
- // PACI(A|B)SP instructions.
- if (Imm == 25 || Imm == 27)
- return true;
- return false;
- }
- default:
- return false;
}
-}
-
-bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
- if (Reg == 0)
- return false;
- assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
- return AArch64::FPR128RegClass.contains(Reg) ||
- AArch64::FPR64RegClass.contains(Reg) ||
- AArch64::FPR32RegClass.contains(Reg) ||
- AArch64::FPR16RegClass.contains(Reg) ||
- AArch64::FPR8RegClass.contains(Reg);
-}
-bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
- auto IsFPR = [&](const MachineOperand &Op) {
- if (!Op.isReg())
+ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
+ if (FirstOpc == SecondOpc)
+ return true;
+ // We can also pair sign-ext and zero-ext instructions.
+ switch (FirstOpc) {
+ default:
return false;
- auto Reg = Op.getReg();
- if (Reg.isPhysical())
- return isFpOrNEON(Reg);
-
- const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
- return TRC == &AArch64::FPR128RegClass ||
- TRC == &AArch64::FPR128_loRegClass ||
- TRC == &AArch64::FPR64RegClass ||
- TRC == &AArch64::FPR64_loRegClass ||
- TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
- TRC == &AArch64::FPR8RegClass;
- };
- return llvm::any_of(MI.operands(), IsFPR);
-}
-
-// Scale the unscaled offsets. Returns false if the unscaled offset can't be
-// scaled.
-static bool scaleOffset(unsigned Opc, int64_t &Offset) {
- int Scale = AArch64InstrInfo::getMemScale(Opc);
-
- // If the byte-offset isn't a multiple of the stride, we can't scale this
- // offset.
- if (Offset % Scale != 0)
+ case AArch64::STRSui:
+ case AArch64::STURSi:
+ return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
+ case AArch64::STRDui:
+ case AArch64::STURDi:
+ return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
+ case AArch64::STRQui:
+ case AArch64::STURQi:
+ return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
+ case AArch64::STRWui:
+ case AArch64::STURWi:
+ return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
+ case AArch64::STRXui:
+ case AArch64::STURXi:
+ return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
+ case AArch64::LDRSui:
+ case AArch64::LDURSi:
+ return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
+ case AArch64::LDRDui:
+ case AArch64::LDURDi:
+ return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
+ return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
+ return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
+ case AArch64::LDRSWui:
+ case AArch64::LDURSWi:
+ return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
+ case AArch64::LDRXui:
+ case AArch64::LDURXi:
+ return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
+ }
+ // These instructions can't be paired based on their opcodes.
return false;
+ }
- // Convert the byte-offset used by unscaled into an "element" offset used
- // by the scaled pair load/store instructions.
- Offset /= Scale;
- return true;
-}
-
-static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
- if (FirstOpc == SecondOpc)
- return true;
- // We can also pair sign-ext and zero-ext instructions.
- switch (FirstOpc) {
- default:
- return false;
- case AArch64::STRSui:
- case AArch64::STURSi:
- return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
- case AArch64::STRDui:
- case AArch64::STURDi:
- return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
- case AArch64::STRQui:
- case AArch64::STURQi:
- return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
- case AArch64::STRWui:
- case AArch64::STURWi:
- return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
- case AArch64::STRXui:
- case AArch64::STURXi:
- return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
- case AArch64::LDRSui:
- case AArch64::LDURSi:
- return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
- case AArch64::LDRDui:
- case AArch64::LDURDi:
- return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
- case AArch64::LDRQui:
- case AArch64::LDURQi:
- return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
- case AArch64::LDRWui:
- case AArch64::LDURWi:
- return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
- case AArch64::LDRSWui:
- case AArch64::LDURSWi:
- return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
- case AArch64::LDRXui:
- case AArch64::LDURXi:
- return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
- }
- // These instructions can't be paired based on their opcodes.
- return false;
-}
+ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
+ int64_t Offset1, unsigned Opcode1, int FI2,
+ int64_t Offset2, unsigned Opcode2) {
+ // Accesses through fixed stack object frame indices may access a different
+ // fixed stack slot. Check that the object offsets + offsets match.
+ if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
+ int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
+ int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
+ assert(ObjectOffset1 <= ObjectOffset2 &&
+ "Object offsets are not ordered.");
+ // Convert to scaled object offsets.
+ int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
+ if (ObjectOffset1 % Scale1 != 0)
+ return false;
+ ObjectOffset1 /= Scale1;
+ int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
+ if (ObjectOffset2 % Scale2 != 0)
+ return false;
+ ObjectOffset2 /= Scale2;
+ ObjectOffset1 += Offset1;
+ ObjectOffset2 += Offset2;
+ return ObjectOffset1 + 1 == ObjectOffset2;
+ }
-static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
- int64_t Offset1, unsigned Opcode1, int FI2,
- int64_t Offset2, unsigned Opcode2) {
- // Accesses through fixed stack object frame indices may access a different
- // fixed stack slot. Check that the object offsets + offsets match.
- if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
- int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
- int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
- assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
- // Convert to scaled object offsets.
- int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
- if (ObjectOffset1 % Scale1 != 0)
+ return FI1 == FI2;
+ }
+
+ /// Detect opportunities for ldp/stp formation.
+ ///
+ /// Only called for LdSt for which getMemOperandWithOffset returns true.
+ bool AArch64InstrInfo::shouldClusterMemOps(
+ ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
+ bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+ int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
+ unsigned NumBytes) const {
+ assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
+ const MachineOperand &BaseOp1 = *BaseOps1.front();
+ const MachineOperand &BaseOp2 = *BaseOps2.front();
+ const MachineInstr &FirstLdSt = *BaseOp1.getParent();
+ const MachineInstr &SecondLdSt = *BaseOp2.getParent();
+ if (BaseOp1.getType() != BaseOp2.getType())
return false;
- ObjectOffset1 /= Scale1;
- int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
- if (ObjectOffset2 % Scale2 != 0)
- return false;
- ObjectOffset2 /= Scale2;
- ObjectOffset1 += Offset1;
- ObjectOffset2 += Offset2;
- return ObjectOffset1 + 1 == ObjectOffset2;
- }
- return FI1 == FI2;
-}
+ assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+ "Only base registers and frame indices are supported.");
-/// Detect opportunities for ldp/stp formation.
-///
-/// Only called for LdSt for which getMemOperandWithOffset returns true.
-bool AArch64InstrInfo::shouldClusterMemOps(
- ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
- bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
- int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
- unsigned NumBytes) const {
- assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
- const MachineOperand &BaseOp1 = *BaseOps1.front();
- const MachineOperand &BaseOp2 = *BaseOps2.front();
- const MachineInstr &FirstLdSt = *BaseOp1.getParent();
- const MachineInstr &SecondLdSt = *BaseOp2.getParent();
- if (BaseOp1.getType() != BaseOp2.getType())
- return false;
+ // Check for both base regs and base FI.
+ if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
+ return false;
- assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
- "Only base registers and frame indices are supported.");
+ // Only cluster up to a single pair.
+ if (ClusterSize > 2)
+ return false;
- // Check for both base regs and base FI.
- if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
- return false;
+ if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
+ return false;
- // Only cluster up to a single pair.
- if (ClusterSize > 2)
- return false;
+ // Can we pair these instructions based on their opcodes?
+ unsigned FirstOpc = FirstLdSt.getOpcode();
+ unsigned SecondOpc = SecondLdSt.getOpcode();
+ if (!canPairLdStOpc(FirstOpc, SecondOpc))
+ return false;
- if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
- return false;
+ // Can't merge volatiles or load/stores that have a hint to avoid pair
+ // formation, for example.
+ if (!isCandidateToMergeOrPair(FirstLdSt) ||
+ !isCandidateToMergeOrPair(SecondLdSt))
+ return false;
- // Can we pair these instructions based on their opcodes?
- unsigned FirstOpc = FirstLdSt.getOpcode();
- unsigned SecondOpc = SecondLdSt.getOpcode();
- if (!canPairLdStOpc(FirstOpc, SecondOpc))
- return false;
+ // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
+ int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
+ if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
+ return false;
- // Can't merge volatiles or load/stores that have a hint to avoid pair
- // formation, for example.
- if (!isCandidateToMergeOrPair(FirstLdSt) ||
- !isCandidateToMergeOrPair(SecondLdSt))
- return false;
+ int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
+ if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
+ return false;
- // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
- int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
- if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
- return false;
+ // Pairwise instructions have a 7-bit signed offset field.
+ if (Offset1 > 63 || Offset1 < -64)
+ return false;
- int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
- if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
- return false;
+ // The caller should already have ordered First/SecondLdSt by offset.
+ // Note: except for non-equal frame index bases
+ if (BaseOp1.isFI()) {
+ assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
+ "Caller should have ordered offsets.");
- // Pairwise instructions have a 7-bit signed offset field.
- if (Offset1 > 63 || Offset1 < -64)
- return false;
+ const MachineFrameInfo &MFI =
+ FirstLdSt.getParent()->getParent()->getFrameInfo();
+ return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
+ BaseOp2.getIndex(), Offset2, SecondOpc);
+ }
- // The caller should already have ordered First/SecondLdSt by offset.
- // Note: except for non-equal frame index bases
- if (BaseOp1.isFI()) {
- assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
- "Caller should have ordered offsets.");
+ assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
- const MachineFrameInfo &MFI =
- FirstLdSt.getParent()->getParent()->getFrameInfo();
- return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
- BaseOp2.getIndex(), Offset2, SecondOpc);
+ return Offset1 + 1 == Offset2;
}
- assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
-
- return Offset1 + 1 == Offset2;
-}
-
-static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
- MCRegister Reg, unsigned SubIdx,
- unsigned State,
- const TargetRegisterInfo *TRI) {
- if (!SubIdx)
- return MIB.addReg(Reg, State);
-
- if (Reg.isPhysical())
- return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
- return MIB.addReg(Reg, State, SubIdx);
-}
-
-static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
- unsigned NumRegs) {
- // We really want the positive remainder mod 32 here, that happens to be
- // easily obtainable with a mask.
- return ((DestReg - SrcReg) & 0x1f) < NumRegs;
-}
+ static const MachineInstrBuilder &AddSubReg(
+ const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx,
+ unsigned State, const TargetRegisterInfo *TRI) {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
-void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- const DebugLoc &DL, MCRegister DestReg,
- MCRegister SrcReg, bool KillSrc,
- unsigned Opcode,
- ArrayRef<unsigned> Indices) const {
- assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
- uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
- unsigned NumRegs = Indices.size();
+ if (Reg.isPhysical())
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+ }
- int SubReg = 0, End = NumRegs, Incr = 1;
- if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
- SubReg = NumRegs - 1;
- End = -1;
- Incr = -1;
+ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
+ unsigned NumRegs) {
+ // We really want the positive remainder mod 32 here, that happens to be
+ // easily obtainable with a mask.
+ return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
- for (; SubReg != End; SubReg += Incr) {
- const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
- AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
- AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
- AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+ void AArch64InstrInfo::copyPhysRegTuple(
+ MachineBasicBlock & MBB, MachineBasicBlock::iterator I,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
+ unsigned Opcode, ArrayRef<unsigned> Indices) const {
+ assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
+ uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
+ unsigned NumRegs = Indices.size();
+
+ int SubReg = 0, End = NumRegs, Incr = 1;
+ if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
+ SubReg = NumRegs - 1;
+ End = -1;
+ Incr = -1;
+ }
+
+ for (; SubReg != End; SubReg += Incr) {
+ const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
+ AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
+ AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
+ AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+ }
}
-}
-void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- const DebugLoc &DL, MCRegister DestReg,
- MCRegister SrcReg, bool KillSrc,
- unsigned Opcode, unsigned ZeroReg,
- llvm::ArrayRef<unsigned> Indices) const {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned NumRegs = Indices.size();
+ void AArch64InstrInfo::copyGPRRegTuple(
+ MachineBasicBlock & MBB, MachineBasicBlock::iterator I,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
+ unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef<unsigned> Indices)
+ const {
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned NumRegs = Indices.size();
#ifndef NDEBUG
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
``````````
</details>
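For anyone skimming the re-indented hunk above: the clustering code it touches first converts unscaled (LDUR/STUR-style) byte offsets into element offsets via `scaleOffset`/`getMemScale`, then rejects anything outside the LDP/STP 7-bit signed immediate range (`Offset1 > 63 || Offset1 < -64`). A minimal standalone sketch of that check follows; the names `fitsLdpStpImm7` and `Scale` are illustrative stand-ins, not the real `AArch64InstrInfo` helpers.

``````````cpp
#include <cstdint>

// Illustrative only: mirrors the scale-and-range check performed by the
// diffed scaleOffset/shouldClusterMemOps logic, without the LLVM types.
// 'Scale' plays the role of AArch64InstrInfo::getMemScale(Opc), e.g. 8
// for an 8-byte load or store.
bool fitsLdpStpImm7(int64_t ByteOffset, int64_t Scale) {
  // An unscaled byte offset must be a multiple of the access size before
  // it can be expressed as an LDP/STP element offset.
  if (ByteOffset % Scale != 0)
    return false;
  int64_t Elements = ByteOffset / Scale;
  // LDP/STP encode a 7-bit signed element offset: -64 .. 63.
  return Elements >= -64 && Elements <= 63;
}
``````````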
https://github.com/llvm/llvm-project/pull/150803
More information about the llvm-commits mailing list