[llvm] STACK: GlobalISel: indexed load/stores support for AArch64 (PR #69533)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 18 15:09:58 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Amara Emerson (aemerson)
<details>
<summary>Changes</summary>
This is a stack. See https://github.com/llvm/llvm-project/pull/69532 for the next patch to be reviewed.
- [AArch64][GlobalISel] Add support for post-indexed loads/stores.
- [AArch64][GlobalISel] Add support for pre-indexed loads/stores.
- [AArch64][GlobalISel] Add support for extending indexed loads.
Taken together, these changes provide some good code size improvements on CTMark at -Os (a short source-level example of the kind of code that benefits follows the table):
```
Program                                        size.__text
                                   output3rsutb82   outputy9pewxwy    diff
consumer-typeset/consumer-typeset       407484.00        407936.00    0.1%
tramp3d-v4/tramp3d-v4                   393300.00        393424.00    0.0%
SPASS/SPASS                             410444.00        410340.00   -0.0%
lencod/lencod                           427804.00        427676.00   -0.0%
Bullet/bullet                           457988.00        457828.00   -0.0%
kimwitu++/kc                            453156.00        452988.00   -0.0%
7zip/7zip-benchmark                     592268.00        591684.00   -0.1%
mafft/pairlocalalign                    243460.00        243156.00   -0.1%
ClamAV/clamscan                         380684.00        380176.00   -0.1%
sqlite3/sqlite3                         285316.00        284448.00   -0.3%
Geomean difference                                                   -0.1%
```
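To make the effect concrete, here is a minimal source-level sketch of the kind of code that benefits; it is my own illustration, not taken from the patch or its tests. With post-indexed addressing, the store and the pointer increment in the loop body can be selected as a single instruction such as `str w1, [x0], #4` instead of a store plus a separate add (pre-indexed forms like `str w1, [x0, #4]!` cover the analogous case).

```c++
// Illustrative sketch only, not part of this patch: the pointer bump after
// each store is a candidate to be folded into a post-indexed store,
// e.g. "str w1, [x0], #4", removing the separate add instruction.
void fill(int *p, int v, int n) {
  for (int i = 0; i < n; ++i)
    *p++ = v; // store + pointer increment -> post-indexed store candidate
}
```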
---
Patch is 163.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69533.diff
17 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+5-1)
- (modified) llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h (+1-1)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+1-1)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+184-49)
- (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+2-1)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+15)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+2)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (+197)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+65)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp (+53-24)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h (+5)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll (+124-93)
- (added) llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir (+191)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir (+9-8)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll (+234-901)
- (modified) llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll (+357-723)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index d64b414f2747621..65299e852574bd1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -58,6 +58,8 @@ struct IndexedLoadStoreMatchInfo {
Register Addr;
Register Base;
Register Offset;
+ bool RematOffset; // True if Offset is a constant that needs to be
+ // rematerialized before the new load/store.
bool IsPre;
};
@@ -814,12 +816,14 @@ class CombinerHelper {
void applyCommuteBinOpOperands(MachineInstr &MI);
private:
+ /// Checks for legality of an indexed variant of \p LdSt.
+ bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
///
/// \returns true if a candidate is found.
bool findPostIndexCandidate(GLoadStore &MI, Register &Addr, Register &Base,
- Register &Offset);
+ Register &Offset, bool &RematOffset);
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a pre-indexing operation.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6c36b1bbcf8649b..b34b90fd24eb602 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -97,7 +97,7 @@ class GIndexedLoad : public GMemOperation {
/// Get the offset register of the pointer value.
Register getOffsetReg() const { return getOperand(3).getReg(); }
- bool isPre() const { return getOperand(5).getImm() == 1; }
+ bool isPre() const { return getOperand(4).getImm() == 1; }
bool isPost() const { return !isPre(); }
static bool classof(const MachineInstr *MI) {
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 7e0691e1ee95048..bb8223ba3486a8d 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1248,7 +1248,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
- combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
+ undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
reassocs, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9efb70f28fee3ee..fbfd272ff2086a8 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -945,42 +945,171 @@ void CombinerHelper::applySextInRegOfLoad(
MI.eraseFromParent();
}
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+/// Return true if 'MI' is a load or store whose address operand can be
+/// folded into the load/store addressing mode.
+static bool canFoldInAddressingMode(GLoadStore *MI,
+ const TargetLowering &TLI,
+ MachineRegisterInfo &MRI) {
+ TargetLowering::AddrMode AM;
+ auto *MF = MI->getMF();
+ auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
+ if (!Addr)
+ return false;
+
+ AM.HasBaseReg = true;
+ auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI);
+ if (CstOff)
+ AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
+ else
+ AM.Scale = 1; // [reg +/- reg]
+
+ return TLI.isLegalAddressingMode(
+ MF->getDataLayout(), AM,
+ getTypeForLLT(MI->getMMO().getMemoryType(),
+ MF->getFunction().getContext()),
+ MI->getMMO().getAddrSpace());
+}
+
+namespace {
+unsigned getIndexedOpc(unsigned LdStOpc) {
+ switch (LdStOpc) {
+ case TargetOpcode::G_LOAD:
+ return TargetOpcode::G_INDEXED_LOAD;
+ case TargetOpcode::G_STORE:
+ return TargetOpcode::G_INDEXED_STORE;
+ case TargetOpcode::G_ZEXTLOAD:
+ return TargetOpcode::G_INDEXED_ZEXTLOAD;
+ case TargetOpcode::G_SEXTLOAD:
+ return TargetOpcode::G_INDEXED_SEXTLOAD;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+} // namespace
+
+bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
+ // Check for legality.
+ LLT PtrTy = MRI.getType(LdSt.getPointerReg());
+ LLT Ty = MRI.getType(LdSt.getReg(0));
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+ {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
+ SmallVector<LLT> OpTys;
+ if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
+ OpTys = {PtrTy, Ty, Ty};
+ else
+ OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
+
+ LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
+ return isLegal(Q);
+}
+
+static cl::opt<unsigned> PostIndexUseThreshold(
+ "post-index-use-threshold", cl::Hidden, cl::init(32),
+ cl::desc("Number of uses of a base pointer to check before it is no longer "
+ "considered for post-indexing."));
+
bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
- Register &Base, Register &Offset) {
+ Register &Base, Register &Offset,
+ bool &RematOffset) {
+ // We're looking for the following pattern, for either load or store:
+ // %baseptr:_(p0) = ...
+ // G_STORE %val(s64), %baseptr(p0)
+ // %offset:_(s64) = G_CONSTANT i64 -256
+ // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
auto &MF = *LdSt.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
- Base = LdSt.getPointerReg();
+ Register Ptr = LdSt.getPointerReg();
+ // If the store is the only use, don't bother.
+ if (MRI.hasOneNonDBGUse(Ptr))
+ return false;
+
+ if (!isIndexedLoadStoreLegal(LdSt))
+ return false;
- if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Base, MRI))
+ if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
return false;
- // FIXME: The following use traversal needs a bail out for patholigical cases.
- for (auto &Use : MRI.use_nodbg_instructions(Base)) {
+ MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
+ auto *PtrDef = MRI.getVRegDef(Ptr);
+
+ unsigned NumUsesChecked = 0;
+ for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
+ if (++NumUsesChecked > PostIndexUseThreshold)
+ return false; // Try to avoid exploding compile time.
+
auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
- if (!PtrAdd)
+ // The use itself might be dead. This can happen during combines if DCE
+ // hasn't had a chance to run yet. Don't allow it to form an indexed op.
+ if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
+ continue;
+
+ // Check that this use isn't the store itself, otherwise we'd generate an
+ // indexed store defining its own use.
+ if (StoredValDef == &Use)
continue;
Offset = PtrAdd->getOffsetReg();
if (!ForceLegalIndexing &&
- !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ false, MRI))
+ !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
+ /*IsPre*/ false, MRI))
continue;
// Make sure the offset calculation is before the potentially indexed op.
MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
- if (!dominates(*OffsetDef, LdSt))
- continue;
+ if (!dominates(*OffsetDef, LdSt)) {
+ // If the offset however is just a G_CONSTANT, we can always just
+ // rematerialize it where we need it.
+ if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
+ continue;
+ RematOffset = true;
+ }
- // FIXME: check whether all uses of Base are load/store with foldable
- // addressing modes. If so, using the normal addr-modes is better than
- // forming an indexed one.
- if (any_of(MRI.use_nodbg_instructions(PtrAdd->getReg(0)),
- [&](MachineInstr &PtrAddUse) {
- return !dominates(LdSt, PtrAddUse);
- }))
- continue;
+ for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
+ if (&BasePtrUse == PtrDef)
+ continue;
+
+ // If the user is a later load/store that can be post-indexed, then don't
+ // combine this one.
+ auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
+ if (BasePtrLdSt && BasePtrLdSt != &LdSt) {
+ if (dominates(LdSt, *BasePtrLdSt)) {
+ if (isIndexedLoadStoreLegal(*BasePtrLdSt))
+ return false;
+ }
+ }
+
+ // Now we're looking for the key G_PTR_ADD instruction, which contains
+ // the offset add that we want to fold.
+ if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
+ Register PtrAddDefReg = BasePtrUseDef->getReg(0);
+ for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
+ // If the use is in a different block, then we may produce worse code
+ // due to the extra register pressure.
+ if (BaseUseUse.getParent() != LdSt.getParent())
+ return false;
+
+ if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse)) {
+ if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
+ return false;
+ }
+ }
+ if (!dominates(LdSt, BasePtrUse))
+ return false; // All uses must be dominated by the load/store.
+ }
+ }
Addr = PtrAdd->getReg(0);
+ Base = PtrAdd->getBaseReg();
return true;
}
@@ -1001,6 +1130,9 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
!TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
return false;
+ if (!isIndexedLoadStoreLegal(LdSt))
+ return false;
+
MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
return false;
@@ -1016,27 +1148,43 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
return false;
}
+ // Avoid increasing cross-block register pressure.
+ for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
+ if (AddrUse.getParent() != LdSt.getParent())
+ return false;
+ }
+
// FIXME: check whether all uses of the base pointer are constant PtrAdds.
// That might allow us to end base's liveness here by adjusting the constant.
-
- return all_of(MRI.use_nodbg_instructions(Addr),
- [&](MachineInstr &UseMI) { return dominates(LdSt, UseMI); });
+ bool RealUse = false;
+ for (auto &PtrUse : MRI.use_nodbg_instructions(Addr)) {
+ if (!dominates(LdSt, PtrUse))
+ return false; // All uses must be dominated by the load/store.
+
+ // If Ptr can be folded into the addressing mode of another use, then it's
+ // not profitable to do this transformation.
+ if (auto *UseLdSt = dyn_cast<GLoadStore>(&PtrUse)) {
+ if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
+ RealUse = true;
+ } else {
+ RealUse = true;
+ }
+ }
+ return RealUse;
}
bool CombinerHelper::matchCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
auto &LdSt = cast<GLoadStore>(MI);
- // For now, no targets actually support these opcodes so don't waste time
- // running these unless we're forced to for testing.
- if (!ForceLegalIndexing)
+ if (LdSt.isAtomic())
return false;
MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset);
if (!MatchInfo.IsPre &&
!findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
- MatchInfo.Offset))
+ MatchInfo.Offset, MatchInfo.RematOffset))
return false;
return true;
@@ -1045,28 +1193,21 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
void CombinerHelper::applyCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
- MachineIRBuilder MIRBuilder(MI);
+ Builder.setInstrAndDebugLoc(MI);
unsigned Opcode = MI.getOpcode();
bool IsStore = Opcode == TargetOpcode::G_STORE;
- unsigned NewOpcode;
- switch (Opcode) {
- case TargetOpcode::G_LOAD:
- NewOpcode = TargetOpcode::G_INDEXED_LOAD;
- break;
- case TargetOpcode::G_SEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
- break;
- case TargetOpcode::G_ZEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
- break;
- case TargetOpcode::G_STORE:
- NewOpcode = TargetOpcode::G_INDEXED_STORE;
- break;
- default:
- llvm_unreachable("Unknown load/store opcode");
+ unsigned NewOpcode = getIndexedOpc(Opcode);
+
+ // If the offset constant didn't happen to dominate the load/store, we can
+ // just clone it as needed.
+ if (MatchInfo.RematOffset) {
+ auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
+ auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
+ *OldCst->getOperand(1).getCImm());
+ MatchInfo.Offset = NewCst.getReg(0);
}
- auto MIB = MIRBuilder.buildInstr(NewOpcode);
+ auto MIB = Builder.buildInstr(NewOpcode);
if (IsStore) {
MIB.addDef(MatchInfo.Addr);
MIB.addUse(MI.getOperand(0).getReg());
@@ -1245,13 +1386,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
- if (Ty.isVector())
- return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
- Ty.getNumElements());
- return IntegerType::get(C, Ty.getSizeInBits());
-}
-
+
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index f7b55cad4269944..9ae1dd99f20f45d 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -245,7 +245,8 @@ def AArch64PostLegalizerLowering
// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombiner
: GICombiner<"AArch64PostLegalizerCombinerImpl",
- [copy_prop, combines_for_extload,
+ [copy_prop, combines_for_extload, reassocs,
+ combine_indexed_load_store,
sext_trunc_sextload, mutate_anyext_to_zext,
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a16a102e472e709..b9a99c96ef97638 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -37,6 +37,8 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -23615,6 +23617,19 @@ bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
+bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
+ Register Offset, bool IsPre,
+ MachineRegisterInfo &MRI) const {
+ auto CstOffset = getIConstantVRegVal(Offset, MRI);
+ if (!CstOffset || CstOffset->isZero())
+ return false;
+
+ // All of the indexed addressing mode instructions take a signed 9 bit
+ // immediate offset. Our CstOffset is a G_PTR_ADD offset so it already
+ // encodes the sign/indexing direction.
+ return isInt<9>(CstOffset->getSExtValue());
+}
+
bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Base,
SDValue &Offset,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 9dcfba3a229cccd..52e519cd8a0c93c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1201,6 +1201,8 @@ class AArch64TargetLowering : public TargetLowering {
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
+ bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
+ bool IsPre, MachineRegisterInfo &MRI) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 1c7a09696e853e2..46114dfa44e83ca 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
@@ -224,6 +225,10 @@ class AArch64InstructionSelector : public InstructionSelector {
bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
+
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
@@ -3038,6 +3043,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
+ case TargetOpcode::G_INDEXED_ZEXTLOAD:
+ case TargetOpcode::G_INDEXED_SEXTLOAD:
+ return selectIndexedExtLoad(I, MRI);
+ case TargetOpcode::G_INDEXED_LOAD:
+ return selectIndexedLoad(I, MRI);
+ case TargetOpcode::G_INDEXED_STORE:
+ return selectIndexedStore(cast<GIndexedStore>(I), MRI);
+
case TargetOpcode::G_SMULH:
case TargetOpcode::G_UMULH: {
// Reject the various things we don't support yet.
@@ -5621,6 +5634,190 @@ MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
return &*Mov;
}
+bool AArch64InstructionSelector::selectIndexedExtLoad(
+ MachineInstr &MI, MachineRegisterInfo &MRI) {
+ auto &ExtLd = cast<GIndexedExtLoad>(MI);
+ Register Dst = ExtLd.getDstReg();
+ Register WriteBack = ExtLd.getWritebackReg();
+ Register Base = ExtLd.getBaseReg();
+ Register Offset = ExtLd.getOffsetReg();
+ LLT Ty = MRI.getType(Dst);
+ assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
+ unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
+ bool IsPre = ExtLd.isPre();
+ bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
+ bool InsertIntoXReg = false;
+ bool IsDst64 = Ty.getSizeInBits() == 6...
[truncated]
``````````
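For the extending indexed loads mentioned in the summary, a similar sketch applies (again my own illustration, not taken from the patch; it assumes the usual AArch64 post-indexed LDRSW form): a sign-extending 32-bit load followed by a pointer increment can fold into a single post-indexed extending load such as `ldrsw x8, [x0], #4`.

```c++
// Illustrative sketch only, not part of this patch: each iteration does a
// sign-extending 32-bit load plus a pointer increment, a candidate for a
// post-indexed extending load such as "ldrsw x8, [x0], #4".
long long sum(const int *p, int n) {
  long long s = 0;
  for (int i = 0; i < n; ++i)
    s += *p++; // sext load + pointer increment -> indexed extending load
  return s;
}
```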
</details>
https://github.com/llvm/llvm-project/pull/69533
More information about the llvm-commits mailing list