[llvm] [AArch64][GlobalISel] Add support for post-indexed loads/stores. (PR #69532)
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 11:43:22 PDT 2023
https://github.com/aemerson updated https://github.com/llvm/llvm-project/pull/69532
From 54a92838644980ff6429c7e1d031d21747e29072 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Mon, 25 Sep 2023 08:55:08 -0700
Subject: [PATCH 1/3] [AArch64][GlobalISel] Add support for post-indexed
loads/stores.
Gives small code size improvements across the board on CTMark at -Os.
Much of the work is porting the existing heuristics from the DAGCombiner.
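
To illustrate (a minimal sketch, not code from the patch; the register names
and the #8 offset are invented), the combine folds a pointer increment that
follows a load into the load itself:

  Before:
    %val:_(s64) = G_LOAD %ptr(p0) :: (load (s64))
    %offset:_(s64) = G_CONSTANT i64 8
    %next:_(p0) = G_PTR_ADD %ptr, %offset(s64)

  After (the trailing 0 selects post-indexing; 1 would mean pre-indexing):
    %offset:_(s64) = G_CONSTANT i64 8
    %val:_(s64), %next:_(p0) = G_INDEXED_LOAD %ptr, %offset(s64), 0 :: (load (s64))

The AArch64 selector can then emit a single writeback instruction such as
"ldr x8, [x0], #8" in place of a separate ldr/add pair.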
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 6 +-
.../CodeGen/GlobalISel/GenericMachineInstrs.h | 2 +-
.../include/llvm/Target/GlobalISel/Combine.td | 2 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 209 ++++--
llvm/lib/Target/AArch64/AArch64Combine.td | 1 +
.../Target/AArch64/AArch64ISelLowering.cpp | 19 +
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +
.../GISel/AArch64InstructionSelector.cpp | 85 +++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 48 ++
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 74 +-
.../AArch64/GISel/AArch64RegisterBankInfo.h | 5 +
.../combiner-load-store-indexing.ll | 217 +++---
.../legalize-indexed-load-stores.mir | 89 +++
.../GlobalISel/legalizer-info-validation.mir | 17 +-
.../CodeGen/AArch64/arm64-indexed-memory.ll | 284 ++------
.../AArch64/arm64-indexed-vector-ldst.ll | 652 ++++++------------
16 files changed, 887 insertions(+), 825 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir
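
Note: below is a minimal sketch (invented register names, not taken from the
patch) of one case the new use-scan in findPostIndexCandidate deliberately
rejects. If a later load/store of the same base pointer could itself be
post-indexed, the earlier candidate is skipped, presumably so the pointer
update can instead fold into the later access:

  %val1:_(s64) = G_LOAD %baseptr(p0) :: (load (s64))
  %offset:_(s64) = G_CONSTANT i64 8
  %next:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
  %val2:_(s64) = G_LOAD %baseptr(p0) :: (load (s64))

Here the %val1 load is not combined, because the %val2 load is dominated by
it and is itself legal to post-index.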
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index d64b414f2747621..65299e852574bd1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -58,6 +58,8 @@ struct IndexedLoadStoreMatchInfo {
Register Addr;
Register Base;
Register Offset;
+ bool RematOffset; // True if Offset is a constant that needs to be
+ // rematerialized before the new load/store.
bool IsPre;
};
@@ -814,12 +816,14 @@ class CombinerHelper {
void applyCommuteBinOpOperands(MachineInstr &MI);
private:
+ /// Checks for legality of an indexed variant of \p LdSt.
+ bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
///
/// \returns true if a candidate is found.
bool findPostIndexCandidate(GLoadStore &MI, Register &Addr, Register &Base,
- Register &Offset);
+ Register &Offset, bool &RematOffset);
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a pre-indexing operation.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6c36b1bbcf8649b..b34b90fd24eb602 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -97,7 +97,7 @@ class GIndexedLoad : public GMemOperation {
/// Get the offset register of the pointer value.
Register getOffsetReg() const { return getOperand(3).getReg(); }
- bool isPre() const { return getOperand(5).getImm() == 1; }
+ bool isPre() const { return getOperand(4).getImm() == 1; }
bool isPost() const { return !isPre(); }
static bool classof(const MachineInstr *MI) {
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 7e0691e1ee95048..bb8223ba3486a8d 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1248,7 +1248,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
- combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
+ undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
reassocs, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9efb70f28fee3ee..a8425db6584f61c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -945,42 +945,171 @@ void CombinerHelper::applySextInRegOfLoad(
MI.eraseFromParent();
}
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+/// Return true if 'MI' is a load or a store whose address operand can be
+/// folded into the load / store addressing mode.
+static bool canFoldInAddressingMode(GLoadStore *MI,
+ const TargetLowering &TLI,
+ MachineRegisterInfo &MRI) {
+ TargetLowering::AddrMode AM;
+ auto *MF = MI->getMF();
+ auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
+ if (!Addr)
+ return false;
+
+ AM.HasBaseReg = true;
+ auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI);
+ if (CstOff)
+ AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
+ else
+ AM.Scale = 1; // [reg +/- reg]
+
+ return TLI.isLegalAddressingMode(
+ MF->getDataLayout(), AM,
+ getTypeForLLT(MI->getMMO().getMemoryType(),
+ MF->getFunction().getContext()),
+ MI->getMMO().getAddrSpace());
+}
+
+namespace {
+unsigned getIndexedOpc(unsigned LdStOpc) {
+ switch (LdStOpc) {
+ case TargetOpcode::G_LOAD:
+ return TargetOpcode::G_INDEXED_LOAD;
+ case TargetOpcode::G_STORE:
+ return TargetOpcode::G_INDEXED_STORE;
+ case TargetOpcode::G_ZEXTLOAD:
+ return TargetOpcode::G_INDEXED_ZEXTLOAD;
+ case TargetOpcode::G_SEXTLOAD:
+ return TargetOpcode::G_INDEXED_SEXTLOAD;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+} // namespace
+
+bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
+ // Check for legality.
+ LLT PtrTy = MRI.getType(LdSt.getPointerReg());
+ LLT Ty = MRI.getType(LdSt.getReg(0));
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+ {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
+ SmallVector<LLT> OpTys;
+ if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
+ OpTys = {PtrTy, Ty, Ty};
+ else
+ OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
+
+ LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
+ return isLegal(Q);
+}
+
+static cl::opt<unsigned> PostIndexUseThreshold(
+ "post-index-use-threshold", cl::Hidden, cl::init(32),
+ cl::desc("Number of uses of a base pointer to check before it is no longer "
+ "considered for post-indexing."));
+
bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
- Register &Base, Register &Offset) {
+ Register &Base, Register &Offset,
+ bool &RematOffset) {
+ // We're looking for the following pattern, for either load or store:
+ // %baseptr:_(p0) = ...
+ // G_STORE %val(s64), %baseptr(p0)
+ // %offset:_(s64) = G_CONSTANT i64 -256
+ // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
auto &MF = *LdSt.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
- Base = LdSt.getPointerReg();
+ Register Ptr = LdSt.getPointerReg();
+  // If the load/store is the only use of the pointer, don't bother.
+ if (MRI.hasOneNonDBGUse(Ptr))
+ return false;
+
+ if (!isIndexedLoadStoreLegal(LdSt))
+ return false;
- if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Base, MRI))
+ if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
return false;
- // FIXME: The following use traversal needs a bail out for patholigical cases.
- for (auto &Use : MRI.use_nodbg_instructions(Base)) {
+ MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
+ auto *PtrDef = MRI.getVRegDef(Ptr);
+
+ unsigned NumUsesChecked = 0;
+ for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
+ if (++NumUsesChecked > PostIndexUseThreshold)
+ return false; // Try to avoid exploding compile time.
+
auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
- if (!PtrAdd)
+ // The use itself might be dead. This can happen during combines if DCE
+ // hasn't had a chance to run yet. Don't allow it to form an indexed op.
+ if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
+ continue;
+
+    // Check that the user of this isn't the store, otherwise we'd generate an
+ // indexed store defining its own use.
+ if (StoredValDef == &Use)
continue;
Offset = PtrAdd->getOffsetReg();
if (!ForceLegalIndexing &&
- !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ false, MRI))
+ !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
+ /*IsPre*/ false, MRI))
continue;
// Make sure the offset calculation is before the potentially indexed op.
MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
- if (!dominates(*OffsetDef, LdSt))
- continue;
+ if (!dominates(*OffsetDef, LdSt)) {
+      // However, if the offset is just a G_CONSTANT, we can always
+ // rematerialize it where we need it.
+ if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
+ continue;
+ RematOffset = true;
+ }
- // FIXME: check whether all uses of Base are load/store with foldable
- // addressing modes. If so, using the normal addr-modes is better than
- // forming an indexed one.
- if (any_of(MRI.use_nodbg_instructions(PtrAdd->getReg(0)),
- [&](MachineInstr &PtrAddUse) {
- return !dominates(LdSt, PtrAddUse);
- }))
- continue;
+ for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
+ if (&BasePtrUse == PtrDef)
+ continue;
+
+ // If the user is a later load/store that can be post-indexed, then don't
+ // combine this one.
+ auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
+ if (BasePtrLdSt && BasePtrLdSt != &LdSt) {
+ if (dominates(LdSt, *BasePtrLdSt)) {
+ if (isIndexedLoadStoreLegal(*BasePtrLdSt))
+ return false;
+ }
+ }
+
+ // Now we're looking for the key G_PTR_ADD instruction, which contains
+ // the offset add that we want to fold.
+ if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
+ Register PtrAddDefReg = BasePtrUseDef->getReg(0);
+ for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
+ // If the use is in a different block, then we may produce worse code
+ // due to the extra register pressure.
+ if (BaseUseUse.getParent() != LdSt.getParent())
+ return false;
+
+ if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse)) {
+ if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
+ return false;
+ }
+ }
+ if (!dominates(LdSt, BasePtrUse))
+        return false; // All uses must be dominated by the load/store.
+ }
+ }
Addr = PtrAdd->getReg(0);
+ Base = PtrAdd->getBaseReg();
return true;
}
@@ -1001,6 +1130,9 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
!TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
return false;
+ if (!isIndexedLoadStoreLegal(LdSt))
+ return false;
+
MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
return false;
@@ -1027,16 +1159,14 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
auto &LdSt = cast<GLoadStore>(MI);
- // For now, no targets actually support these opcodes so don't waste time
- // running these unless we're forced to for testing.
- if (!ForceLegalIndexing)
+ if (LdSt.isAtomic())
return false;
MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset);
if (!MatchInfo.IsPre &&
!findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
- MatchInfo.Offset))
+ MatchInfo.Offset, MatchInfo.RematOffset))
return false;
return true;
@@ -1045,28 +1175,21 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
void CombinerHelper::applyCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
- MachineIRBuilder MIRBuilder(MI);
+ Builder.setInstrAndDebugLoc(MI);
unsigned Opcode = MI.getOpcode();
bool IsStore = Opcode == TargetOpcode::G_STORE;
- unsigned NewOpcode;
- switch (Opcode) {
- case TargetOpcode::G_LOAD:
- NewOpcode = TargetOpcode::G_INDEXED_LOAD;
- break;
- case TargetOpcode::G_SEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
- break;
- case TargetOpcode::G_ZEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
- break;
- case TargetOpcode::G_STORE:
- NewOpcode = TargetOpcode::G_INDEXED_STORE;
- break;
- default:
- llvm_unreachable("Unknown load/store opcode");
+ unsigned NewOpcode = getIndexedOpc(Opcode);
+
+ // If the offset constant didn't happen to dominate the load/store, we can
+ // just clone it as needed.
+ if (MatchInfo.RematOffset) {
+ auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
+ auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
+ *OldCst->getOperand(1).getCImm());
+ MatchInfo.Offset = NewCst.getReg(0);
}
- auto MIB = MIRBuilder.buildInstr(NewOpcode);
+ auto MIB = Builder.buildInstr(NewOpcode);
if (IsStore) {
MIB.addDef(MatchInfo.Addr);
MIB.addUse(MI.getOperand(0).getReg());
@@ -1245,13 +1368,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
- if (Ty.isVector())
- return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
- Ty.getNumElements());
- return IntegerType::get(C, Ty.getSizeInBits());
-}
-
+
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index f7b55cad4269944..017c4523c23a184 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -246,6 +246,7 @@ def AArch64PostLegalizerLowering
def AArch64PostLegalizerCombiner
: GICombiner<"AArch64PostLegalizerCombinerImpl",
[copy_prop, combines_for_extload,
+ combine_indexed_load_store,
sext_trunc_sextload, mutate_anyext_to_zext,
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a16a102e472e709..b20c5823371c226 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -37,6 +37,8 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -23615,6 +23617,23 @@ bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
+bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
+ Register Offset, bool IsPre,
+ MachineRegisterInfo &MRI) const {
+  // TODO: Pre-indexed addressing is not implemented yet.
+  if (IsPre)
+    return false;
+
+ auto CstOffset = getIConstantVRegVal(Offset, MRI);
+ if (!CstOffset || CstOffset->isZero())
+ return false;
+
+ // All of the indexed addressing mode instructions take a signed 9 bit
+ // immediate offset. Our CstOffset is a G_PTR_ADD offset so it already
+ // encodes the sign/indexing direction.
+ return isInt<9>(CstOffset->getSExtValue());
+}
+
bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Base,
SDValue &Offset,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 9dcfba3a229cccd..52e519cd8a0c93c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1201,6 +1201,8 @@ class AArch64TargetLowering : public TargetLowering {
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
+ bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
+ bool IsPre, MachineRegisterInfo &MRI) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 1c7a09696e853e2..152a6bfab21faf2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
@@ -224,6 +225,9 @@ class AArch64InstructionSelector : public InstructionSelector {
bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
+
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
@@ -3038,6 +3042,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
+ case TargetOpcode::G_INDEXED_LOAD:
+ return selectIndexedLoad(I, MRI);
+ case TargetOpcode::G_INDEXED_STORE:
+ return selectIndexedStore(cast<GIndexedStore>(I), MRI);
+
case TargetOpcode::G_SMULH:
case TargetOpcode::G_UMULH: {
// Reject the various things we don't support yet.
@@ -5621,6 +5630,82 @@ MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
return &*Mov;
}
+bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
+ MachineRegisterInfo &MRI) {
+ // TODO: extending loads.
+ if (isa<GIndexedExtLoad>(MI))
+ return false;
+
+ auto &Ld = cast<GIndexedLoad>(MI);
+ Register Dst = Ld.getDstReg();
+ Register WriteBack = Ld.getWritebackReg();
+ Register Base = Ld.getBaseReg();
+ Register Offset = Ld.getOffsetReg();
+
+ if (Ld.isPre())
+ return false; // TODO: add pre-inc support
+
+ unsigned Opc = 0;
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
+ AArch64::LDRXpost};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
+ AArch64::LDRDpost, AArch64::LDRQpost};
+
+ unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
+ if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(MemSize)];
+ else
+ Opc = GPROpcodes[Log2_32(MemSize)];
+
+ auto Cst = getIConstantVRegVal(Offset, MRI);
+ if (!Cst)
+ return false; // Shouldn't happen, but just in case.
+ auto LdMI =
+ MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
+ LdMI.cloneMemRefs(Ld);
+ constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
+ MachineRegisterInfo &MRI) {
+ Register Dst = I.getWritebackReg();
+ Register Val = I.getValueReg();
+ Register Base = I.getBaseReg();
+ Register Offset = I.getOffsetReg();
+ LLT ValTy = MRI.getType(Val);
+
+ if (I.isPre())
+ return false; // TODO: add pre-inc support
+
+ unsigned Opc = 0;
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
+ AArch64::STRXpost};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
+ AArch64::STRDpost, AArch64::STRQpost};
+
+ assert(ValTy.getSizeInBits() <= 128);
+ if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+ else
+ Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+
+ auto Cst = getIConstantVRegVal(Offset, MRI);
+ if (!Cst)
+ return false; // Shouldn't happen, but just in case.
+ auto Str =
+ MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
+ Str.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
MachineInstr *
AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
MachineIRBuilder &MIRBuilder,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index ddc27bebb767693..bb396cb26afc793 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -14,6 +14,7 @@
#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64Subtarget.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -65,6 +66,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
/* End 128bit types */
/* Begin 64bit types */
v8s8, v4s16, v2s32};
+ std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
+ SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
+ SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
@@ -413,6 +417,50 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.customIf(IsPtrVecPred)
.scalarizeIf(typeIs(0, v2s16), 0);
+ getActionDefinitionsBuilder(G_INDEXED_STORE)
+ // Idx 0 == Ptr, Idx 1 == Val
+  // TODO: we could implement legalization actions, but as of now these are
+  // only generated in a very specific way.
+ .legalForTypesWithMemDesc({
+ {p0, s8, s8, 8},
+ {p0, s16, s16, 8},
+ {p0, s32, s8, 8},
+ {p0, s32, s16, 8},
+ {p0, s32, s32, 8},
+ {p0, s64, s64, 8},
+ {p0, p0, p0, 8},
+ {p0, v8s8, v8s8, 8},
+ {p0, v16s8, v16s8, 8},
+ {p0, v4s16, v4s16, 8},
+ {p0, v8s16, v8s16, 8},
+ {p0, v2s32, v2s32, 8},
+ {p0, v4s32, v4s32, 8},
+ {p0, v2s64, v2s64, 8},
+ {p0, v2p0, v2p0, 8},
+ {p0, s128, s128, 8},
+ })
+ .unsupported();
+
+ auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
+ LLT LdTy = Query.Types[0];
+ LLT PtrTy = Query.Types[1];
+ if (llvm::find(PackedVectorAllTypesVec, LdTy) ==
+ PackedVectorAllTypesVec.end() &&
+ llvm::find(ScalarAndPtrTypesVec, LdTy) == ScalarAndPtrTypesVec.end() &&
+ LdTy != s128)
+ return false;
+ if (PtrTy != p0)
+ return false;
+ return true;
+ };
+ getActionDefinitionsBuilder(G_INDEXED_LOAD)
+ .unsupportedIf(
+ atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
+ .legalIf(IndexedLoadBasicPred)
+ .unsupported();
+ getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
+ .unsupported(); // TODO: implement
+
// Constants
getActionDefinitionsBuilder(G_CONSTANT)
.legalFor({p0, s8, s16, s32, s64})
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 26954c62e03f1fc..5e96bc67d8ded13 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -605,6 +605,35 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
return hasFPConstraints(MI, MRI, TRI, Depth);
}
+bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
+ // GMemOperation because we also want to match indexed loads.
+  auto *Load = cast<GMemOperation>(&MI);
+
+ const auto &MMO = Load->getMMO();
+ const Value *LdVal = MMO.getValue();
+ if (!LdVal)
+ return false;
+
+ Type *EltTy = nullptr;
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
+ EltTy = GV->getValueType();
+ } else {
+ // FIXME: grubbing around uses is pretty ugly, but with no more
+ // `getPointerElementType` there's not much else we can do.
+ for (const auto *LdUser : LdVal->users()) {
+ if (isa<LoadInst>(LdUser)) {
+ EltTy = LdUser->getType();
+ break;
+ }
+ if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
+ EltTy = LdUser->getOperand(0)->getType();
+ break;
+ }
+ }
+ }
+ return EltTy && EltTy->isFPOrFPVectorTy();
+}
+
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const unsigned Opc = MI.getOpcode();
@@ -814,30 +843,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
// Try to guess the type of the load from the MMO.
- const auto &MMO = **MI.memoperands_begin();
- const Value *LdVal = MMO.getValue();
- if (LdVal) {
- Type *EltTy = nullptr;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
- EltTy = GV->getValueType();
- } else {
- // FIXME: grubbing around uses is pretty ugly, but with no more
- // `getPointerElementType` there's not much else we can do.
- for (const auto *LdUser : LdVal->users()) {
- if (isa<LoadInst>(LdUser)) {
- EltTy = LdUser->getType();
- break;
- }
- if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
- EltTy = LdUser->getOperand(0)->getType();
- break;
- }
- }
- }
- if (EltTy && EltTy->isFPOrFPVectorTy()) {
- OpRegBankIdx[0] = PMI_FirstFPR;
- break;
- }
+ if (isLoadFromFPType(MI)) {
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ break;
}
// Check if that load feeds fp instructions.
@@ -870,6 +878,24 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
break;
+ case TargetOpcode::G_INDEXED_STORE:
+ if (OpRegBankIdx[1] == PMI_FirstGPR) {
+ Register VReg = MI.getOperand(1).getReg();
+ if (!VReg)
+ break;
+ MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ if (onlyDefinesFP(*DefMI, MRI, TRI))
+ OpRegBankIdx[1] = PMI_FirstFPR;
+ break;
+ }
+ break;
+ case TargetOpcode::G_INDEXED_LOAD:
+ case TargetOpcode::G_INDEXED_SEXTLOAD:
+ case TargetOpcode::G_INDEXED_ZEXTLOAD: {
+ if (isLoadFromFPType(MI))
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ break;
+ }
case TargetOpcode::G_SELECT: {
// If the destination is FPR, preserve that.
if (OpRegBankIdx[0] != PMI_FirstGPR)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
index f8b16e3177cc4bd..4d40efb5ac92485 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERBANKINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERBANKINFO_H
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#define GET_REGBANK_DECLARATIONS
@@ -131,6 +132,10 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+ /// \returns true if the load \p MI is likely loading from a floating-point
+ /// type.
+ bool isLoadFromFPType(const MachineInstr &MI) const;
+
public:
AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll
index 05d0ef9551bb230..cae1b3949dca7bf 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll
@@ -1,63 +1,53 @@
-; RUN: llc -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-prelegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s
-; RUN: llc -debugify-and-strip-all-safe -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-prelegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-postlegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s
+; RUN: llc -debugify-and-strip-all-safe -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-postlegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s
define ptr @test_simple_load_pre(ptr %ptr) {
-; CHECK-LABEL: name: test_simple_load_pre
-; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
-; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
-; CHECK-NOT: G_PTR_ADD
-; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1
-; CHECK: $x0 = COPY [[NEXT]](p0)
+ ; CHECK-LABEL: name: test_simple_load_pre
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
+ ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[C]](s64), 1 :: (volatile load (s8) from %ir.next)
+ ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%next = getelementptr i8, ptr %ptr, i32 42
load volatile i8, ptr %next
ret ptr %next
}
define ptr @test_unused_load_pre(ptr %ptr) {
-; CHECK-LABEL: name: test_unused_load_pre
-; CHECK-NOT: G_INDEXED_LOAD
+ ; CHECK-LABEL: name: test_unused_load_pre
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; CHECK-NEXT: $x0 = COPY [[C1]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%next = getelementptr i8, ptr %ptr, i32 42
load volatile i8, ptr %next
ret ptr null
}
-define void @test_load_multiple_dominated(ptr %ptr, i1 %tst, i1 %tst2) {
-; CHECK-LABEL: name: test_load_multiple_dominated
-; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
-; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
-; CHECK-NOT: G_PTR_ADD
-; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1
-; CHECK: $x0 = COPY [[NEXT]](p0)
- %next = getelementptr i8, ptr %ptr, i32 42
- br i1 %tst, label %do_load, label %end
-
-do_load:
- load volatile i8, ptr %next
- br i1 %tst2, label %bb1, label %bb2
-
-bb1:
- store volatile ptr %next, ptr undef
- ret void
-
-bb2:
- call void @bar(ptr %next)
- ret void
-
-end:
- ret void
-}
-
define ptr @test_simple_store_pre(ptr %ptr) {
-; CHECK-LABEL: name: test_simple_store_pre
-; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
-; CHECK: [[VAL:%.*]]:_(s8) = G_CONSTANT i8 0
-; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
-; CHECK-NOT: G_PTR_ADD
-; CHECK: [[NEXT:%.*]]:_(p0) = G_INDEXED_STORE [[VAL]](s8), [[BASE]], [[OFFSET]](s64), 1
-; CHECK: $x0 = COPY [[NEXT]](p0)
+ ; CHECK-LABEL: name: test_simple_store_pre
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+ ; CHECK-NEXT: [[INDEXED_STORE:%[0-9]+]]:_(p0) = G_INDEXED_STORE [[C1]](s8), [[COPY]], [[C]](s64), 1 :: (volatile store (s8) into %ir.next)
+ ; CHECK-NEXT: $x0 = COPY [[INDEXED_STORE]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%next = getelementptr i8, ptr %ptr, i32 42
store volatile i8 0, ptr %next
ret ptr %next
@@ -66,10 +56,17 @@ define ptr @test_simple_store_pre(ptr %ptr) {
; The potentially pre-indexed address is used as the value stored. Converting
; would produce the value too late but only by one instruction.
define ptr @test_store_pre_val_loop(ptr %ptr) {
-; CHECK-LABEL: name: test_store_pre_val_loop
-; CHECK: G_PTR_ADD
-; CHECK: G_STORE %
+ ; CHECK-LABEL: name: test_store_pre_val_loop
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 336
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[PTR_ADD]](p0) :: (volatile store (p0) into %ir.next)
+ ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%next = getelementptr ptr, ptr %ptr, i32 42
store volatile ptr %next, ptr %next
ret ptr %next
@@ -77,11 +74,21 @@ define ptr @test_store_pre_val_loop(ptr %ptr) {
; Potentially pre-indexed address is used between GEP computing it and load.
define ptr @test_load_pre_before(ptr %ptr) {
-; CHECK-LABEL: name: test_load_pre_before
-; CHECK: G_PTR_ADD
-; CHECK: BL @bar
-; CHECK: G_LOAD %
+ ; CHECK-LABEL: name: test_load_pre_before
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
+ ; CHECK-NEXT: BL @bar, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next)
+ ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%next = getelementptr i8, ptr %ptr, i32 42
call void @bar(ptr %next)
load volatile i8, ptr %next
@@ -91,56 +98,51 @@ define ptr @test_load_pre_before(ptr %ptr) {
; Materializing the base into a writable register (from sp/fp) would be just as
; bad as the original GEP.
define ptr @test_alloca_load_pre() {
-; CHECK-LABEL: name: test_alloca_load_pre
-; CHECK: G_PTR_ADD
-; CHECK: G_LOAD %
+ ; CHECK-LABEL: name: test_alloca_load_pre
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next)
+ ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%ptr = alloca i8, i32 128
%next = getelementptr i8, ptr %ptr, i32 42
load volatile i8, ptr %next
ret ptr %next
}
-; Load does not dominate use of its address. No indexing.
-define ptr @test_pre_nodom(ptr %in, i1 %tst) {
-; CHECK-LABEL: name: test_pre_nodom
-; CHECK: G_PTR_ADD
-; CHECK: G_LOAD %
-
- %next = getelementptr i8, ptr %in, i32 16
- br i1 %tst, label %do_indexed, label %use_addr
-
-do_indexed:
- %val = load i8, ptr %next
- store i8 %val, ptr @var
- store ptr %next, ptr @varp8
- br label %use_addr
-
-use_addr:
- ret ptr %next
-}
-
define ptr @test_simple_load_post(ptr %ptr) {
-; CHECK-LABEL: name: test_simple_load_post
-; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
-; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
-; CHECK-NOT: G_PTR_ADD
-; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 0
-; CHECK: $x0 = COPY [[NEXT]](p0)
+ ; CHECK-LABEL: name: test_simple_load_post
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
+ ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[C]](s64), 0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%next = getelementptr i8, ptr %ptr, i32 42
load volatile i8, ptr %ptr
ret ptr %next
}
define ptr @test_simple_load_post_gep_after(ptr %ptr) {
-; CHECK-LABEL: name: test_simple_load_post_gep_after
-; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
-; CHECK: BL @get_offset
-; CHECK: [[OFFSET:%.*]]:_(s64) = COPY $x0
-; CHECK: {{%.*}}:_(s8), [[ADDR:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 0
-; CHECK: $x0 = COPY [[ADDR]](p0)
+ ; CHECK-LABEL: name: test_simple_load_post_gep_after
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[COPY1]](s64), 0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%offset = call i64 @get_offset()
load volatile i8, ptr %ptr
%next = getelementptr i8, ptr %ptr, i64 %offset
@@ -148,9 +150,24 @@ define ptr @test_simple_load_post_gep_after(ptr %ptr) {
}
define ptr @test_load_post_keep_looking(ptr %ptr) {
-; CHECK: name: test_load_post_keep_looking
-; CHECK: G_INDEXED_LOAD
+ ; CHECK-LABEL: name: test_load_post_keep_looking
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[COPY1]](s64), 0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PTRTOINT]](s64)
+ ; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var
+ ; CHECK-NEXT: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[ADD_LOW]](p0) :: (store (s8) into @var)
+ ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%offset = call i64 @get_offset()
load volatile i8, ptr %ptr
%intval = ptrtoint ptr %ptr to i8
@@ -162,10 +179,15 @@ define ptr @test_load_post_keep_looking(ptr %ptr) {
; Base is frame index. Using indexing would need copy anyway.
define ptr @test_load_post_alloca() {
-; CHECK-LABEL: name: test_load_post_alloca
-; CHECK: G_PTR_ADD
-; CHECK: G_LOAD %
+ ; CHECK-LABEL: name: test_load_post_alloca
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%ptr = alloca i8, i32 128
%next = getelementptr i8, ptr %ptr, i32 42
load volatile i8, ptr %ptr
@@ -174,11 +196,20 @@ define ptr @test_load_post_alloca() {
; Offset computation does not dominate the load we might be indexing.
define ptr @test_load_post_gep_offset_after(ptr %ptr) {
-; CHECK-LABEL: name: test_load_post_gep_offset_after
-; CHECK: G_LOAD %
-; CHECK: BL @get_offset
-; CHECK: G_PTR_ADD
+ ; CHECK-LABEL: name: test_load_post_gep_offset_after
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
load volatile i8, ptr %ptr
%offset = call i64 @get_offset()
%next = getelementptr i8, ptr %ptr, i64 %offset
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir
new file mode 100644
index 000000000000000..e82a0c219068fde
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir
@@ -0,0 +1,89 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s
+
+---
+name: post_store_s64
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: post_store_s64
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p0) = COPY $x0
+ ; CHECK-NEXT: %val:_(s64) = COPY $x1
+ ; CHECK-NEXT: %offset:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: %writeback:_(p0) = G_INDEXED_STORE %val(s64), %ptr, %offset(s64), 0 :: (store (s64))
+ ; CHECK-NEXT: $x0 = COPY %writeback(p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %ptr:_(p0) = COPY $x0
+ %val:_(s64) = COPY $x1
+ %offset:_(s64) = G_CONSTANT i64 8
+ %writeback:_(p0) = G_INDEXED_STORE %val, %ptr, %offset, 0 :: (store (s64), align 8)
+ $x0 = COPY %writeback
+ RET_ReallyLR implicit $x0
+...
+---
+name: post_store_v2s64
+body: |
+ bb.0:
+ liveins: $x0, $q0
+
+ ; CHECK-LABEL: name: post_store_v2s64
+ ; CHECK: liveins: $x0, $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p0) = COPY $x0
+ ; CHECK-NEXT: %val:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: %offset:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: %writeback:_(p0) = G_INDEXED_STORE %val(<2 x s64>), %ptr, %offset(s64), 0 :: (store (<2 x s64>), align 8)
+ ; CHECK-NEXT: $x0 = COPY %writeback(p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %ptr:_(p0) = COPY $x0
+ %val:_(<2 x s64>) = COPY $q0
+ %offset:_(s64) = G_CONSTANT i64 8
+ %writeback:_(p0) = G_INDEXED_STORE %val, %ptr, %offset, 0 :: (store (<2 x s64>), align 8)
+ $x0 = COPY %writeback
+ RET_ReallyLR implicit $x0
+...
+---
+name: post_load_s64
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: post_load_s64
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p0) = COPY $x0
+ ; CHECK-NEXT: %offset:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: %dst:_(s64), %writeback:_(p0) = G_INDEXED_LOAD %ptr, %offset(s64), 0 :: (load (s64))
+ ; CHECK-NEXT: $x0 = COPY %writeback(p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %ptr:_(p0) = COPY $x0
+ %offset:_(s64) = G_CONSTANT i64 8
+ %dst:_(s64), %writeback:_(p0) = G_INDEXED_LOAD %ptr, %offset, 0 :: (load (s64), align 8)
+ $x0 = COPY %writeback
+ RET_ReallyLR implicit $x0
+...
+---
+name: post_load_v2s64
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: post_load_v2s64
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p0) = COPY $x0
+ ; CHECK-NEXT: %offset:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: %dst:_(<2 x s64>), %writeback:_(p0) = G_INDEXED_LOAD %ptr, %offset(s64), 0 :: (load (s64))
+ ; CHECK-NEXT: $x0 = COPY %writeback(p0)
+ ; CHECK-NEXT: $q0 = COPY %dst(<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $q0
+ %ptr:_(p0) = COPY $x0
+ %offset:_(s64) = G_CONSTANT i64 8
+ %dst:_(<2 x s64>), %writeback:_(p0) = G_INDEXED_LOAD %ptr, %offset, 0 :: (load (s64), align 8)
+ $x0 = COPY %writeback
+ $q0 = COPY %dst
+ RET_ReallyLR implicit $x0, implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 549f36b2afd066f..d5f7507ec5dd767 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -174,20 +174,21 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_INDEXED_LOAD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_INDEXED_SEXTLOAD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_INDEXED_ZEXTLOAD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_STORE (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_INDEXED_STORE (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_ATOMIC_CMPXCHG_WITH_SUCCESS (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index e40063def477ccb..42a40a62cc6d374 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -4,22 +4,10 @@
; RUN: llc < %s -mtriple=arm64_32-apple-ios -aarch64-redzone | FileCheck %s --check-prefixes=CHECK,CHECK32
define ptr @store64(ptr %ptr, i64 %index, i64 %spacing) {
-; CHECK64-LABEL: store64:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str x2, [x0], #8
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: store64:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #8
-; GISEL-NEXT: str x2, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: store64:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str x2, [x0], #8
-; CHECK32-NEXT: ret
+; CHECK-LABEL: store64:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str x2, [x0], #8
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 1
store i64 %spacing, ptr %ptr, align 4
ret ptr %incdec.ptr
@@ -38,44 +26,20 @@ define ptr @store64idxpos256(ptr %ptr, i64 %index, i64 %spacing) {
}
define ptr @store64idxneg256(ptr %ptr, i64 %index, i64 %spacing) {
-; CHECK64-LABEL: store64idxneg256:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str x2, [x0], #-256
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: store64idxneg256:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: sub x0, x0, #256
-; GISEL-NEXT: str x2, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: store64idxneg256:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str x2, [x0], #-256
-; CHECK32-NEXT: ret
+; CHECK-LABEL: store64idxneg256:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str x2, [x0], #-256
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 -32
store i64 %spacing, ptr %ptr, align 4
ret ptr %incdec.ptr
}
define ptr @store32(ptr %ptr, i32 %index, i32 %spacing) {
-; CHECK64-LABEL: store32:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str w2, [x0], #4
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: store32:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #4
-; GISEL-NEXT: str w2, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: store32:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str w2, [x0], #4
-; CHECK32-NEXT: ret
+; CHECK-LABEL: store32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str w2, [x0], #4
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 1
store i32 %spacing, ptr %ptr, align 4
ret ptr %incdec.ptr
@@ -94,44 +58,20 @@ define ptr @store32idxpos256(ptr %ptr, i32 %index, i32 %spacing) {
}
define ptr @store32idxneg256(ptr %ptr, i32 %index, i32 %spacing) {
-; CHECK64-LABEL: store32idxneg256:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str w2, [x0], #-256
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: store32idxneg256:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: sub x0, x0, #256
-; GISEL-NEXT: str w2, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: store32idxneg256:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str w2, [x0], #-256
-; CHECK32-NEXT: ret
+; CHECK-LABEL: store32idxneg256:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str w2, [x0], #-256
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 -64
store i32 %spacing, ptr %ptr, align 4
ret ptr %incdec.ptr
}
define ptr @store16(ptr %ptr, i16 %index, i16 %spacing) {
-; CHECK64-LABEL: store16:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strh w2, [x0], #2
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: store16:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #2
-; GISEL-NEXT: strh w2, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: store16:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strh w2, [x0], #2
-; CHECK32-NEXT: ret
+; CHECK-LABEL: store16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strh w2, [x0], #2
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 1
store i16 %spacing, ptr %ptr, align 4
ret ptr %incdec.ptr
@@ -150,44 +90,20 @@ define ptr @store16idxpos256(ptr %ptr, i16 %index, i16 %spacing) {
}
define ptr @store16idxneg256(ptr %ptr, i16 %index, i16 %spacing) {
-; CHECK64-LABEL: store16idxneg256:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strh w2, [x0], #-256
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: store16idxneg256:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: sub x0, x0, #256
-; GISEL-NEXT: strh w2, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: store16idxneg256:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strh w2, [x0], #-256
-; CHECK32-NEXT: ret
+; CHECK-LABEL: store16idxneg256:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strh w2, [x0], #-256
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 -128
store i16 %spacing, ptr %ptr, align 4
ret ptr %incdec.ptr
}
define ptr @store8(ptr %ptr, i8 %index, i8 %spacing) {
-; CHECK64-LABEL: store8:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strb w2, [x0], #1
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: store8:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #1
-; GISEL-NEXT: strb w2, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: store8:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strb w2, [x0], #1
-; CHECK32-NEXT: ret
+; CHECK-LABEL: store8:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strb w2, [x0], #1
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 1
store i8 %spacing, ptr %ptr, align 4
ret ptr %incdec.ptr
@@ -206,44 +122,20 @@ define ptr @store8idxpos256(ptr %ptr, i8 %index, i8 %spacing) {
}
define ptr @store8idxneg256(ptr %ptr, i8 %index, i8 %spacing) {
-; CHECK64-LABEL: store8idxneg256:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strb w2, [x0], #-256
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: store8idxneg256:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: sub x0, x0, #256
-; GISEL-NEXT: strb w2, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: store8idxneg256:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strb w2, [x0], #-256
-; CHECK32-NEXT: ret
+; CHECK-LABEL: store8idxneg256:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strb w2, [x0], #-256
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 -256
store i8 %spacing, ptr %ptr, align 4
ret ptr %incdec.ptr
}
define ptr @truncst64to32(ptr %ptr, i32 %index, i64 %spacing) {
-; CHECK64-LABEL: truncst64to32:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str w2, [x0], #4
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: truncst64to32:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #4
-; GISEL-NEXT: str w2, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: truncst64to32:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str w2, [x0], #4
-; CHECK32-NEXT: ret
+; CHECK-LABEL: truncst64to32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str w2, [x0], #4
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 1
%trunc = trunc i64 %spacing to i32
store i32 %trunc, ptr %ptr, align 4
@@ -251,22 +143,10 @@ define ptr @truncst64to32(ptr %ptr, i32 %index, i64 %spacing) {
}
define ptr @truncst64to16(ptr %ptr, i16 %index, i64 %spacing) {
-; CHECK64-LABEL: truncst64to16:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strh w2, [x0], #2
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: truncst64to16:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #2
-; GISEL-NEXT: strh w2, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: truncst64to16:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strh w2, [x0], #2
-; CHECK32-NEXT: ret
+; CHECK-LABEL: truncst64to16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strh w2, [x0], #2
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 1
%trunc = trunc i64 %spacing to i16
store i16 %trunc, ptr %ptr, align 4
@@ -274,22 +154,10 @@ define ptr @truncst64to16(ptr %ptr, i16 %index, i64 %spacing) {
}
define ptr @truncst64to8(ptr %ptr, i8 %index, i64 %spacing) {
-; CHECK64-LABEL: truncst64to8:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strb w2, [x0], #1
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: truncst64to8:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #1
-; GISEL-NEXT: strb w2, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: truncst64to8:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strb w2, [x0], #1
-; CHECK32-NEXT: ret
+; CHECK-LABEL: truncst64to8:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strb w2, [x0], #1
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 1
%trunc = trunc i64 %spacing to i8
store i8 %trunc, ptr %ptr, align 4
@@ -298,66 +166,30 @@ define ptr @truncst64to8(ptr %ptr, i8 %index, i64 %spacing) {
define ptr @storef16(ptr %ptr, half %index, half %spacing) nounwind {
-; CHECK64-LABEL: storef16:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str h1, [x0], #2
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: storef16:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #2
-; GISEL-NEXT: str h1, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: storef16:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str h1, [x0], #2
-; CHECK32-NEXT: ret
+; CHECK-LABEL: storef16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str h1, [x0], #2
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds half, ptr %ptr, i64 1
store half %spacing, ptr %ptr, align 2
ret ptr %incdec.ptr
}
define ptr @storef32(ptr %ptr, float %index, float %spacing) {
-; CHECK64-LABEL: storef32:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str s1, [x0], #4
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: storef32:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #4
-; GISEL-NEXT: str s1, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: storef32:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str s1, [x0], #4
-; CHECK32-NEXT: ret
+; CHECK-LABEL: storef32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str s1, [x0], #4
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds float, ptr %ptr, i64 1
store float %spacing, ptr %ptr, align 4
ret ptr %incdec.ptr
}
define ptr @storef64(ptr %ptr, double %index, double %spacing) {
-; CHECK64-LABEL: storef64:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str d1, [x0], #8
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: storef64:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #8
-; GISEL-NEXT: str d1, [x8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: storef64:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str d1, [x0], #8
-; CHECK32-NEXT: ret
+; CHECK-LABEL: storef64:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str d1, [x0], #8
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds double, ptr %ptr, i64 1
store double %spacing, ptr %ptr, align 4
ret ptr %incdec.ptr
@@ -1112,8 +944,8 @@ define ptr @postidx_clobber(ptr %addr) nounwind noinline ssp {
; GISEL-LABEL: postidx_clobber:
; GISEL: ; %bb.0:
; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #8
-; GISEL-NEXT: str x8, [x8]
+; GISEL-NEXT: str x0, [x8], #8
+; GISEL-NEXT: mov x0, x8
; GISEL-NEXT: ret
;
; CHECK32-LABEL: postidx_clobber:
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 2cab4932def0724..46563f6a8e089c4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -647,20 +647,12 @@ define <8 x i8> @test_v8i8_pre_load(ptr %addr) {
}
define <8 x i8> @test_v8i8_post_load(ptr %addr) {
-; SDAG-LABEL: test_v8i8_post_load:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: ldr d0, [x0], #40
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v8i8_post_load:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr d0, [x0]
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #40
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v8i8_post_load:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr d0, [x0], #40
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
%val = load <8 x i8>, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -689,20 +681,12 @@ define void @test_v8i8_pre_store(<8 x i8> %in, ptr %addr) {
}
define void @test_v8i8_post_store(<8 x i8> %in, ptr %addr) {
-; SDAG-LABEL: test_v8i8_post_store:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str d0, [x0], #40
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v8i8_post_store:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #40
-; CHECK-GISEL-NEXT: str d0, [x0]
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v8i8_post_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str d0, [x0], #40
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
store <8 x i8> %in, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -731,20 +715,12 @@ define <4 x i16> @test_v4i16_pre_load(ptr %addr) {
}
define <4 x i16> @test_v4i16_post_load(ptr %addr) {
-; SDAG-LABEL: test_v4i16_post_load:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: ldr d0, [x0], #40
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v4i16_post_load:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr d0, [x0]
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #40
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v4i16_post_load:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr d0, [x0], #40
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
%val = load <4 x i16>, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -773,20 +749,12 @@ define void @test_v4i16_pre_store(<4 x i16> %in, ptr %addr) {
}
define void @test_v4i16_post_store(<4 x i16> %in, ptr %addr) {
-; SDAG-LABEL: test_v4i16_post_store:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str d0, [x0], #40
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v4i16_post_store:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #40
-; CHECK-GISEL-NEXT: str d0, [x0]
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v4i16_post_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str d0, [x0], #40
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
store <4 x i16> %in, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -815,20 +783,12 @@ define <2 x i32> @test_v2i32_pre_load(ptr %addr) {
}
define <2 x i32> @test_v2i32_post_load(ptr %addr) {
-; SDAG-LABEL: test_v2i32_post_load:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: ldr d0, [x0], #40
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v2i32_post_load:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr d0, [x0]
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #40
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v2i32_post_load:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr d0, [x0], #40
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
%val = load <2 x i32>, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -857,20 +817,12 @@ define void @test_v2i32_pre_store(<2 x i32> %in, ptr %addr) {
}
define void @test_v2i32_post_store(<2 x i32> %in, ptr %addr) {
-; SDAG-LABEL: test_v2i32_post_store:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str d0, [x0], #40
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v2i32_post_store:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #40
-; CHECK-GISEL-NEXT: str d0, [x0]
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v2i32_post_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str d0, [x0], #40
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
store <2 x i32> %in, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -899,20 +851,12 @@ define <2 x float> @test_v2f32_pre_load(ptr %addr) {
}
define <2 x float> @test_v2f32_post_load(ptr %addr) {
-; SDAG-LABEL: test_v2f32_post_load:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: ldr d0, [x0], #40
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v2f32_post_load:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr d0, [x0]
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #40
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v2f32_post_load:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr d0, [x0], #40
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <2 x float>, ptr %addr, i32 5
%val = load <2 x float>, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -941,20 +885,12 @@ define void @test_v2f32_pre_store(<2 x float> %in, ptr %addr) {
}
define void @test_v2f32_post_store(<2 x float> %in, ptr %addr) {
-; SDAG-LABEL: test_v2f32_post_store:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str d0, [x0], #40
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v2f32_post_store:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #40
-; CHECK-GISEL-NEXT: str d0, [x0]
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v2f32_post_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str d0, [x0], #40
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <2 x float>, ptr %addr, i32 5
store <2 x float> %in, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -992,10 +928,10 @@ define <1 x i64> @test_v1i64_post_load(ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v1i64_post_load:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr d0, [x0]
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #40
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
+; CHECK-GISEL-NEXT: ldr x8, [x0], #40
+; CHECK-GISEL-NEXT: adrp x9, _ptr@PAGE
+; CHECK-GISEL-NEXT: str x0, [x9, _ptr@PAGEOFF]
+; CHECK-GISEL-NEXT: fmov d0, x8
; CHECK-GISEL-NEXT: ret
%newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
%val = load <1 x i64>, ptr %addr, align 8
@@ -1025,20 +961,12 @@ define void @test_v1i64_pre_store(<1 x i64> %in, ptr %addr) {
}
define void @test_v1i64_post_store(<1 x i64> %in, ptr %addr) {
-; SDAG-LABEL: test_v1i64_post_store:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str d0, [x0], #40
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v1i64_post_store:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #40
-; CHECK-GISEL-NEXT: str d0, [x0]
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v1i64_post_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str d0, [x0], #40
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
store <1 x i64> %in, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1067,20 +995,12 @@ define <16 x i8> @test_v16i8_pre_load(ptr %addr) {
}
define <16 x i8> @test_v16i8_post_load(ptr %addr) {
-; SDAG-LABEL: test_v16i8_post_load:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: ldr q0, [x0], #80
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v16i8_post_load:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr q0, [x0]
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v16i8_post_load:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr q0, [x0], #80
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
%val = load <16 x i8>, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1109,20 +1029,12 @@ define void @test_v16i8_pre_store(<16 x i8> %in, ptr %addr) {
}
define void @test_v16i8_post_store(<16 x i8> %in, ptr %addr) {
-; SDAG-LABEL: test_v16i8_post_store:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str q0, [x0], #80
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v16i8_post_store:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str q0, [x0]
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v16i8_post_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str q0, [x0], #80
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
store <16 x i8> %in, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1151,20 +1063,12 @@ define <8 x i16> @test_v8i16_pre_load(ptr %addr) {
}
define <8 x i16> @test_v8i16_post_load(ptr %addr) {
-; SDAG-LABEL: test_v8i16_post_load:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: ldr q0, [x0], #80
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v8i16_post_load:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr q0, [x0]
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v8i16_post_load:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr q0, [x0], #80
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
%val = load <8 x i16>, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1193,20 +1097,12 @@ define void @test_v8i16_pre_store(<8 x i16> %in, ptr %addr) {
}
define void @test_v8i16_post_store(<8 x i16> %in, ptr %addr) {
-; SDAG-LABEL: test_v8i16_post_store:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str q0, [x0], #80
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v8i16_post_store:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str q0, [x0]
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v8i16_post_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str q0, [x0], #80
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
store <8 x i16> %in, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1235,20 +1131,12 @@ define <4 x i32> @test_v4i32_pre_load(ptr %addr) {
}
define <4 x i32> @test_v4i32_post_load(ptr %addr) {
-; SDAG-LABEL: test_v4i32_post_load:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: ldr q0, [x0], #80
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v4i32_post_load:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr q0, [x0]
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v4i32_post_load:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr q0, [x0], #80
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
%val = load <4 x i32>, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1277,20 +1165,12 @@ define void @test_v4i32_pre_store(<4 x i32> %in, ptr %addr) {
}
define void @test_v4i32_post_store(<4 x i32> %in, ptr %addr) {
-; SDAG-LABEL: test_v4i32_post_store:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str q0, [x0], #80
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v4i32_post_store:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str q0, [x0]
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v4i32_post_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str q0, [x0], #80
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
store <4 x i32> %in, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1320,20 +1200,12 @@ define <4 x float> @test_v4f32_pre_load(ptr %addr) {
}
define <4 x float> @test_v4f32_post_load(ptr %addr) {
-; SDAG-LABEL: test_v4f32_post_load:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: ldr q0, [x0], #80
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v4f32_post_load:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr q0, [x0]
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v4f32_post_load:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr q0, [x0], #80
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <4 x float>, ptr %addr, i32 5
%val = load <4 x float>, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1362,20 +1234,12 @@ define void @test_v4f32_pre_store(<4 x float> %in, ptr %addr) {
}
define void @test_v4f32_post_store(<4 x float> %in, ptr %addr) {
-; SDAG-LABEL: test_v4f32_post_store:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str q0, [x0], #80
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v4f32_post_store:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str q0, [x0]
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v4f32_post_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str q0, [x0], #80
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <4 x float>, ptr %addr, i32 5
store <4 x float> %in, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1405,20 +1269,12 @@ define <2 x i64> @test_v2i64_pre_load(ptr %addr) {
}
define <2 x i64> @test_v2i64_post_load(ptr %addr) {
-; SDAG-LABEL: test_v2i64_post_load:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: ldr q0, [x0], #80
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v2i64_post_load:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr q0, [x0]
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v2i64_post_load:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr q0, [x0], #80
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
%val = load <2 x i64>, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1447,20 +1303,12 @@ define void @test_v2i64_pre_store(<2 x i64> %in, ptr %addr) {
}
define void @test_v2i64_post_store(<2 x i64> %in, ptr %addr) {
-; SDAG-LABEL: test_v2i64_post_store:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str q0, [x0], #80
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v2i64_post_store:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str q0, [x0]
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v2i64_post_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str q0, [x0], #80
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
store <2 x i64> %in, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1490,20 +1338,12 @@ define <2 x double> @test_v2f64_pre_load(ptr %addr) {
}
define <2 x double> @test_v2f64_post_load(ptr %addr) {
-; SDAG-LABEL: test_v2f64_post_load:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: ldr q0, [x0], #80
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v2f64_post_load:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr q0, [x0]
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v2f64_post_load:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr q0, [x0], #80
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <2 x double>, ptr %addr, i32 5
%val = load <2 x double>, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1532,20 +1372,12 @@ define void @test_v2f64_pre_store(<2 x double> %in, ptr %addr) {
}
define void @test_v2f64_post_store(<2 x double> %in, ptr %addr) {
-; SDAG-LABEL: test_v2f64_post_store:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, _ptr@PAGE
-; SDAG-NEXT: str q0, [x0], #80
-; SDAG-NEXT: str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT: ret
-;
-; CHECK-GISEL-LABEL: test_v2f64_post_store:
-; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT: add x9, x0, #80
-; CHECK-GISEL-NEXT: str q0, [x0]
-; CHECK-GISEL-NEXT: str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: test_v2f64_post_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: adrp x8, _ptr@PAGE
+; CHECK-NEXT: str q0, [x0], #80
+; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT: ret
%newaddr = getelementptr <2 x double>, ptr %addr, i32 5
store <2 x double> %in, ptr %addr, align 8
store ptr %newaddr, ptr @ptr
@@ -1560,9 +1392,8 @@ define ptr @test_v16i8_post_imm_st1_lane(<16 x i8> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v16i8_post_imm_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #1
-; CHECK-GISEL-NEXT: st1.b { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov b0, v0[3]
+; CHECK-GISEL-NEXT: str b0, [x0], #1
; CHECK-GISEL-NEXT: ret
%elt = extractelement <16 x i8> %in, i32 3
store i8 %elt, ptr %addr
@@ -1580,9 +1411,8 @@ define ptr @test_v16i8_post_reg_st1_lane(<16 x i8> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v16i8_post_reg_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #2
-; CHECK-GISEL-NEXT: st1.b { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov b0, v0[3]
+; CHECK-GISEL-NEXT: str b0, [x0], #2
; CHECK-GISEL-NEXT: ret
%elt = extractelement <16 x i8> %in, i32 3
store i8 %elt, ptr %addr
@@ -1600,9 +1430,8 @@ define ptr @test_v8i16_post_imm_st1_lane(<8 x i16> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v8i16_post_imm_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #2
-; CHECK-GISEL-NEXT: st1.h { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov h0, v0[3]
+; CHECK-GISEL-NEXT: str h0, [x0], #2
; CHECK-GISEL-NEXT: ret
%elt = extractelement <8 x i16> %in, i32 3
store i16 %elt, ptr %addr
@@ -1620,9 +1449,8 @@ define ptr @test_v8i16_post_reg_st1_lane(<8 x i16> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v8i16_post_reg_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #4
-; CHECK-GISEL-NEXT: st1.h { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov h0, v0[3]
+; CHECK-GISEL-NEXT: str h0, [x0], #4
; CHECK-GISEL-NEXT: ret
%elt = extractelement <8 x i16> %in, i32 3
store i16 %elt, ptr %addr
@@ -1639,9 +1467,8 @@ define ptr @test_v4i32_post_imm_st1_lane(<4 x i32> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v4i32_post_imm_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #4
-; CHECK-GISEL-NEXT: st1.s { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov s0, v0[3]
+; CHECK-GISEL-NEXT: str s0, [x0], #4
; CHECK-GISEL-NEXT: ret
%elt = extractelement <4 x i32> %in, i32 3
store i32 %elt, ptr %addr
@@ -1659,9 +1486,8 @@ define ptr @test_v4i32_post_reg_st1_lane(<4 x i32> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v4i32_post_reg_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #8
-; CHECK-GISEL-NEXT: st1.s { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov s0, v0[3]
+; CHECK-GISEL-NEXT: str s0, [x0], #8
; CHECK-GISEL-NEXT: ret
%elt = extractelement <4 x i32> %in, i32 3
store i32 %elt, ptr %addr
@@ -1678,9 +1504,8 @@ define ptr @test_v4f32_post_imm_st1_lane(<4 x float> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v4f32_post_imm_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #4
-; CHECK-GISEL-NEXT: st1.s { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov s0, v0[3]
+; CHECK-GISEL-NEXT: str s0, [x0], #4
; CHECK-GISEL-NEXT: ret
%elt = extractelement <4 x float> %in, i32 3
store float %elt, ptr %addr
@@ -1698,9 +1523,8 @@ define ptr @test_v4f32_post_reg_st1_lane(<4 x float> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v4f32_post_reg_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #8
-; CHECK-GISEL-NEXT: st1.s { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov s0, v0[3]
+; CHECK-GISEL-NEXT: str s0, [x0], #8
; CHECK-GISEL-NEXT: ret
%elt = extractelement <4 x float> %in, i32 3
store float %elt, ptr %addr
@@ -1717,9 +1541,8 @@ define ptr @test_v2i64_post_imm_st1_lane(<2 x i64> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v2i64_post_imm_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #8
-; CHECK-GISEL-NEXT: st1.d { v0 }[1], [x8]
+; CHECK-GISEL-NEXT: mov d0, v0[1]
+; CHECK-GISEL-NEXT: str d0, [x0], #8
; CHECK-GISEL-NEXT: ret
%elt = extractelement <2 x i64> %in, i64 1
store i64 %elt, ptr %addr
@@ -1737,9 +1560,8 @@ define ptr @test_v2i64_post_reg_st1_lane(<2 x i64> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v2i64_post_reg_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #16
-; CHECK-GISEL-NEXT: st1.d { v0 }[1], [x8]
+; CHECK-GISEL-NEXT: mov d0, v0[1]
+; CHECK-GISEL-NEXT: str d0, [x0], #16
; CHECK-GISEL-NEXT: ret
%elt = extractelement <2 x i64> %in, i64 1
store i64 %elt, ptr %addr
@@ -1756,9 +1578,8 @@ define ptr @test_v2f64_post_imm_st1_lane(<2 x double> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v2f64_post_imm_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #8
-; CHECK-GISEL-NEXT: st1.d { v0 }[1], [x8]
+; CHECK-GISEL-NEXT: mov d0, v0[1]
+; CHECK-GISEL-NEXT: str d0, [x0], #8
; CHECK-GISEL-NEXT: ret
%elt = extractelement <2 x double> %in, i32 1
store double %elt, ptr %addr
@@ -1776,9 +1597,8 @@ define ptr @test_v2f64_post_reg_st1_lane(<2 x double> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v2f64_post_reg_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #16
-; CHECK-GISEL-NEXT: st1.d { v0 }[1], [x8]
+; CHECK-GISEL-NEXT: mov d0, v0[1]
+; CHECK-GISEL-NEXT: str d0, [x0], #16
; CHECK-GISEL-NEXT: ret
%elt = extractelement <2 x double> %in, i32 1
store double %elt, ptr %addr
@@ -1796,10 +1616,9 @@ define ptr @test_v8i8_post_imm_st1_lane(<8 x i8> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v8i8_post_imm_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #1
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: st1.b { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov b0, v0[3]
+; CHECK-GISEL-NEXT: str b0, [x0], #1
; CHECK-GISEL-NEXT: ret
%elt = extractelement <8 x i8> %in, i32 3
store i8 %elt, ptr %addr
@@ -1818,10 +1637,9 @@ define ptr @test_v8i8_post_reg_st1_lane(<8 x i8> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v8i8_post_reg_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #2
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: st1.b { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov b0, v0[3]
+; CHECK-GISEL-NEXT: str b0, [x0], #2
; CHECK-GISEL-NEXT: ret
%elt = extractelement <8 x i8> %in, i32 3
store i8 %elt, ptr %addr
@@ -1839,10 +1657,9 @@ define ptr @test_v4i16_post_imm_st1_lane(<4 x i16> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v4i16_post_imm_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #2
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: st1.h { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov h0, v0[3]
+; CHECK-GISEL-NEXT: str h0, [x0], #2
; CHECK-GISEL-NEXT: ret
%elt = extractelement <4 x i16> %in, i32 3
store i16 %elt, ptr %addr
@@ -1861,10 +1678,9 @@ define ptr @test_v4i16_post_reg_st1_lane(<4 x i16> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v4i16_post_reg_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #4
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: st1.h { v0 }[3], [x8]
+; CHECK-GISEL-NEXT: mov h0, v0[3]
+; CHECK-GISEL-NEXT: str h0, [x0], #4
; CHECK-GISEL-NEXT: ret
%elt = extractelement <4 x i16> %in, i32 3
store i16 %elt, ptr %addr
@@ -1882,10 +1698,9 @@ define ptr @test_v2i32_post_imm_st1_lane(<2 x i32> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v2i32_post_imm_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #4
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: st1.s { v0 }[1], [x8]
+; CHECK-GISEL-NEXT: mov s0, v0[1]
+; CHECK-GISEL-NEXT: str s0, [x0], #4
; CHECK-GISEL-NEXT: ret
%elt = extractelement <2 x i32> %in, i32 1
store i32 %elt, ptr %addr
@@ -1904,10 +1719,9 @@ define ptr @test_v2i32_post_reg_st1_lane(<2 x i32> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v2i32_post_reg_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #8
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: st1.s { v0 }[1], [x8]
+; CHECK-GISEL-NEXT: mov s0, v0[1]
+; CHECK-GISEL-NEXT: str s0, [x0], #8
; CHECK-GISEL-NEXT: ret
%elt = extractelement <2 x i32> %in, i32 1
store i32 %elt, ptr %addr
@@ -1925,10 +1739,9 @@ define ptr @test_v2f32_post_imm_st1_lane(<2 x float> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v2f32_post_imm_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #4
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: st1.s { v0 }[1], [x8]
+; CHECK-GISEL-NEXT: mov s0, v0[1]
+; CHECK-GISEL-NEXT: str s0, [x0], #4
; CHECK-GISEL-NEXT: ret
%elt = extractelement <2 x float> %in, i32 1
store float %elt, ptr %addr
@@ -1947,10 +1760,9 @@ define ptr @test_v2f32_post_reg_st1_lane(<2 x float> %in, ptr %addr) {
;
; CHECK-GISEL-LABEL: test_v2f32_post_reg_st1_lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: mov x8, x0
-; CHECK-GISEL-NEXT: add x0, x0, #8
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: st1.s { v0 }[1], [x8]
+; CHECK-GISEL-NEXT: mov s0, v0[1]
+; CHECK-GISEL-NEXT: str s0, [x0], #8
; CHECK-GISEL-NEXT: ret
%elt = extractelement <2 x float> %in, i32 1
store float %elt, ptr %addr
@@ -13791,9 +13603,9 @@ define <16 x i8> @test_v16i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #1
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldrb w8, [x0], #1
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: dup.16b v0, w8
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
@@ -13861,9 +13673,9 @@ define <8 x i8> @test_v8i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #1
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldrb w8, [x0], #1
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: dup.8b v0, w8
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
@@ -13915,9 +13727,9 @@ define <8 x i16> @test_v8i16_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.8h { v0 }, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #2
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldrh w8, [x0], #2
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: dup.8h v0, w8
; CHECK-GISEL-NEXT: ret
%tmp1 = load i16, ptr %bar
%tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
@@ -13970,9 +13782,9 @@ define <4 x i16> @test_v4i16_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.4h { v0 }, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #2
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldrh w8, [x0], #2
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: dup.4h v0, w8
; CHECK-GISEL-NEXT: ret
%tmp1 = load i16, ptr %bar
%tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
@@ -14017,9 +13829,9 @@ define <4 x i32> @test_v4i32_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.4s { v0 }, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #4
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldr w8, [x0], #4
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: dup.4s v0, w8
; CHECK-GISEL-NEXT: ret
%tmp1 = load i32, ptr %bar
%tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
@@ -14064,9 +13876,9 @@ define <2 x i32> @test_v2i32_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.2s { v0 }, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #4
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldr w8, [x0], #4
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: dup.2s v0, w8
; CHECK-GISEL-NEXT: ret
%tmp1 = load i32, ptr %bar
%tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
@@ -14107,9 +13919,9 @@ define <2 x i64> @test_v2i64_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.2d { v0 }, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #8
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldr x8, [x0], #8
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: dup.2d v0, x8
; CHECK-GISEL-NEXT: ret
%tmp1 = load i64, ptr %bar
%tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
@@ -14150,9 +13962,9 @@ define <4 x float> @test_v4f32_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.4s { v0 }, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #4
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldr s0, [x0], #4
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: dup.4s v0, v0[0]
; CHECK-GISEL-NEXT: ret
%tmp1 = load float, ptr %bar
%tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
@@ -14197,9 +14009,9 @@ define <2 x float> @test_v2f32_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.2s { v0 }, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #4
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldr s0, [x0], #4
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: dup.2s v0, v0[0]
; CHECK-GISEL-NEXT: ret
%tmp1 = load float, ptr %bar
%tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
@@ -14240,9 +14052,9 @@ define <2 x double> @test_v2f64_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.2d { v0 }, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #8
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldr d0, [x0], #8
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: dup.2d v0, v0[0]
; CHECK-GISEL-NEXT: ret
%tmp1 = load double, ptr %bar
%tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
@@ -14283,10 +14095,9 @@ define <16 x i8> @test_v16i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <16 x i8> %A)
;
; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr b1, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #1
-; CHECK-GISEL-NEXT: str x8, [x1]
-; CHECK-GISEL-NEXT: mov.b v0[1], v1[0]
+; CHECK-GISEL-NEXT: ldrb w8, [x0], #1
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: mov.b v0[1], w8
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
@@ -14327,11 +14138,10 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) {
;
; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr b1, [x0]
+; CHECK-GISEL-NEXT: ldrb w8, [x0], #1
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: add x8, x0, #1
-; CHECK-GISEL-NEXT: str x8, [x1]
-; CHECK-GISEL-NEXT: mov.b v0[1], v1[0]
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: mov.b v0[1], w8
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
@@ -14375,10 +14185,9 @@ define <8 x i16> @test_v8i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i16> %A)
;
; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld1lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr h1, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #2
-; CHECK-GISEL-NEXT: str x8, [x1]
-; CHECK-GISEL-NEXT: mov.h v0[1], v1[0]
+; CHECK-GISEL-NEXT: ldrh w8, [x0], #2
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: mov.h v0[1], w8
; CHECK-GISEL-NEXT: ret
%tmp1 = load i16, ptr %bar
%tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
@@ -14420,11 +14229,10 @@ define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A)
;
; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr h1, [x0]
+; CHECK-GISEL-NEXT: ldrh w8, [x0], #2
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: add x8, x0, #2
-; CHECK-GISEL-NEXT: str x8, [x1]
-; CHECK-GISEL-NEXT: mov.h v0[1], v1[0]
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: mov.h v0[1], w8
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: ret
%tmp1 = load i16, ptr %bar
@@ -14469,10 +14277,9 @@ define <4 x i32> @test_v4i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i32> %A)
;
; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld1lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr s1, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #4
-; CHECK-GISEL-NEXT: str x8, [x1]
-; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
+; CHECK-GISEL-NEXT: ldr w8, [x0], #4
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: mov.s v0[1], w8
; CHECK-GISEL-NEXT: ret
%tmp1 = load i32, ptr %bar
%tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
@@ -14514,11 +14321,10 @@ define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A)
;
; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr s1, [x0]
+; CHECK-GISEL-NEXT: ldr w8, [x0], #4
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: add x8, x0, #4
-; CHECK-GISEL-NEXT: str x8, [x1]
-; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: mov.s v0[1], w8
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: ret
%tmp1 = load i32, ptr %bar
@@ -14563,10 +14369,9 @@ define <2 x i64> @test_v2i64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i64> %A)
;
; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld1lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr d1, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #8
-; CHECK-GISEL-NEXT: str x8, [x1]
-; CHECK-GISEL-NEXT: mov.d v0[1], v1[0]
+; CHECK-GISEL-NEXT: ldr x8, [x0], #8
+; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: mov.d v0[1], x8
; CHECK-GISEL-NEXT: ret
%tmp1 = load i64, ptr %bar
%tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
@@ -14606,9 +14411,8 @@ define <4 x float> @test_v4f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x float>
;
; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld1lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr s1, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #4
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldr s1, [x0], #4
+; CHECK-GISEL-NEXT: str x0, [x1]
; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
; CHECK-GISEL-NEXT: ret
%tmp1 = load float, ptr %bar
@@ -14651,10 +14455,9 @@ define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float>
;
; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr s1, [x0]
+; CHECK-GISEL-NEXT: ldr s1, [x0], #4
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-GISEL-NEXT: add x8, x0, #4
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: str x0, [x1]
; CHECK-GISEL-NEXT: mov.s v0[1], v1[0]
; CHECK-GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: ret
@@ -14700,9 +14503,8 @@ define <2 x double> @test_v2f64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x double
;
; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld1lane:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ldr d1, [x0]
-; CHECK-GISEL-NEXT: add x8, x0, #8
-; CHECK-GISEL-NEXT: str x8, [x1]
+; CHECK-GISEL-NEXT: ldr d1, [x0], #8
+; CHECK-GISEL-NEXT: str x0, [x1]
; CHECK-GISEL-NEXT: mov.d v0[1], v1[0]
; CHECK-GISEL-NEXT: ret
%tmp1 = load double, ptr %bar
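
Note on the test updates above: the bulk of the churn comes from one rewrite. A G_PTR_ADD of a constant offset from the pointer of an adjacent load/store is folded into the memory operation as a writeback, which the selector then emits as the post-indexed `[xN], #imm` forms seen in the new CHECK lines. A minimal MIR sketch of the store case (operand order assumed from the generic G_INDEXED_STORE definition, not copied from this patch; register names are illustrative):

  ; Before the combine: the store and the pointer increment are separate.
  %off:_(s64) = G_CONSTANT i64 4
  G_STORE %val(s32), %ptr(p0) :: (store (s32))
  %next:_(p0) = G_PTR_ADD %ptr, %off(s64)

  ; After: one writeback operation; the trailing 0 selects post-indexing
  ; (1 would select pre-indexing).
  %next:_(p0) = G_INDEXED_STORE %val(s32), %ptr(p0), %off(s64), 0 :: (store (s32))
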
>From 109ad5ecfc11f8b5683d9ba8ea4888b62ab90bec Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Wed, 18 Oct 2023 16:24:40 -0700
Subject: [PATCH 2/3] clang-format fixes
---
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 5 ++---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +-
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index a8425db6584f61c..552601a90862c0a 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -954,8 +954,7 @@ static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
/// Return true if 'MI' is a load or a store whose address operand may be
/// folded into the load / store addressing mode.
-static bool canFoldInAddressingMode(GLoadStore *MI,
- const TargetLowering &TLI,
+static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
MachineRegisterInfo &MRI) {
TargetLowering::AddrMode AM;
auto *MF = MI->getMF();
@@ -995,7 +994,7 @@ unsigned getIndexedOpc(unsigned LdStOpc) {
} // namespace
bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
- // Check for legality.
+ // Check for legality.
LLT PtrTy = MRI.getType(LdSt.getPointerReg());
LLT Ty = MRI.getType(LdSt.getReg(0));
LLT MemTy = LdSt.getMMO().getMemoryType();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b20c5823371c226..576d89255b64b3d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -23623,7 +23623,7 @@ bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
// HACK
if (IsPre)
return false; // Until we implement.
-
+
auto CstOffset = getIConstantVRegVal(Offset, MRI);
if (!CstOffset || CstOffset->isZero())
return false;
>From 1b3a6f263d3fdbbc60819290a02f012346f58a47 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Thu, 19 Oct 2023 11:39:11 -0700
Subject: [PATCH 3/3] Address review comments.
---
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 24 +++++++------------
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 6 ++---
2 files changed, 11 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 552601a90862c0a..1cccddfd972221c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -963,8 +963,7 @@ static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
return false;
AM.HasBaseReg = true;
- auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI);
- if (CstOff)
+ if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
else
AM.Scale = 1; // [reg +/- reg]
@@ -976,8 +975,7 @@ static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
MI->getMMO().getAddrSpace());
}
-namespace {
-unsigned getIndexedOpc(unsigned LdStOpc) {
+static unsigned getIndexedOpc(unsigned LdStOpc) {
switch (LdStOpc) {
case TargetOpcode::G_LOAD:
return TargetOpcode::G_INDEXED_LOAD;
@@ -991,7 +989,6 @@ unsigned getIndexedOpc(unsigned LdStOpc) {
llvm_unreachable("Unexpected opcode");
}
}
-} // namespace
bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
// Check for legality.
@@ -1024,8 +1021,7 @@ bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
// G_STORE %val(s64), %baseptr(p0)
// %offset:_(s64) = G_CONSTANT i64 -256
// %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
- auto &MF = *LdSt.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ const auto &TLI = getTargetLowering();
Register Ptr = LdSt.getPointerReg();
// If the store is the only use, don't bother.
@@ -1065,6 +1061,7 @@ bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
// Make sure the offset calculation is before the potentially indexed op.
MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
+ RematOffset = false;
if (!dominates(*OffsetDef, LdSt)) {
// If the offset however is just a G_CONSTANT, we can always just
// rematerialize it where we need it.
@@ -1080,12 +1077,10 @@ bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
// If the user is a later load/store that can be post-indexed, then don't
// combine this one.
auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
- if (BasePtrLdSt && BasePtrLdSt != &LdSt) {
- if (dominates(LdSt, *BasePtrLdSt)) {
- if (isIndexedLoadStoreLegal(*BasePtrLdSt))
- return false;
- }
- }
+ if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
+ dominates(LdSt, *BasePtrLdSt) &&
+ isIndexedLoadStoreLegal(*BasePtrLdSt))
+ return false;
// Now we're looking for the key G_PTR_ADD instruction, which contains
// the offset add that we want to fold.
@@ -1097,10 +1092,9 @@ bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
if (BaseUseUse.getParent() != LdSt.getParent())
return false;
- if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse)) {
+ if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
return false;
- }
}
if (!dominates(LdSt, BasePtrUse))
return false; // All uses must be dominated by the load/store.
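
For context on the RematOffset path above: a constant offset is the one case worth rematerializing, because a G_CONSTANT can always be re-emitted next to the combined instruction even when its original definition does not dominate the load/store. A hedged MIR sketch of that situation (register names and operand shapes are illustrative, assumed from the generic opcode definitions):

  ; %off is defined below the store, so it does not dominate it:
  G_STORE %val(s64), %ptr(p0) :: (store (s64))
  %off:_(s64) = G_CONSTANT i64 -256
  %next:_(p0) = G_PTR_ADD %ptr, %off(s64)

  ; With RematOffset set, the constant is simply recreated ahead of the
  ; new post-indexed store instead of rejecting the candidate:
  %off2:_(s64) = G_CONSTANT i64 -256
  %next:_(p0) = G_INDEXED_STORE %val(s64), %ptr(p0), %off2(s64), 0 :: (store (s64))
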
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 5e96bc67d8ded13..b74c4021d3e4efc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -607,10 +607,8 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
// GMemOperation because we also want to match indexed loads.
- auto *Load = dyn_cast<GMemOperation>(&MI);
-
- const auto &MMO = Load->getMMO();
- const Value *LdVal = MMO.getValue();
+ auto *MemOp = cast<GMemOperation>(&MI);
+ const Value *LdVal = MemOp->getMMO().getValue();
if (!LdVal)
return false;
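
One more note on the register-bank change: indexed operations are GMemOperations but not GLoadStores, so casting through GMemOperation lets isLoadFromFPType inspect the memory operand of the new indexed opcodes as well. A rough sketch of the kind of instruction this now classifies (shape assumed from the generic G_INDEXED_LOAD definition, for illustration only):

  ; Post-indexed load whose result should be assigned to the FPR bank when
  ; the pointee type recorded in the MMO is a floating-point type:
  %val:_(s64), %next:_(p0) = G_INDEXED_LOAD %ptr(p0), %off(s64), 0 :: (load (s64) from %ir.fptr)
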