[llvm] ddddf7f - [AArch64][GlobalISel] Split offsets of consecutive stores to aid STP … (#66980)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 21 18:35:48 PDT 2023
Author: Amara Emerson
Date: 2023-09-22T09:35:43+08:00
New Revision: ddddf7f35eabdc72e4b02b09f8a4301554267c45
URL: https://github.com/llvm/llvm-project/commit/ddddf7f35eabdc72e4b02b09f8a4301554267c45
DIFF: https://github.com/llvm/llvm-project/commit/ddddf7f35eabdc72e4b02b09f8a4301554267c45.diff
LOG: [AArch64][GlobalISel] Split offsets of consecutive stores to aid STP … (#66980)
Added:
llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll
Modified:
llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index e7db9547f03b694..e8e61b73f9e0c43 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -364,6 +364,8 @@ class MachineIRBuilder {
State.Observer = &Observer;
}
+ GISelChangeObserver *getObserver() { return State.Observer; }
+
void stopObservingChanges() { State.Observer = nullptr; }
bool isObservingChanges() const { return State.Observer != nullptr; }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 56e564638cdcafd..51c52aad3594975 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -20,7 +20,9 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -439,6 +441,22 @@ class AArch64PostLegalizerCombiner : public MachineFunctionPass {
private:
bool IsOptNone;
AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;
+
+
+ struct StoreInfo {
+ GStore *St = nullptr;
+ // The G_PTR_ADD that's used by the store. We keep this to cache the
+ // MachineInstr def.
+ GPtrAdd *Ptr = nullptr;
+ // The signed offset to the Ptr instruction.
+ int64_t Offset = 0;
+ LLT StoredType;
+ };
+ bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
+ CSEMIRBuilder &MIB);
+
+ bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
+ CSEMIRBuilder &MIB);
};
} // end anonymous namespace
@@ -492,7 +510,191 @@ bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
F.hasMinSize());
AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo,
RuleConfig, ST, MDT, LI);
- return Impl.combineMachineInstrs();
+ bool Changed = Impl.combineMachineInstrs();
+
+ auto MIB = CSEMIRBuilder(MF);
+ MIB.setCSEInfo(CSEInfo);
+ Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);
+ return Changed;
+}
+
+bool AArch64PostLegalizerCombiner::tryOptimizeConsecStores(
+ SmallVectorImpl<StoreInfo> &Stores, CSEMIRBuilder &MIB) {
+ if (Stores.size() <= 2)
+ return false;
+
+ // Profitability checks:
+ int64_t BaseOffset = Stores[0].Offset;
+ unsigned NumPairsExpected = Stores.size() / 2;
+ unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);
+ // Size savings will depend on whether we can fold the offset, as an
+ // immediate of an ADD.
+ auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
+ if (!TLI.isLegalAddImmediate(BaseOffset))
+ TotalInstsExpected++;
+ int SavingsExpected = Stores.size() - TotalInstsExpected;
+ if (SavingsExpected <= 0)
+ return false;
+
+ auto &MRI = MIB.getMF().getRegInfo();
+
+ // We have a series of consecutive stores. Factor out the common base
+ // pointer and rewrite the offsets.
+ Register NewBase = Stores[0].Ptr->getReg(0);
+ for (auto &SInfo : Stores) {
+ // Compute a new pointer with the new base ptr and adjusted offset.
+ MIB.setInstrAndDebugLoc(*SInfo.St);
+ auto NewOff = MIB.buildConstant(LLT::scalar(64), SInfo.Offset - BaseOffset);
+ auto NewPtr = MIB.buildPtrAdd(MRI.getType(SInfo.St->getPointerReg()),
+ NewBase, NewOff);
+ if (MIB.getObserver())
+ MIB.getObserver()->changingInstr(*SInfo.St);
+ SInfo.St->getOperand(1).setReg(NewPtr.getReg(0));
+ if (MIB.getObserver())
+ MIB.getObserver()->changedInstr(*SInfo.St);
+ }
+ LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
+ << " stores into a base pointer and offsets.\n");
+ return true;
+}
+
+static cl::opt<bool>
+ EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",
+ cl::init(true), cl::Hidden,
+ cl::desc("Enable consecutive memop optimization "
+ "in AArch64PostLegalizerCombiner"));
+
+bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(
+ MachineFunction &MF, CSEMIRBuilder &MIB) {
+ // This combine needs to run after all reassociations/folds on pointer
+ // addressing have been done, specifically those that combine two G_PTR_ADDs
+ // with constant offsets into a single G_PTR_ADD with a combined offset.
+ // The goal of this optimization is to undo that combine in the case where
+ // doing so has prevented the formation of pair stores due to illegal
+ // addressing modes of STP. The reason that we do it here is because
+ // it's much easier to undo the transformation of a series of consecutive
+ // mem ops, than it is to detect when doing it would be a bad idea looking
+ // at a single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.
+ //
+ // An example:
+ // G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)
+ // %off1:_(s64) = G_CONSTANT i64 4128
+ // %p1:_(p0) = G_PTR_ADD %base:_, %off1:_(s64)
+ // G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)
+ // %off2:_(s64) = G_CONSTANT i64 4144
+ // %p2:_(p0) = G_PTR_ADD %base:_, %off2:_(s64)
+ // G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)
+ // %off3:_(s64) = G_CONSTANT i64 4160
+ // %p3:_(p0) = G_PTR_ADD %base:_, %off3:_(s64)
+ // G_STORE %11:_(<2 x s64>), %p3:_(p0) :: (store (<2 x s64>), align 1)
+ bool Changed = false;
+ auto &MRI = MF.getRegInfo();
+
+ if (!EnableConsecutiveMemOpOpt)
+ return Changed;
+
+ SmallVector<StoreInfo, 8> Stores;
+ // If we see a load, then we keep track of any values defined by it.
+ // In the following example, STP formation will fail anyway because
+ // the latter store is using a load result that appears after the
+ // prior store. In this situation if we factor out the offset then
+ // we increase code size for no benefit.
+ // G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))
+ // %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))
+ // G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))
+ SmallVector<Register> LoadValsSinceLastStore;
+
+ auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {
+ // Check if this store is consecutive to the last one.
+ if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||
+ (Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
+ New.Offset) ||
+ Last.StoredType != New.StoredType)
+ return false;
+
+ // Check if this store is using a load result that appears after the
+ // last store. If so, bail out.
+ if (any_of(LoadValsSinceLastStore, [&](Register LoadVal) {
+ return New.St->getValueReg() == LoadVal;
+ }))
+ return false;
+
+ // Check if the current offset would be too large for STP.
+ // If not, then STP formation should be able to handle it, so we don't
+ // need to do anything.
+ int64_t MaxLegalOffset;
+ switch (New.StoredType.getSizeInBits()) {
+ case 32:
+ MaxLegalOffset = 252;
+ break;
+ case 64:
+ MaxLegalOffset = 504;
+ break;
+ case 128:
+ MaxLegalOffset = 1008;
+ break;
+ default:
+ llvm_unreachable("Unexpected stored type size");
+ }
+ if (New.Offset < MaxLegalOffset)
+ return false;
+
+ // If factoring it out still wouldn't help then don't bother.
+ return New.Offset - Stores[0].Offset <= MaxLegalOffset;
+ };
+
+ auto resetState = [&]() {
+ Stores.clear();
+ LoadValsSinceLastStore.clear();
+ };
+
+ for (auto &MBB : MF) {
+ // We're looking inside a single BB at a time since the memset pattern
+ // should only be in a single block.
+ resetState();
+ for (auto &MI : MBB) {
+ if (auto *St = dyn_cast<GStore>(&MI)) {
+ Register PtrBaseReg;
+ APInt Offset;
+ LLT StoredValTy = MRI.getType(St->getValueReg());
+ unsigned ValSize = StoredValTy.getSizeInBits();
+ if (ValSize < 32 || ValSize != St->getMMO().getSizeInBits())
+ continue;
+
+ Register PtrReg = St->getPointerReg();
+ if (mi_match(
+ PtrReg, MRI,
+ m_OneNonDBGUse(m_GPtrAdd(m_Reg(PtrBaseReg), m_ICst(Offset))))) {
+ GPtrAdd *PtrAdd = cast<GPtrAdd>(MRI.getVRegDef(PtrReg));
+ StoreInfo New = {St, PtrAdd, Offset.getSExtValue(), StoredValTy};
+
+ if (Stores.empty()) {
+ Stores.push_back(New);
+ continue;
+ }
+
+ // Check if this store is a valid continuation of the sequence.
+ auto &Last = Stores.back();
+ if (storeIsValid(Last, New)) {
+ Stores.push_back(New);
+ LoadValsSinceLastStore.clear(); // Reset the load value tracking.
+ } else {
+ // The store isn't valid to consider for the prior sequence,
+ // so try to optimize what we have so far and start a new sequence.
+ Changed |= tryOptimizeConsecStores(Stores, MIB);
+ resetState();
+ Stores.push_back(New);
+ }
+ }
+ } else if (auto *Ld = dyn_cast<GLoad>(&MI)) {
+ LoadValsSinceLastStore.push_back(Ld->getDstReg());
+ }
+ }
+ Changed |= tryOptimizeConsecStores(Stores, MIB);
+ resetState();
+ }
+
+ return Changed;
}
char AArch64PostLegalizerCombiner::ID = 0;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll b/llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll
new file mode 100644
index 000000000000000..6aaefff1f724062
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll
@@ -0,0 +1,353 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64-apple-ios -verify-machineinstrs -global-isel -aarch64-postlegalizer-consecutive-memops=0 < %s | FileCheck %s --check-prefix=CHECK-NO-SPLIT
+; RUN: llc -mtriple=aarch64-apple-ios -verify-machineinstrs -global-isel < %s | FileCheck %s --check-prefix=CHECK-SPLIT
+
+define void @basic_split(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: basic_split:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8040]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: basic_split:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: mov w8, #8000 ; =0x1f40
+; CHECK-SPLIT-NEXT: add x8, x0, x8
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #16]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #32]
+; CHECK-SPLIT-NEXT: ret
+ %bigoffset = getelementptr i64, ptr %p, i64 1000
+ store i64 0, ptr %bigoffset
+ %addr2 = getelementptr i64, ptr %p, i64 1001
+ store i64 0, ptr %addr2
+ %addr3 = getelementptr i64, ptr %p, i64 1002
+ store i64 0, ptr %addr3
+ %addr4 = getelementptr i64, ptr %p, i64 1003
+ store i64 0, ptr %addr4
+ %addr5 = getelementptr i64, ptr %p, i64 1004
+ store i64 0, ptr %addr5
+ %addr6 = getelementptr i64, ptr %p, i64 1005
+ store i64 0, ptr %addr6
+ ret void
+}
+
+define void @basic_multi_use_ptr(ptr %p, ptr %p2) {
+; CHECK-NO-SPLIT-LABEL: basic_multi_use_ptr:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: mov w8, #8008 ; =0x1f48
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: add x8, x0, x8
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: str x8, [x1]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: basic_multi_use_ptr:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: mov w8, #8008 ; =0x1f48
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-SPLIT-NEXT: add x8, x0, x8
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-SPLIT-NEXT: str x8, [x1]
+; CHECK-SPLIT-NEXT: ret
+ %bigoffset = getelementptr i64, ptr %p, i64 1000
+ store i64 0, ptr %bigoffset
+ %addr2 = getelementptr i64, ptr %p, i64 1001
+ store i64 0, ptr %addr2
+ %addr3 = getelementptr i64, ptr %p, i64 1002
+ store i64 0, ptr %addr3
+ %addr4 = getelementptr i64, ptr %p, i64 1003
+ store i64 0, ptr %addr4
+ ; multiuse of %addr2
+ store ptr %addr2, ptr %p2
+ ret void
+}
+
+define void @not_consecutive(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: not_consecutive:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: not_consecutive:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-SPLIT-NEXT: ret
+ %bigoffset = getelementptr i64, ptr %p, i64 1000
+ store i64 0, ptr %bigoffset
+ %addr2 = getelementptr i64, ptr %p, i64 1001
+ store i64 0, ptr %addr2
+ %addr3 = getelementptr i64, ptr %p, i64 1003
+ store i64 0, ptr %addr3
+ %addr4 = getelementptr i64, ptr %p, i64 1004
+ store i64 0, ptr %addr4
+ ret void
+}
+
+define void @early_store_is_invalid_but_split_rest(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: early_store_is_invalid_but_split_rest:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8080]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8040]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8048]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8056]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: early_store_is_invalid_but_split_rest:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: mov w8, #8016 ; =0x1f50
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-SPLIT-NEXT: add x8, x0, x8
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8080]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #16]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #32]
+; CHECK-SPLIT-NEXT: ret
+ %bigoffset = getelementptr i64, ptr %p, i64 1000
+ store i64 0, ptr %bigoffset
+ %addr2 = getelementptr i64, ptr %p, i64 1010
+ store i64 0, ptr %addr2
+ %addr3 = getelementptr i64, ptr %p, i64 1002
+ store i64 0, ptr %addr3
+ %addr4 = getelementptr i64, ptr %p, i64 1003
+ store i64 0, ptr %addr4
+ %addr5 = getelementptr i64, ptr %p, i64 1004
+ store i64 0, ptr %addr5
+ %addr6 = getelementptr i64, ptr %p, i64 1005
+ store i64 0, ptr %addr6
+ %addr7 = getelementptr i64, ptr %p, i64 1006
+ store i64 0, ptr %addr7
+ %addr8 = getelementptr i64, ptr %p, i64 1007
+ store i64 0, ptr %addr8
+ ret void
+}
+
+define void @vector(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: vector:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16000]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16016]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16032]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16048]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16064]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16080]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16096]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16112]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: vector:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: movi.2d v0, #0000000000000000
+; CHECK-SPLIT-NEXT: mov w8, #16000 ; =0x3e80
+; CHECK-SPLIT-NEXT: add x8, x0, x8
+; CHECK-SPLIT-NEXT: stp q0, q0, [x8]
+; CHECK-SPLIT-NEXT: stp q0, q0, [x8, #32]
+; CHECK-SPLIT-NEXT: stp q0, q0, [x8, #64]
+; CHECK-SPLIT-NEXT: stp q0, q0, [x8, #96]
+; CHECK-SPLIT-NEXT: ret
+ %bigoffset = getelementptr <2 x i64>, ptr %p, i64 1000
+ store <2 x i64> <i64 0, i64 0>, ptr %bigoffset
+ %addr2 = getelementptr <2 x i64>, ptr %p, i64 1001
+ store <2 x i64> <i64 0, i64 0>, ptr %addr2
+ %addr3 = getelementptr <2 x i64>, ptr %p, i64 1002
+ store <2 x i64> <i64 0, i64 0>, ptr %addr3
+ %addr4 = getelementptr <2 x i64>, ptr %p, i64 1003
+ store <2 x i64> <i64 0, i64 0>, ptr %addr4
+ %addr5 = getelementptr <2 x i64>, ptr %p, i64 1004
+ store <2 x i64> <i64 0, i64 0>, ptr %addr5
+ %addr6 = getelementptr <2 x i64>, ptr %p, i64 1005
+ store <2 x i64> <i64 0, i64 0>, ptr %addr6
+ %addr7 = getelementptr <2 x i64>, ptr %p, i64 1006
+ store <2 x i64> <i64 0, i64 0>, ptr %addr7
+ %addr8 = getelementptr <2 x i64>, ptr %p, i64 1007
+ store <2 x i64> <i64 0, i64 0>, ptr %addr8
+ ret void
+}
+
+define void @can_already_form_stp(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: can_already_form_stp:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: stp xzr, xzr, [x0, #80]
+; CHECK-NO-SPLIT-NEXT: stp xzr, xzr, [x0, #96]
+; CHECK-NO-SPLIT-NEXT: stp xzr, xzr, [x0, #112]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: can_already_form_stp:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x0, #80]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x0, #96]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x0, #112]
+; CHECK-SPLIT-NEXT: ret
+ %bigoffset = getelementptr i64, ptr %p, i64 10
+ store i64 0, ptr %bigoffset
+ %addr2 = getelementptr i64, ptr %p, i64 11
+ store i64 0, ptr %addr2
+ %addr3 = getelementptr i64, ptr %p, i64 12
+ store i64 0, ptr %addr3
+ %addr4 = getelementptr i64, ptr %p, i64 13
+ store i64 0, ptr %addr4
+ %addr5 = getelementptr i64, ptr %p, i64 14
+ store i64 0, ptr %addr5
+ %addr6 = getelementptr i64, ptr %p, i64 15
+ store i64 0, ptr %addr6
+ ret void
+}
+
+define void @use_of_load_in_between(ptr %p, ptr %ldptr, ptr %ldptr2) {
+; CHECK-NO-SPLIT-LABEL: use_of_load_in_between:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: ldr x8, [x1]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: str x8, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: ldr x8, [x2]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8040]
+; CHECK-NO-SPLIT-NEXT: str x8, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: use_of_load_in_between:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-SPLIT-NEXT: ldr x8, [x1]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-SPLIT-NEXT: str x8, [x0, #8008]
+; CHECK-SPLIT-NEXT: ldr x8, [x2]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8040]
+; CHECK-SPLIT-NEXT: str x8, [x0, #8024]
+; CHECK-SPLIT-NEXT: ret
+ %bigoffset = getelementptr i64, ptr %p, i64 1000
+ store i64 0, ptr %bigoffset
+ %addr2 = getelementptr i64, ptr %p, i64 1001
+ %ld = load i64, ptr %ldptr
+ store i64 %ld, ptr %addr2
+ %addr3 = getelementptr i64, ptr %p, i64 1002
+ store i64 0, ptr %addr3
+ %addr4 = getelementptr i64, ptr %p, i64 1003
+ %ld2 = load i64, ptr %ldptr2
+ store i64 %ld2, ptr %addr4
+ %addr5 = getelementptr i64, ptr %p, i64 1004
+ store i64 0, ptr %addr5
+ %addr6 = getelementptr i64, ptr %p, i64 1005
+ store i64 0, ptr %addr6
+ ret void
+}
+
+define void @offset_legal_for_add_imm(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: offset_legal_for_add_imm:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3200]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3208]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3216]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: offset_legal_for_add_imm:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: add x8, x0, #3200
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8]
+; CHECK-SPLIT-NEXT: str xzr, [x8, #16]
+; CHECK-SPLIT-NEXT: ret
+ %bigoffset = getelementptr i64, ptr %p, i64 400
+ store i64 0, ptr %bigoffset
+ %addr2 = getelementptr i64, ptr %p, i64 401
+ store i64 0, ptr %addr2
+ %addr3 = getelementptr i64, ptr %p, i64 402
+ store i64 0, ptr %addr3
+ ret void
+}
+
+define void @offset_illegal_for_add_imm(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: offset_illegal_for_add_imm:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: offset_illegal_for_add_imm:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-SPLIT-NEXT: ret
+ %bigoffset = getelementptr i64, ptr %p, i64 1000
+ store i64 0, ptr %bigoffset
+ %addr2 = getelementptr i64, ptr %p, i64 1001
+ store i64 0, ptr %addr2
+ %addr3 = getelementptr i64, ptr %p, i64 1002
+ store i64 0, ptr %addr3
+ ret void
+}
+
+define void @offset_legal_for_add_imm_4_stores(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: offset_legal_for_add_imm_4_stores:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3200]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3208]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3216]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3224]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: offset_legal_for_add_imm_4_stores:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: add x8, x0, #3200
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #16]
+; CHECK-SPLIT-NEXT: ret
+ %bigoffset = getelementptr i64, ptr %p, i64 400
+ store i64 0, ptr %bigoffset
+ %addr2 = getelementptr i64, ptr %p, i64 401
+ store i64 0, ptr %addr2
+ %addr3 = getelementptr i64, ptr %p, i64 402
+ store i64 0, ptr %addr3
+ %addr4 = getelementptr i64, ptr %p, i64 403
+ store i64 0, ptr %addr4
+ ret void
+}
+
+define void @offset_illegal_for_add_imm_4_stores(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: offset_illegal_for_add_imm_4_stores:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: offset_illegal_for_add_imm_4_stores:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: mov w8, #8000 ; =0x1f40
+; CHECK-SPLIT-NEXT: add x8, x0, x8
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #16]
+; CHECK-SPLIT-NEXT: ret
+ %bigoffset = getelementptr i64, ptr %p, i64 1000
+ store i64 0, ptr %bigoffset
+ %addr2 = getelementptr i64, ptr %p, i64 1001
+ store i64 0, ptr %addr2
+ %addr3 = getelementptr i64, ptr %p, i64 1002
+ store i64 0, ptr %addr3
+ %addr4 = getelementptr i64, ptr %p, i64 1003
+ store i64 0, ptr %addr4
+ ret void
+}
More information about the llvm-commits
mailing list