[llvm] 29c851f - [GlobalISel] Move the truncstore_merge combine to the LoadStoreOpt pass and add support for an extra case.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 12 16:43:20 PDT 2023
Author: Amara Emerson
Date: 2023-04-12T16:43:14-07:00
New Revision: 29c851f4e2ff9dc55146be88ae0df3d378a7be9f
URL: https://github.com/llvm/llvm-project/commit/29c851f4e2ff9dc55146be88ae0df3d378a7be9f
DIFF: https://github.com/llvm/llvm-project/commit/29c851f4e2ff9dc55146be88ae0df3d378a7be9f.diff
LOG: [GlobalISel] Move the truncstore_merge combine to the LoadStoreOpt pass and add support for an extra case.
If we have a set of mergeable stores of shifts, but the original source value being shifted
is wider than the merged size, we should still be able to merge if we truncate first. To do
this, however, we need to search for stores speculatively up the block, without knowing
exactly how many stores we should see before we stop. The old algorithm has to match an
exact number of stores to fit the wide type, or it gives up. The new one instead sets the
wide type according to however many stores we find during the upward block traversal, and
uses later checks to verify that they form a valid mergeable set.
The reason this needs to move to LoadStoreOpt is that the combiner works top-down within a
block, which means we end up doing partial merges because we haven't seen all the possible
stores before we mutate the MIR. In LoadStoreOpt we can go bottom-up.
As a side effect of this change, we also do better on an existing test case (missing_store),
since we manage to do a partial merge there.
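To illustrate the extra case, here is a sketch in the same C-like pseudocode the in-source
comments use (it mirrors the trunc_from_larger_src_val test added below, assuming a
little-endian target): the source value is wider (i64) than the four bytes actually stored,
so the pass now truncates first and then merges.
  i8 *p = ...
  i64 val = ...
  p[0] = (val >> 0)  & 0xFF;
  p[1] = (val >> 8)  & 0xFF;
  p[2] = (val >> 16) & 0xFF;
  p[3] = (val >> 24) & 0xFF;
  =>
  i32 lo = (i32)val;  // truncate the wide source to the merged width first
  *((i32)p) = lo;     // then emit a single 32-bit store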
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir
llvm/test/CodeGen/AArch64/GlobalISel/store-merging-debug.mir
llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index afc29fcb72c76..1b268933618f7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -79,14 +79,6 @@ struct ShiftOfShiftedLogic {
using BuildFnTy = std::function<void(MachineIRBuilder &)>;
-struct MergeTruncStoresInfo {
- SmallVector<GStore *> FoundStores;
- GStore *LowestIdxStore = nullptr;
- Register WideSrcVal;
- bool NeedBSwap = false;
- bool NeedRotate = false;
-};
-
using OperandBuildSteps =
SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;
struct InstructionBuildSteps {
@@ -577,9 +569,6 @@ class CombinerHelper {
/// bswap.
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo);
- bool matchTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
- void applyTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
-
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
index 6efe7c7c9bbdf..466dbed537e29 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -131,6 +132,10 @@ class LoadStoreOpt : public MachineFunctionPass {
bool mergeBlockStores(MachineBasicBlock &MBB);
bool mergeFunctionStores(MachineFunction &MF);
+ bool mergeTruncStore(GStore &StoreMI,
+ SmallPtrSetImpl<GStore *> &DeletedStores);
+ bool mergeTruncStoresBlock(MachineBasicBlock &MBB);
+
/// Initialize some target-specific data structures for the store merging
/// optimization. \p AddrSpace indicates which address space to use when
/// probing the legalizer info for legal stores.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index e05de02f693c3..c2054a689bd88 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -715,14 +715,6 @@ def load_or_combine : GICombineRule<
[{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
-
-def truncstore_merge_matcdata : GIDefMatchData<"MergeTruncStoresInfo">;
-def truncstore_merge : GICombineRule<
- (defs root:$root, truncstore_merge_matcdata:$info),
- (match (wip_match_opcode G_STORE):$root,
- [{ return Helper.matchTruncStoreMerge(*${root}, ${info}); }]),
- (apply [{ Helper.applyTruncStoreMerge(*${root}, ${info}); }])>;
-
def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
def extend_through_phis : GICombineRule<
(defs root:$root, extend_through_phis_matchdata:$matchinfo),
@@ -1105,7 +1097,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
unmerge_zext_to_zext, merge_unmerge, trunc_ext_fold, trunc_shift,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
- truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
+ div_rem_to_divrem, funnel_shift_combines,
form_bitfield_extract, constant_fold, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 29399835a376d..733e23440a984 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3625,275 +3625,6 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
-/// Check if the store \p Store is a truncstore that can be merged. That is,
-/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
-/// Register then it does not need to match and SrcVal is set to the source
-/// value found.
-/// On match, returns the start byte offset of the \p SrcVal that is being
-/// stored.
-static std::optional<int64_t>
-getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
- MachineRegisterInfo &MRI) {
- Register TruncVal;
- if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
- return std::nullopt;
-
- // The shift amount must be a constant multiple of the narrow type.
- // It is translated to the offset address in the wide source value "y".
- //
- // x = G_LSHR y, ShiftAmtC
- // s8 z = G_TRUNC x
- // store z, ...
- Register FoundSrcVal;
- int64_t ShiftAmt;
- if (!mi_match(TruncVal, MRI,
- m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
- m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
- if (!SrcVal.isValid() || TruncVal == SrcVal) {
- if (!SrcVal.isValid())
- SrcVal = TruncVal;
- return 0; // If it's the lowest index store.
- }
- return std::nullopt;
- }
-
- unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
- if (ShiftAmt % NarrowBits != 0)
- return std::nullopt;
- const unsigned Offset = ShiftAmt / NarrowBits;
-
- if (SrcVal.isValid() && FoundSrcVal != SrcVal)
- return std::nullopt;
-
- if (!SrcVal.isValid())
- SrcVal = FoundSrcVal;
- else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
- return std::nullopt;
- return Offset;
-}
-
-/// Match a pattern where a wide type scalar value is stored by several narrow
-/// stores. Fold it into a single store or a BSWAP and a store if the target
-/// supports it.
-///
-/// Assuming little endian target:
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 0) & 0xFF;
-/// p[1] = (val >> 8) & 0xFF;
-/// p[2] = (val >> 16) & 0xFF;
-/// p[3] = (val >> 24) & 0xFF;
-/// =>
-/// *((i32)p) = val;
-///
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 24) & 0xFF;
-/// p[1] = (val >> 16) & 0xFF;
-/// p[2] = (val >> 8) & 0xFF;
-/// p[3] = (val >> 0) & 0xFF;
-/// =>
-/// *((i32)p) = BSWAP(val);
-bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
- MergeTruncStoresInfo &MatchInfo) {
- auto &StoreMI = cast<GStore>(MI);
- LLT MemTy = StoreMI.getMMO().getMemoryType();
-
- // We only handle merging simple stores of 1-4 bytes.
- if (!MemTy.isScalar())
- return false;
- switch (MemTy.getSizeInBits()) {
- case 8:
- case 16:
- case 32:
- break;
- default:
- return false;
- }
- if (!StoreMI.isSimple())
- return false;
-
- // We do a simple search for mergeable stores prior to this one.
- // Any potential alias hazard along the way terminates the search.
- SmallVector<GStore *> FoundStores;
-
- // We're looking for:
- // 1) a (store(trunc(...)))
- // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
- // the partial value stored.
- // 3) where the offsets form either a little or big-endian sequence.
-
- auto &LastStore = StoreMI;
-
- // The single base pointer that all stores must use.
- Register BaseReg;
- int64_t LastOffset;
- if (!mi_match(LastStore.getPointerReg(), MRI,
- m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
- BaseReg = LastStore.getPointerReg();
- LastOffset = 0;
- }
-
- GStore *LowestIdxStore = &LastStore;
- int64_t LowestIdxOffset = LastOffset;
-
- Register WideSrcVal;
- auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
- if (!LowestShiftAmt)
- return false; // Didn't match a trunc.
- assert(WideSrcVal.isValid());
-
- LLT WideStoreTy = MRI.getType(WideSrcVal);
- // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
- if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
- return false;
- const unsigned NumStoresRequired =
- WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
-
- SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
- OffsetMap[*LowestShiftAmt] = LastOffset;
- FoundStores.emplace_back(&LastStore);
-
- // Search the block up for more stores.
- // We use a search threshold of 10 instructions here because the combiner
- // works top-down within a block, and we don't want to search an unbounded
- // number of predecessor instructions trying to find matching stores.
- // If we moved this optimization into a separate pass then we could probably
- // use a more efficient search without having a hard-coded threshold.
- const int MaxInstsToCheck = 10;
- int NumInstsChecked = 0;
- for (auto II = ++LastStore.getReverseIterator();
- II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
- ++II) {
- NumInstsChecked++;
- GStore *NewStore;
- if ((NewStore = dyn_cast<GStore>(&*II))) {
- if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
- break;
- } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
- break;
- } else {
- continue; // This is a safe instruction we can look past.
- }
-
- Register NewBaseReg;
- int64_t MemOffset;
- // Check we're storing to the same base + some offset.
- if (!mi_match(NewStore->getPointerReg(), MRI,
- m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
- NewBaseReg = NewStore->getPointerReg();
- MemOffset = 0;
- }
- if (BaseReg != NewBaseReg)
- break;
-
- auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
- if (!ShiftByteOffset)
- break;
- if (MemOffset < LowestIdxOffset) {
- LowestIdxOffset = MemOffset;
- LowestIdxStore = NewStore;
- }
-
- // Map the offset in the store and the offset in the combined value, and
- // early return if it has been set before.
- if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
- OffsetMap[*ShiftByteOffset] != INT64_MAX)
- break;
- OffsetMap[*ShiftByteOffset] = MemOffset;
-
- FoundStores.emplace_back(NewStore);
- // Reset counter since we've found a matching inst.
- NumInstsChecked = 0;
- if (FoundStores.size() == NumStoresRequired)
- break;
- }
-
- if (FoundStores.size() != NumStoresRequired) {
- return false;
- }
-
- const auto &DL = LastStore.getMF()->getDataLayout();
- auto &C = LastStore.getMF()->getFunction().getContext();
- // Check that a store of the wide type is both allowed and fast on the target
- unsigned Fast = 0;
- bool Allowed = getTargetLowering().allowsMemoryAccess(
- C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
- if (!Allowed || !Fast)
- return false;
-
- // Check if the pieces of the value are going to the expected places in memory
- // to merge the stores.
- unsigned NarrowBits = MemTy.getScalarSizeInBits();
- auto checkOffsets = [&](bool MatchLittleEndian) {
- if (MatchLittleEndian) {
- for (unsigned i = 0; i != NumStoresRequired; ++i)
- if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
- return false;
- } else { // MatchBigEndian by reversing loop counter.
- for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
- ++i, --j)
- if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
- return false;
- }
- return true;
- };
-
- // Check if the offsets line up for the native data layout of this target.
- bool NeedBswap = false;
- bool NeedRotate = false;
- if (!checkOffsets(DL.isLittleEndian())) {
- // Special-case: check if byte offsets line up for the opposite endian.
- if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
- NeedBswap = true;
- else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
- NeedRotate = true;
- else
- return false;
- }
-
- if (NeedBswap &&
- !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
- return false;
- if (NeedRotate &&
- !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
- return false;
-
- MatchInfo.NeedBSwap = NeedBswap;
- MatchInfo.NeedRotate = NeedRotate;
- MatchInfo.LowestIdxStore = LowestIdxStore;
- MatchInfo.WideSrcVal = WideSrcVal;
- MatchInfo.FoundStores = std::move(FoundStores);
- return true;
-}
-
-void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
- MergeTruncStoresInfo &MatchInfo) {
-
- Builder.setInstrAndDebugLoc(MI);
- Register WideSrcVal = MatchInfo.WideSrcVal;
- LLT WideStoreTy = MRI.getType(WideSrcVal);
-
- if (MatchInfo.NeedBSwap) {
- WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
- } else if (MatchInfo.NeedRotate) {
- assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
- "Unexpected type for rotate");
- auto RotAmt =
- Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
- WideSrcVal =
- Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
- }
-
- Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
- MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
- MatchInfo.LowestIdxStore->getMMO().getAlign());
-
- // Erase the old stores.
- for (auto *ST : MatchInfo.FoundStores)
- ST->eraseFromParent();
-}
-
bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
MachineInstr *&ExtMI) {
assert(MI.getOpcode() == TargetOpcode::G_PHI);
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 32d60c4311450..12ae12ab827ad 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -10,6 +10,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -617,11 +619,304 @@ bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
return Changed;
}
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static std::optional<int64_t>
+getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+ MachineRegisterInfo &MRI) {
+ Register TruncVal;
+ if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+ return std::nullopt;
+
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
+ //
+ // x = G_LSHR y, ShiftAmtC
+ // s8 z = G_TRUNC x
+ // store z, ...
+ Register FoundSrcVal;
+ int64_t ShiftAmt;
+ if (!mi_match(TruncVal, MRI,
+ m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+ m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+ if (!SrcVal.isValid() || TruncVal == SrcVal) {
+ if (!SrcVal.isValid())
+ SrcVal = TruncVal;
+ return 0; // If it's the lowest index store.
+ }
+ return std::nullopt;
+ }
+
+ unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+ if (ShiftAmt % NarrowBits != 0)
+ return std::nullopt;
+ const unsigned Offset = ShiftAmt / NarrowBits;
+
+ if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+ return std::nullopt;
+
+ if (!SrcVal.isValid())
+ SrcVal = FoundSrcVal;
+ else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+ return std::nullopt;
+ return Offset;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI,
+ SmallPtrSetImpl<GStore *> &DeletedStores) {
+ LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ if (!MemTy.isScalar())
+ return false;
+ switch (MemTy.getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ break;
+ default:
+ return false;
+ }
+ if (!StoreMI.isSimple())
+ return false;
+
+ // We do a simple search for mergeable stores prior to this one.
+ // Any potential alias hazard along the way terminates the search.
+ SmallVector<GStore *> FoundStores;
+
+ // We're looking for:
+ // 1) a (store(trunc(...)))
+ // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
+ // the partial value stored.
+ // 3) where the offsets form either a little or big-endian sequence.
+
+ auto &LastStore = StoreMI;
+
+ // The single base pointer that all stores must use.
+ Register BaseReg;
+ int64_t LastOffset;
+ if (!mi_match(LastStore.getPointerReg(), *MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+ BaseReg = LastStore.getPointerReg();
+ LastOffset = 0;
+ }
+
+ GStore *LowestIdxStore = &LastStore;
+ int64_t LowestIdxOffset = LastOffset;
+
+ Register WideSrcVal;
+ auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, *MRI);
+ if (!LowestShiftAmt)
+ return false; // Didn't match a trunc.
+ assert(WideSrcVal.isValid());
+
+ LLT WideStoreTy = MRI->getType(WideSrcVal);
+ // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
+ if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
+ return false;
+ const unsigned NumStoresRequired =
+ WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+ SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+ OffsetMap[*LowestShiftAmt] = LastOffset;
+ FoundStores.emplace_back(&LastStore);
+
+ const int MaxInstsToCheck = 10;
+ int NumInstsChecked = 0;
+ for (auto II = ++LastStore.getReverseIterator();
+ II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+ ++II) {
+ NumInstsChecked++;
+ GStore *NewStore;
+ if ((NewStore = dyn_cast<GStore>(&*II))) {
+ if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
+ break;
+ } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
+ break;
+ } else {
+ continue; // This is a safe instruction we can look past.
+ }
+
+ Register NewBaseReg;
+ int64_t MemOffset;
+ // Check we're storing to the same base + some offset.
+ if (!mi_match(NewStore->getPointerReg(), *MRI,
+ m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+ NewBaseReg = NewStore->getPointerReg();
+ MemOffset = 0;
+ }
+ if (BaseReg != NewBaseReg)
+ break;
+
+ auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, *MRI);
+ if (!ShiftByteOffset)
+ break;
+ if (MemOffset < LowestIdxOffset) {
+ LowestIdxOffset = MemOffset;
+ LowestIdxStore = NewStore;
+ }
+
+ // Map the offset in the store and the offset in the combined value, and
+ // early return if it has been set before.
+ if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+ OffsetMap[*ShiftByteOffset] != INT64_MAX)
+ break;
+ OffsetMap[*ShiftByteOffset] = MemOffset;
+
+ FoundStores.emplace_back(NewStore);
+ // Reset counter since we've found a matching inst.
+ NumInstsChecked = 0;
+ if (FoundStores.size() == NumStoresRequired)
+ break;
+ }
+
+ if (FoundStores.size() != NumStoresRequired) {
+ if (FoundStores.size() == 1)
+ return false;
+ // We didn't find enough stores to merge into the size of the original
+ // source value, but we may be able to generate a smaller store if we
+ // truncate the source value.
+ WideStoreTy = LLT::scalar(FoundStores.size() * MemTy.getScalarSizeInBits());
+ }
+
+ unsigned NumStoresFound = FoundStores.size();
+
+ const auto &DL = LastStore.getMF()->getDataLayout();
+ auto &C = LastStore.getMF()->getFunction().getContext();
+ // Check that a store of the wide type is both allowed and fast on the target
+ unsigned Fast = 0;
+ bool Allowed = TLI->allowsMemoryAccess(
+ C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+ if (!Allowed || !Fast)
+ return false;
+
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ unsigned NarrowBits = MemTy.getScalarSizeInBits();
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStoresFound; ++i)
+ if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStoresFound - 1; i != NumStoresFound;
+ ++i, --j)
+ if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(DL.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStoresFound == 2 && checkOffsets(DL.isBigEndian()))
+ NeedRotate = true;
+ else
+ return false;
+ }
+
+ if (NeedBswap &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}, *MF))
+ return false;
+ if (NeedRotate &&
+ !isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, *MF))
+ return false;
+
+ Builder.setInstrAndDebugLoc(StoreMI);
+
+ if (WideStoreTy != MRI->getType(WideSrcVal))
+ WideSrcVal = Builder.buildTrunc(WideStoreTy, WideSrcVal).getReg(0);
+
+ if (NeedBswap) {
+ WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
+ } else if (NeedRotate) {
+ assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
+ "Unexpected type for rotate");
+ auto RotAmt =
+ Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
+ WideSrcVal =
+ Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
+ }
+
+ Builder.buildStore(WideSrcVal, LowestIdxStore->getPointerReg(),
+ LowestIdxStore->getMMO().getPointerInfo(),
+ LowestIdxStore->getMMO().getAlign());
+
+ // Erase the old stores.
+ for (auto *ST : FoundStores) {
+ ST->eraseFromParent();
+ DeletedStores.insert(ST);
+ }
+ return true;
+}
+
+bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) {
+ bool Changed = false;
+ SmallVector<GStore *, 16> Stores;
+ SmallPtrSet<GStore *, 8> DeletedStores;
+ // Walk up the block so we can see the most eligible stores.
+ for (MachineInstr &MI : llvm::reverse(BB))
+ if (auto *StoreMI = dyn_cast<GStore>(&MI))
+ Stores.emplace_back(StoreMI);
+
+ for (auto *StoreMI : Stores) {
+ if (DeletedStores.count(StoreMI))
+ continue;
+ if (mergeTruncStore(*StoreMI, DeletedStores))
+ Changed = true;
+ }
+ return Changed;
+}
+
bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
bool Changed = false;
- for (auto &BB : MF)
+ for (auto &BB : MF) {
Changed |= mergeBlockStores(BB);
+ Changed |= mergeTruncStoresBlock(BB);
+ }
+
+ // Erase all dead instructions left over by the merging.
+ if (Changed) {
+ for (auto &BB : MF) {
+ for (auto &I : make_early_inc_range(make_range(BB.rbegin(), BB.rend()))) {
+ if (isTriviallyDead(I, *MRI))
+ I.eraseFromParent();
+ }
+ }
+ }
+
return Changed;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
index ff4044be8e356..4a49bd7ad758b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
@@ -287,13 +287,13 @@ define dso_local void @invalid_shift(i16 %x, ptr %p) {
}
define dso_local void @missing_store(i32 %x, ptr %p) {
+; The missing store of shift 16 means we can't merge to a 32 bit store,
+; but we can still partially merge to a 16 bit one.
; CHECK-LABEL: missing_store:
; CHECK: ; %bb.0:
-; CHECK-NEXT: lsr w8, w0, #8
-; CHECK-NEXT: lsr w9, w0, #24
-; CHECK-NEXT: strb w0, [x1]
-; CHECK-NEXT: strb w8, [x1, #1]
-; CHECK-NEXT: strb w9, [x1, #3]
+; CHECK-NEXT: lsr w8, w0, #24
+; CHECK-NEXT: strh w0, [x1]
+; CHECK-NEXT: strb w8, [x1, #3]
; CHECK-NEXT: ret
%t1 = trunc i32 %x to i8
%sh1 = lshr i32 %x, 8
@@ -339,3 +339,40 @@ define dso_local void @second_store_is_volatile(i16 %x, ptr %p) {
store i8 %t2, ptr %p1, align 1
ret void
}
+
+declare void @use_ptr(ptr)
+
+define dso_local void @trunc_from_larger_src_val(i64 %hold.4.lcssa, ptr %check1792) {
+ ; Here we can merge these i8 stores into a single i32 store, but first we need
+ ; to truncate the i64 value to i32.
+; CHECK-LABEL: trunc_from_larger_src_val:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: str w0, [sp, #12]
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: bl _use_ptr
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+ %hbuf = alloca [4 x i8], align 1
+ %arrayidx177 = getelementptr inbounds [4 x i8], ptr %hbuf, i64 0, i64 1
+ %arrayidx234 = getelementptr inbounds [4 x i8], ptr %hbuf, i64 0, i64 2
+ %arrayidx237 = getelementptr inbounds [4 x i8], ptr %hbuf, i64 0, i64 3
+ %conv227 = trunc i64 %hold.4.lcssa to i8
+ store i8 %conv227, ptr %hbuf, align 1
+ %shr229 = lshr i64 %hold.4.lcssa, 8
+ %conv230 = trunc i64 %shr229 to i8
+ store i8 %conv230, ptr %arrayidx177, align 1
+ %shr232 = lshr i64 %hold.4.lcssa, 16
+ %conv233 = trunc i64 %shr232 to i8
+ store i8 %conv233, ptr %arrayidx234, align 1
+ %shr235 = lshr i64 %hold.4.lcssa, 24
+ %conv236 = trunc i64 %shr235 to i8
+ store i8 %conv236, ptr %arrayidx237, align 1
+ call void @use_ptr(ptr noundef nonnull %hbuf)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir
index 68a048e927344..62c15292b8cbf 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple aarch64 -run-pass=loadstore-opt -verify-machineinstrs %s -o - | FileCheck %s
---
name: trunc_i16_to_i8
alignment: 4
@@ -637,20 +637,14 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32)
- ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[COPY1]](p0) :: (store (s8))
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
- ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[PTR_ADD]](p0) :: (store (s8))
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
- ; CHECK-NEXT: G_STORE [[TRUNC2]](s8), [[PTR_ADD1]](p0) :: (store (s8))
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[COPY1]](p0) :: (store (s16), align 1)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[PTR_ADD]](p0) :: (store (s8))
; CHECK-NEXT: RET_ReallyLR
%0:_(s32) = COPY $w0
%1:_(p0) = COPY $x1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging-debug.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging-debug.mir
index 200281f3e54f5..d52ef0f3da74c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging-debug.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging-debug.mir
@@ -103,31 +103,21 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0, debug-location !11
; CHECK-NEXT: DBG_VALUE [[COPY]](p0), $noreg, !9, !DIExpression(), debug-location !11
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4, debug-location !DILocation(line: 2, column: 1, scope: !5)
- ; CHECK-NEXT: DBG_VALUE [[C]](s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 2, column: 1, scope: !5)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 5, debug-location !DILocation(line: 3, column: 1, scope: !5)
- ; CHECK-NEXT: DBG_VALUE [[C1]](s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 3, column: 1, scope: !5)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 9, debug-location !DILocation(line: 4, column: 1, scope: !5)
- ; CHECK-NEXT: DBG_VALUE [[C2]](s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 4, column: 1, scope: !5)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 14, debug-location !DILocation(line: 5, column: 1, scope: !5)
- ; CHECK-NEXT: DBG_VALUE [[C3]](s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 5, column: 1, scope: !5)
+ ; CHECK-NEXT: DBG_VALUE %1:_(s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 2, column: 1, scope: !5)
+ ; CHECK-NEXT: DBG_VALUE %4:_(s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 3, column: 1, scope: !5)
+ ; CHECK-NEXT: DBG_VALUE %7:_(s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 4, column: 1, scope: !5)
+ ; CHECK-NEXT: DBG_VALUE %10:_(s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 5, column: 1, scope: !5)
; CHECK-NEXT: DBG_VALUE 0, $noreg, !9, !DIExpression(), debug-location !DILocation(line: 6, column: 1, scope: !5)
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2, debug-location !DILocation(line: 7, column: 1, scope: !5)
- ; CHECK-NEXT: DBG_VALUE [[C4]](s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 7, column: 1, scope: !5)
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64), debug-location !DILocation(line: 8, column: 1, scope: !5)
- ; CHECK-NEXT: DBG_VALUE [[PTR_ADD]](p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 8, column: 1, scope: !5)
+ ; CHECK-NEXT: DBG_VALUE %2:_(s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 7, column: 1, scope: !5)
+ ; CHECK-NEXT: DBG_VALUE %3:_(p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 8, column: 1, scope: !5)
; CHECK-NEXT: DBG_VALUE 1, $noreg, !9, !DIExpression(), debug-location !DILocation(line: 9, column: 1, scope: !5)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4, debug-location !DILocation(line: 10, column: 1, scope: !5)
- ; CHECK-NEXT: DBG_VALUE [[C5]](s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 10, column: 1, scope: !5)
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64), debug-location !DILocation(line: 11, column: 1, scope: !5)
- ; CHECK-NEXT: DBG_VALUE [[PTR_ADD1]](p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 11, column: 1, scope: !5)
+ ; CHECK-NEXT: DBG_VALUE %5:_(s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 10, column: 1, scope: !5)
+ ; CHECK-NEXT: DBG_VALUE %6:_(p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 11, column: 1, scope: !5)
; CHECK-NEXT: DBG_VALUE 2, $noreg, !9, !DIExpression(), debug-location !DILocation(line: 12, column: 1, scope: !5)
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6, debug-location !DILocation(line: 13, column: 1, scope: !5)
- ; CHECK-NEXT: DBG_VALUE [[C6]](s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 13, column: 1, scope: !5)
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64), debug-location !DILocation(line: 14, column: 1, scope: !5)
- ; CHECK-NEXT: DBG_VALUE [[PTR_ADD2]](p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 14, column: 1, scope: !5)
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 3940688328982532
- ; CHECK-NEXT: G_STORE [[C7]](s64), [[COPY]](p0), debug-location !DILocation(line: 9, scope: !5) :: (store (s64), align 2)
+ ; CHECK-NEXT: DBG_VALUE %8:_(s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 13, column: 1, scope: !5)
+ ; CHECK-NEXT: DBG_VALUE %9:_(p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 14, column: 1, scope: !5)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3940688328982532
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0), debug-location !DILocation(line: 9, scope: !5) :: (store (s64), align 2)
; CHECK-NEXT: DBG_VALUE 3, $noreg, !9, !DIExpression(), debug-location !DILocation(line: 15, column: 1, scope: !5)
; CHECK-NEXT: RET_ReallyLR debug-location !DILocation(line: 16, column: 1, scope: !5)
%0:_(p0) = COPY $x0, debug-location !11
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
index fe0419aba8216..e98e1ce599f2f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
@@ -178,14 +178,15 @@ body: |
; CHECK-LABEL: name: test_simple_2xs8
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
- ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 5
- ; CHECK: G_STORE [[C]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr11)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK: G_STORE [[C1]](s8), [[PTR_ADD]](p0) :: (store (s8) into %ir.addr2)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 5
+ ; CHECK-NEXT: G_STORE [[C]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr11)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: G_STORE [[C1]](s8), [[PTR_ADD]](p0) :: (store (s8) into %ir.addr2)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s8) = G_CONSTANT i8 4
%4:_(s8) = G_CONSTANT i8 5
@@ -211,14 +212,11 @@ body: |
; CHECK-LABEL: name: test_simple_2xs16
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 327684
- ; CHECK: G_STORE [[C3]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11, align 2)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 327684
+ ; CHECK-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11, align 2)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s16) = G_CONSTANT i16 4
%4:_(s16) = G_CONSTANT i16 5
@@ -244,20 +242,11 @@ body: |
; CHECK-LABEL: name: test_simple_4xs16
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
- ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
- ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
- ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
- ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
- ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 3940688328982532
- ; CHECK: G_STORE [[C7]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 2)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3940688328982532
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 2)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s16) = G_CONSTANT i16 4
%4:_(s16) = G_CONSTANT i16 5
@@ -291,14 +280,11 @@ body: |
; CHECK-LABEL: name: test_simple_2xs32
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
- ; CHECK: G_STORE [[C3]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 4)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 4)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s32) = G_CONSTANT i32 4
%4:_(s32) = G_CONSTANT i32 5
@@ -324,14 +310,15 @@ body: |
; CHECK-LABEL: name: test_simple_2xs64_illegal
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
- ; CHECK: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK: G_STORE [[C1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.addr2)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: G_STORE [[C1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.addr2)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s64) = G_CONSTANT i64 4
%4:_(s64) = G_CONSTANT i64 5
@@ -357,18 +344,19 @@ body: |
; CHECK-LABEL: name: test_simple_vector
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 7
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16)
- ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
- ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C3]](s16)
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s16>), [[COPY]](p0) :: (store (<2 x s16>) into %ir.addr11)
- ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
- ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s16>), [[PTR_ADD]](p0) :: (store (<2 x s16>) into %ir.addr2)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 7
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C3]](s16)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s16>), [[COPY]](p0) :: (store (<2 x s16>) into %ir.addr11)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s16>), [[PTR_ADD]](p0) :: (store (<2 x s16>) into %ir.addr2)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%2:_(s16) = G_CONSTANT i16 4
%3:_(s16) = G_CONSTANT i16 7
@@ -399,17 +387,18 @@ body: |
; CHECK-LABEL: name: test_unknown_alias
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
- ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.aliasptr)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
- ; CHECK: $w0 = COPY [[LOAD]](s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.aliasptr)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
+ ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(s32) = G_CONSTANT i32 4
@@ -439,20 +428,18 @@ body: |
; CHECK-LABEL: name: test_2x_2xs32
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
- ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
- ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
- ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
- ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64)
- ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 73014444041
- ; CHECK: G_STORE [[C5]](s64), [[COPY1]](p0) :: (store (s64) into %ir.addr32, align 4)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 73014444041
+ ; CHECK-NEXT: G_STORE [[C3]](s64), [[COPY1]](p0) :: (store (s64) into %ir.addr32, align 4)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(s32) = G_CONSTANT i32 4
@@ -486,16 +473,17 @@ body: |
; CHECK-LABEL: name: test_simple_var_2xs8
; CHECK: liveins: $w1, $w2, $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
- ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr11)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CHECK: G_STORE [[TRUNC1]](s8), [[PTR_ADD]](p0) :: (store (s8) into %ir.addr2)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr11)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[PTR_ADD]](p0) :: (store (s8) into %ir.addr2)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%3:_(s32) = COPY $w1
%1:_(s8) = G_TRUNC %3(s32)
@@ -525,16 +513,17 @@ body: |
; CHECK-LABEL: name: test_simple_var_2xs16
; CHECK: liveins: $w1, $w2, $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; CHECK: G_STORE [[TRUNC]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CHECK: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr2)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr2)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%3:_(s32) = COPY $w1
%1:_(s16) = G_TRUNC %3(s32)
@@ -564,14 +553,15 @@ body: |
; CHECK-LABEL: name: test_simple_var_2xs32
; CHECK: liveins: $w1, $w2, $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
- ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CHECK: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s32) = COPY $w1
%2:_(s32) = COPY $w2
@@ -601,25 +591,22 @@ body: |
; CHECK-LABEL: name: test_alias_4xs16
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
- ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
- ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
- ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 327684
- ; CHECK: G_STORE [[C6]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11, align 2)
- ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
- ; CHECK: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr3)
- ; CHECK: G_STORE [[C3]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
- ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
- ; CHECK: G_STORE [[C4]](s16), [[PTR_ADD2]](p0) :: (store (s16) into %ir.addr4)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 327684
+ ; CHECK-NEXT: G_STORE [[C3]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11, align 2)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+ ; CHECK-NEXT: G_STORE [[C]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr3)
+ ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+ ; CHECK-NEXT: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr4)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(s16) = G_CONSTANT i16 4
@@ -658,27 +645,24 @@ body: |
; Here store of 5 and 9 can be merged, others have aliasing barriers.
; CHECK-LABEL: name: test_alias2_4xs16
; CHECK: liveins: $x0, $x1, $x2
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
- ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
- ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
- ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
- ; CHECK: G_STORE [[C]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
- ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; CHECK: G_STORE [[C1]](s16), [[COPY2]](p0) :: (store (s16) into %ir.ptr3)
- ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
- ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 589829
- ; CHECK: G_STORE [[C7]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2, align 2)
- ; CHECK: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
- ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
- ; CHECK: G_STORE [[C4]](s16), [[PTR_ADD2]](p0) :: (store (s16) into %ir.addr4)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
+ ; CHECK-NEXT: G_STORE [[C]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+ ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY2]](p0) :: (store (s16) into %ir.ptr3)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 589829
+ ; CHECK-NEXT: G_STORE [[C4]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2, align 2)
+ ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+ ; CHECK-NEXT: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr4)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(p0) = COPY $x2
@@ -722,29 +706,30 @@ body: |
; CHECK-LABEL: name: test_alias3_4xs16
; CHECK: liveins: $x0, $x1, $x2, $x3
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
- ; CHECK: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
- ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
- ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
- ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
- ; CHECK: G_STORE [[C]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
- ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; CHECK: G_STORE [[C1]](s16), [[COPY2]](p0) :: (store (s16) into %ir.ptr3)
- ; CHECK: G_STORE [[C2]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr2)
- ; CHECK: G_STORE [[C1]](s16), [[COPY3]](p0) :: (store (s16) into %ir.ptr4)
- ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
- ; CHECK: G_STORE [[C3]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr3)
- ; CHECK: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
- ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
- ; CHECK: G_STORE [[C4]](s16), [[PTR_ADD2]](p0) :: (store (s16) into %ir.addr4)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
+ ; CHECK-NEXT: G_STORE [[C]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+ ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY2]](p0) :: (store (s16) into %ir.ptr3)
+ ; CHECK-NEXT: G_STORE [[C2]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr2)
+ ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY3]](p0) :: (store (s16) into %ir.ptr4)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+ ; CHECK-NEXT: G_STORE [[C3]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr3)
+ ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+ ; CHECK-NEXT: G_STORE [[C4]](s16), [[PTR_ADD2]](p0) :: (store (s16) into %ir.addr4)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(p0) = COPY $x2
@@ -790,17 +775,14 @@ body: |
; CHECK-LABEL: name: test_alias_allocas_2xs32
; CHECK: liveins: $x0
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a1
- ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.a2
- ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (s32) from %ir.a2)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64)
- ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
- ; CHECK: G_STORE [[C3]](s64), [[FRAME_INDEX]](p0) :: (store (s64) into %ir.addr11, align 4)
- ; CHECK: $w0 = COPY [[LOAD]](s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a1
+ ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.a2
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (s32) from %ir.a2)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[FRAME_INDEX]](p0) :: (store (s64) into %ir.addr11, align 4)
+ ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%3:_(s32) = G_CONSTANT i32 4
%7:_(s32) = G_CONSTANT i32 5
%1:_(p0) = G_FRAME_INDEX %stack.0.a1
@@ -829,14 +811,11 @@ body: |
; CHECK-LABEL: name: test_simple_2xs32_with_align
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
- ; CHECK: G_STORE [[C3]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 2)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 2)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s32) = G_CONSTANT i32 4
%4:_(s32) = G_CONSTANT i32 5