[llvm] eae44c8 - [GlobalISel] Implement merging of stores of truncates.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 8 17:08:11 PDT 2021
Author: Amara Emerson
Date: 2021-09-08T17:06:33-07:00
New Revision: eae44c8a863b49d6419ccfceb4f38803d2d9c30c
URL: https://github.com/llvm/llvm-project/commit/eae44c8a863b49d6419ccfceb4f38803d2d9c30c
DIFF: https://github.com/llvm/llvm-project/commit/eae44c8a863b49d6419ccfceb4f38803d2d9c30c.diff
LOG: [GlobalISel] Implement merging of stores of truncates.
This is a port of a combine which matches a pattern where a wide scalar
value is stored by several narrow stores. It folds them into a single store,
or a BSWAP and a store, if the target supports it.
Assuming little endian target:
i8 *p = ...
i32 val = ...
p[0] = (val >> 0) & 0xFF;
p[1] = (val >> 8) & 0xFF;
p[2] = (val >> 16) & 0xFF;
p[3] = (val >> 24) & 0xFF;
=>
*((i32)p) = val;
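The byte-reversed ordering is recognized as well; in that case (and assuming a
G_BSWAP of the wide type is legal) the combine emits a byte swap before the
single store:
i8 *p = ...
i32 val = ...
p[0] = (val >> 24) & 0xFF;
p[1] = (val >> 16) & 0xFF;
p[2] = (val >> 8) & 0xFF;
p[3] = (val >> 0) & 0xFF;
=>
*((i32)p) = BSWAP(val);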
On CTMark AArch64 -Os this results in a good amount of savings:
Program               before    after     diff
SPASS                 412792    412788    -0.0%
kc                    432528    432512    -0.0%
lencod                430112    430096    -0.0%
consumer-typeset      419156    419128    -0.0%
bullet                475840    475752    -0.0%
tramp3d-v4            367760    367628    -0.0%
clamscan              383388    383204    -0.0%
pairlocalalign        249764    249476    -0.1%
7zip-benchmark        570100    568860    -0.2%
sqlite3               287628    286920    -0.2%
Geomean difference                        -0.1%
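As a side note, the matching boils down to two checks: each narrow store's
constant shift amount must map to a whole slice of the wide value, and the
store offsets must form a little- or big-endian byte sequence. Below is a
minimal standalone C++ sketch of those two checks, using hypothetical names
(sliceIndex, offsetsMatch); the real logic lives in getTruncStoreByteOffset()
and checkOffsets() in the patch further down.

// Sketch only; not part of the patch.
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

// Map a store's constant shift amount to a slice index within the wide value.
// A shift that isn't a whole multiple of the narrow width (e.g. an lshr by 4
// feeding an i8 store) can't address a byte-aligned slice, so no merge.
std::optional<unsigned> sliceIndex(uint64_t ShiftAmt, unsigned NarrowBits) {
  if (ShiftAmt % NarrowBits != 0)
    return std::nullopt;
  return static_cast<unsigned>(ShiftAmt / NarrowBits);
}

// OffsetMap[i] is the byte offset (relative to the lowest-addressed store)
// written by slice i. For i8 stores of an i32, {0, 1, 2, 3} is little endian,
// {3, 2, 1, 0} is big endian (-> BSWAP); anything else bails out.
bool offsetsMatch(const std::vector<int64_t> &OffsetMap, unsigned NarrowBytes,
                  bool LittleEndian) {
  const size_t N = OffsetMap.size();
  for (size_t i = 0; i != N; ++i) {
    // Little endian: slice i lands i * NarrowBytes past the lowest store.
    // Big endian: the slices are laid out in reverse order.
    const size_t Slice = LittleEndian ? i : N - 1 - i;
    if (OffsetMap[Slice] != static_cast<int64_t>(i) * NarrowBytes)
      return false;
  }
  return true;
}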
Differential Revision: https://reviews.llvm.org/D109419
Added:
llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 8d95b83cb1122..50cdfb3785cc2 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -74,6 +74,14 @@ struct ShiftOfShiftedLogic {
using BuildFnTy = std::function<void(MachineIRBuilder &)>;
+struct MergeTruncStoresInfo {
+ SmallVector<GStore *> FoundStores;
+ GStore *LowestIdxStore = nullptr;
+ Register WideSrcVal;
+ bool NeedBSwap = false;
+ bool NeedRotate = false;
+};
+
using OperandBuildSteps =
SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;
struct InstructionBuildSteps {
@@ -523,6 +531,9 @@ class CombinerHelper {
/// bswap.
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
+ void applyTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
+
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 519edbe043de8..5697e1e592c09 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -594,6 +594,14 @@ def load_or_combine : GICombineRule<
[{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+def truncstore_merge_matcdata : GIDefMatchData<"MergeTruncStoresInfo">;
+def truncstore_merge : GICombineRule<
+ (defs root:$root, truncstore_merge_matcdata:$info),
+ (match (wip_match_opcode G_STORE):$root,
+ [{ return Helper.matchTruncStoreMerge(*${root}, ${info}); }]),
+ (apply [{ Helper.applyTruncStoreMerge(*${root}, ${info}); }])>;
+
def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
def extend_through_phis : GICombineRule<
(defs root:$root, extend_through_phis_matchdata:$matchinfo),
@@ -733,8 +741,8 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
unmerge_zext_to_zext, merge_unmerge, trunc_ext_fold, trunc_shl,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
- div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract,
- constant_fold]>;
+ truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
+ form_bitfield_extract, constant_fold]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 62df6fed36c93..36eea66c0198e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -28,6 +28,8 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include <tuple>
@@ -3266,6 +3268,271 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+ MachineRegisterInfo &MRI) {
+ Register TruncVal;
+ if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+ return None;
+
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
+ //
+ // x = G_LSHR y, ShiftAmtC
+ // s8 z = G_TRUNC x
+ // store z, ...
+ Register FoundSrcVal;
+ int64_t ShiftAmt;
+ if (!mi_match(TruncVal, MRI,
+ m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+ m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+ if (!SrcVal.isValid() || TruncVal == SrcVal) {
+ if (!SrcVal.isValid())
+ SrcVal = TruncVal;
+ return 0; // If it's the lowest index store.
+ }
+ return None;
+ }
+
+ unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+ if (ShiftAmt % NarrowBits != 0)
+ return None;
+ const unsigned Offset = ShiftAmt / NarrowBits;
+
+ if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+ return None;
+
+ if (!SrcVal.isValid())
+ SrcVal = FoundSrcVal;
+ else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+ return None;
+ return Offset;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
+ MergeTruncStoresInfo &MatchInfo) {
+ auto &StoreMI = cast<GStore>(MI);
+ LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ if (!MemTy.isScalar())
+ return false;
+ switch (MemTy.getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ break;
+ default:
+ return false;
+ }
+ if (!StoreMI.isSimple())
+ return false;
+
+ // We do a simple search for mergeable stores prior to this one.
+ // Any potential alias hazard along the way terminates the search.
+ SmallVector<GStore *> FoundStores;
+
+ // We're looking for:
+ // 1) a (store(trunc(...)))
+ // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
+ // the partial value stored.
+ // 3) where the offsets form either a little or big-endian sequence.
+
+ auto &LastStore = StoreMI;
+
+ // The single base pointer that all stores must use.
+ Register BaseReg;
+ int64_t LastOffset;
+ if (!mi_match(LastStore.getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+ BaseReg = LastStore.getPointerReg();
+ LastOffset = 0;
+ }
+
+ GStore *LowestIdxStore = &LastStore;
+ int64_t LowestIdxOffset = LastOffset;
+
+ Register WideSrcVal;
+ auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
+ if (!LowestShiftAmt)
+ return false; // Didn't match a trunc.
+ assert(WideSrcVal.isValid());
+
+ LLT WideStoreTy = MRI.getType(WideSrcVal);
+ const unsigned NumStoresRequired =
+ WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+ SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+ OffsetMap[*LowestShiftAmt] = LastOffset;
+ FoundStores.emplace_back(&LastStore);
+
+ // Search the block up for more stores.
+ // We use a search threshold of 10 instructions here because the combiner
+ // works top-down within a block, and we don't want to search an unbounded
+ // number of predecessor instructions trying to find matching stores.
+ // If we moved this optimization into a separate pass then we could probably
+ // use a more efficient search without having a hard-coded threshold.
+ const int MaxInstsToCheck = 10;
+ int NumInstsChecked = 0;
+ for (auto II = ++LastStore.getReverseIterator();
+ II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+ ++II) {
+ NumInstsChecked++;
+ GStore *NewStore;
+ if ((NewStore = dyn_cast<GStore>(&*II))) {
+ if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
+ break;
+ } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
+ break;
+ } else {
+ continue; // This is a safe instruction we can look past.
+ }
+
+ Register NewBaseReg;
+ int64_t MemOffset;
+ // Check we're storing to the same base + some offset.
+ if (!mi_match(NewStore->getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+ NewBaseReg = NewStore->getPointerReg();
+ MemOffset = 0;
+ }
+ if (BaseReg != NewBaseReg)
+ break;
+
+ auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
+ if (!ShiftByteOffset)
+ break;
+ if (MemOffset < LowestIdxOffset) {
+ LowestIdxOffset = MemOffset;
+ LowestIdxStore = NewStore;
+ }
+
+ // Map the offset in the store and the offset in the combined value, and
+ // early return if it has been set before.
+ if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+ OffsetMap[*ShiftByteOffset] != INT64_MAX)
+ break;
+ OffsetMap[*ShiftByteOffset] = MemOffset;
+
+ FoundStores.emplace_back(NewStore);
+ // Reset counter since we've found a matching inst.
+ NumInstsChecked = 0;
+ if (FoundStores.size() == NumStoresRequired)
+ break;
+ }
+
+ if (FoundStores.size() != NumStoresRequired) {
+ return false;
+ }
+
+ const auto &DL = LastStore.getMF()->getDataLayout();
+ auto &C = LastStore.getMF()->getFunction().getContext();
+ // Check that a store of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed = getTargetLowering().allowsMemoryAccess(
+ C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+ if (!Allowed || !Fast)
+ return false;
+
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ unsigned NarrowBits = MemTy.getScalarSizeInBits();
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStoresRequired; ++i)
+ if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
+ ++i, --j)
+ if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(DL.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
+ NeedRotate = true;
+ else
+ return false;
+ }
+
+ if (NeedBswap &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
+ return false;
+ if (NeedRotate &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
+ return false;
+
+ MatchInfo.NeedBSwap = NeedBswap;
+ MatchInfo.NeedRotate = NeedRotate;
+ MatchInfo.LowestIdxStore = LowestIdxStore;
+ MatchInfo.WideSrcVal = WideSrcVal;
+ MatchInfo.FoundStores = std::move(FoundStores);
+ return true;
+}
+
+void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
+ MergeTruncStoresInfo &MatchInfo) {
+
+ Builder.setInstrAndDebugLoc(MI);
+ Register WideSrcVal = MatchInfo.WideSrcVal;
+ LLT WideStoreTy = MRI.getType(WideSrcVal);
+
+ if (MatchInfo.NeedBSwap) {
+ WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
+ } else if (MatchInfo.NeedRotate) {
+ assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
+ "Unexpected type for rotate");
+ auto RotAmt =
+ Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
+ WideSrcVal =
+ Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
+ }
+
+ Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
+ MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
+ MatchInfo.LowestIdxStore->getMMO().getAlign());
+
+ // Erase the old stores.
+ for (auto *ST : MatchInfo.FoundStores)
+ ST->eraseFromParent();
+}
+
bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
MachineInstr *&ExtMI) {
assert(MI.getOpcode() == TargetOpcode::G_PHI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
new file mode 100644
index 0000000000000..8b0073d27c7da
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
@@ -0,0 +1,348 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-apple-ios -global-isel -global-isel-abort=1 | FileCheck %s
+
+define dso_local void @trunc_i16_to_i8(i16 %x, i8* %p) {
+; CHECK-LABEL: trunc_i16_to_i8:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strh w0, [x1]
+; CHECK-NEXT: ret
+ %t1 = trunc i16 %x to i8
+ %sh = lshr i16 %x, 8
+ %t2 = trunc i16 %sh to i8
+ store i8 %t1, i8* %p, align 1
+ %p1 = getelementptr inbounds i8, i8* %p, i64 1
+ store i8 %t2, i8* %p1, align 1
+ ret void
+}
+
+define dso_local void @trunc_i32_to_i8(i32 %x, i8* %p) {
+; CHECK-LABEL: trunc_i32_to_i8:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str w0, [x1]
+; CHECK-NEXT: ret
+ %t1 = trunc i32 %x to i8
+ %sh1 = lshr i32 %x, 8
+ %t2 = trunc i32 %sh1 to i8
+ %sh2 = lshr i32 %x, 16
+ %t3 = trunc i32 %sh2 to i8
+ %sh3 = lshr i32 %x, 24
+ %t4 = trunc i32 %sh3 to i8
+ store i8 %t1, i8* %p, align 1
+ %p1 = getelementptr inbounds i8, i8* %p, i64 1
+ store i8 %t2, i8* %p1, align 1
+ %p2 = getelementptr inbounds i8, i8* %p, i64 2
+ store i8 %t3, i8* %p2, align 1
+ %p3 = getelementptr inbounds i8, i8* %p, i64 3
+ store i8 %t4, i8* %p3, align 1
+ ret void
+}
+
+define dso_local void @trunc_i32_to_i16(i32 %x, i16* %p) {
+; CHECK-LABEL: trunc_i32_to_i16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str w0, [x1]
+; CHECK-NEXT: ret
+ %t1 = trunc i32 %x to i16
+ %sh = lshr i32 %x, 16
+ %t2 = trunc i32 %sh to i16
+ store i16 %t1, i16* %p, align 2
+ %p1 = getelementptr inbounds i16, i16* %p, i64 1
+ store i16 %t2, i16* %p1, align 2
+ ret void
+}
+
+define dso_local void @be_i32_to_i16(i32 %x, i16* %p0) {
+; CHECK-LABEL: be_i32_to_i16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ror w8, w0, #16
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %sh1 = lshr i32 %x, 16
+ %t0 = trunc i32 %x to i16
+ %t1 = trunc i32 %sh1 to i16
+ %p1 = getelementptr inbounds i16, i16* %p0, i64 1
+ store i16 %t0, i16* %p1, align 2
+ store i16 %t1, i16* %p0, align 2
+ ret void
+}
+
+define dso_local void @be_i32_to_i16_order(i32 %x, i16* %p0) {
+; CHECK-LABEL: be_i32_to_i16_order:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ror w8, w0, #16
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %sh1 = lshr i32 %x, 16
+ %t0 = trunc i32 %x to i16
+ %t1 = trunc i32 %sh1 to i16
+ %p1 = getelementptr inbounds i16, i16* %p0, i64 1
+ store i16 %t1, i16* %p0, align 2
+ store i16 %t0, i16* %p1, align 2
+ ret void
+}
+
+define dso_local void @trunc_i64_to_i8(i64 %x, i8* %p) {
+; CHECK-LABEL: trunc_i64_to_i8:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str x0, [x1]
+; CHECK-NEXT: ret
+ %t1 = trunc i64 %x to i8
+ %sh1 = lshr i64 %x, 8
+ %t2 = trunc i64 %sh1 to i8
+ %sh2 = lshr i64 %x, 16
+ %t3 = trunc i64 %sh2 to i8
+ %sh3 = lshr i64 %x, 24
+ %t4 = trunc i64 %sh3 to i8
+ %sh4 = lshr i64 %x, 32
+ %t5 = trunc i64 %sh4 to i8
+ %sh5 = lshr i64 %x, 40
+ %t6 = trunc i64 %sh5 to i8
+ %sh6 = lshr i64 %x, 48
+ %t7 = trunc i64 %sh6 to i8
+ %sh7 = lshr i64 %x, 56
+ %t8 = trunc i64 %sh7 to i8
+ store i8 %t1, i8* %p, align 1
+ %p1 = getelementptr inbounds i8, i8* %p, i64 1
+ store i8 %t2, i8* %p1, align 1
+ %p2 = getelementptr inbounds i8, i8* %p, i64 2
+ store i8 %t3, i8* %p2, align 1
+ %p3 = getelementptr inbounds i8, i8* %p, i64 3
+ store i8 %t4, i8* %p3, align 1
+ %p4 = getelementptr inbounds i8, i8* %p, i64 4
+ store i8 %t5, i8* %p4, align 1
+ %p5 = getelementptr inbounds i8, i8* %p, i64 5
+ store i8 %t6, i8* %p5, align 1
+ %p6 = getelementptr inbounds i8, i8* %p, i64 6
+ store i8 %t7, i8* %p6, align 1
+ %p7 = getelementptr inbounds i8, i8* %p, i64 7
+ store i8 %t8, i8* %p7, align 1
+ ret void
+}
+
+define dso_local void @trunc_i64_to_i16(i64 %x, i16* %p) {
+; CHECK-LABEL: trunc_i64_to_i16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str x0, [x1]
+; CHECK-NEXT: ret
+ %t1 = trunc i64 %x to i16
+ %sh1 = lshr i64 %x, 16
+ %t2 = trunc i64 %sh1 to i16
+ %sh2 = lshr i64 %x, 32
+ %t3 = trunc i64 %sh2 to i16
+ %sh3 = lshr i64 %x, 48
+ %t4 = trunc i64 %sh3 to i16
+ store i16 %t1, i16* %p, align 2
+ %p1 = getelementptr inbounds i16, i16* %p, i64 1
+ store i16 %t2, i16* %p1, align 2
+ %p2 = getelementptr inbounds i16, i16* %p, i64 2
+ store i16 %t3, i16* %p2, align 2
+ %p3 = getelementptr inbounds i16, i16* %p, i64 3
+ store i16 %t4, i16* %p3, align 2
+ ret void
+}
+
+define dso_local void @trunc_i64_to_i32(i64 %x, i32* %p) {
+; CHECK-LABEL: trunc_i64_to_i32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str x0, [x1]
+; CHECK-NEXT: ret
+ %t1 = trunc i64 %x to i32
+ %sh = lshr i64 %x, 32
+ %t2 = trunc i64 %sh to i32
+ store i32 %t1, i32* %p, align 4
+ %p1 = getelementptr inbounds i32, i32* %p, i64 1
+ store i32 %t2, i32* %p1, align 4
+ ret void
+}
+define dso_local void @be_i64_to_i32(i64 %x, i32* %p0) {
+; CHECK-LABEL: be_i64_to_i32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ror x8, x0, #32
+; CHECK-NEXT: str x8, [x1]
+; CHECK-NEXT: ret
+ %sh1 = lshr i64 %x, 32
+ %t0 = trunc i64 %x to i32
+ %t1 = trunc i64 %sh1 to i32
+ %p1 = getelementptr inbounds i32, i32* %p0, i64 1
+ store i32 %t0, i32* %p1, align 4
+ store i32 %t1, i32* %p0, align 4
+ ret void
+}
+
+define dso_local void @be_i64_to_i32_order(i64 %x, i32* %p0) {
+; CHECK-LABEL: be_i64_to_i32_order:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ror x8, x0, #32
+; CHECK-NEXT: str x8, [x1]
+; CHECK-NEXT: ret
+ %sh1 = lshr i64 %x, 32
+ %t0 = trunc i64 %x to i32
+ %t1 = trunc i64 %sh1 to i32
+ %p1 = getelementptr inbounds i32, i32* %p0, i64 1
+ store i32 %t1, i32* %p0, align 4
+ store i32 %t0, i32* %p1, align 4
+ ret void
+}
+
+; Negative tests.
+
+define void @merge_hole(i32 %x, i8* %p) {
+; CHECK-LABEL: merge_hole:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsr w8, w0, #16
+; CHECK-NEXT: strb w0, [x1]
+; CHECK-NEXT: strh w8, [x1, #2]
+; CHECK-NEXT: ret
+ %pcast = bitcast i8* %p to i16*
+ %p2 = getelementptr inbounds i16, i16* %pcast, i64 1
+ %x3 = trunc i32 %x to i8
+ store i8 %x3, i8* %p, align 1
+ %sh = lshr i32 %x, 16
+ %x01 = trunc i32 %sh to i16
+ store i16 %x01, i16* %p2, align 1
+ ret void
+}
+
+define void @merge_hole2(i32 %x, i8* %p) {
+; CHECK-LABEL: merge_hole2:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsr w8, w0, #16
+; CHECK-NEXT: strh w8, [x1, #2]
+; CHECK-NEXT: strb w0, [x1]
+; CHECK-NEXT: ret
+ %pcast = bitcast i8* %p to i16*
+ %p2 = getelementptr inbounds i16, i16* %pcast, i64 1
+ %sh = lshr i32 %x, 16
+ %x01 = trunc i32 %sh to i16
+ store i16 %x01, i16* %p2, align 1
+ %x3 = trunc i32 %x to i8
+ store i8 %x3, i8* %p, align 1
+ ret void
+}
+
+define void @merge_hole3(i32 %x, i8* %p) {
+; CHECK-LABEL: merge_hole3:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsr w8, w0, #16
+; CHECK-NEXT: strb w0, [x1, #1]
+; CHECK-NEXT: strh w8, [x1, #2]
+; CHECK-NEXT: ret
+ %p1 = getelementptr inbounds i8, i8* %p, i64 1
+ %pcast = bitcast i8* %p to i16*
+ %p2 = getelementptr inbounds i16, i16* %pcast, i64 1
+ %x3 = trunc i32 %x to i8
+ store i8 %x3, i8* %p1, align 1
+ %sh = lshr i32 %x, 16
+ %x01 = trunc i32 %sh to i16
+ store i16 %x01, i16* %p2, align 1
+ ret void
+}
+
+define void @merge_hole4(i32 %x, i8* %p) {
+; CHECK-LABEL: merge_hole4:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsr w8, w0, #16
+; CHECK-NEXT: strb w0, [x1, #2]
+; CHECK-NEXT: strh w8, [x1]
+; CHECK-NEXT: ret
+ %pcast = bitcast i8* %p to i16*
+ %p2 = getelementptr inbounds i8, i8* %p, i64 2
+ %x3 = trunc i32 %x to i8
+ store i8 %x3, i8* %p2, align 1
+ %sh = lshr i32 %x, 16
+ %x01 = trunc i32 %sh to i16
+ store i16 %x01, i16* %pcast, align 1
+ ret void
+}
+
+define dso_local i32 @load_between_stores(i32 %x, i16* %p, i32 *%ptr) {
+; CHECK-LABEL: load_between_stores:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strh w0, [x1]
+; CHECK-NEXT: ldr w8, [x2]
+; CHECK-NEXT: lsr w9, w0, #16
+; CHECK-NEXT: strh w9, [x1, #2]
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %t1 = trunc i32 %x to i16
+ %sh = lshr i32 %x, 16
+ %t2 = trunc i32 %sh to i16
+ store i16 %t1, i16* %p, align 2
+ %ld = load i32, i32 *%ptr
+ %p1 = getelementptr inbounds i16, i16* %p, i64 1
+ store i16 %t2, i16* %p1, align 2
+ ret i32 %ld
+}
+
+define dso_local void @invalid_shift(i16 %x, i8* %p) {
+; CHECK-LABEL: invalid_shift:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: lsr w8, w8, #4
+; CHECK-NEXT: strb w0, [x1]
+; CHECK-NEXT: strb w8, [x1, #1]
+; CHECK-NEXT: ret
+ %t1 = trunc i16 %x to i8
+ %sh = lshr i16 %x, 4
+ %t2 = trunc i16 %sh to i8
+ store i8 %t1, i8* %p, align 1
+ %p1 = getelementptr inbounds i8, i8* %p, i64 1
+ store i8 %t2, i8* %p1, align 1
+ ret void
+}
+
+define dso_local void @missing_store(i32 %x, i8* %p) {
+; CHECK-LABEL: missing_store:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: lsr w8, w0, #8
+; CHECK-NEXT: lsr w9, w0, #24
+; CHECK-NEXT: strb w0, [x1]
+; CHECK-NEXT: strb w8, [x1, #1]
+; CHECK-NEXT: strb w9, [x1, #3]
+; CHECK-NEXT: ret
+ %t1 = trunc i32 %x to i8
+ %sh1 = lshr i32 %x, 8
+ %t2 = trunc i32 %sh1 to i8
+ %sh3 = lshr i32 %x, 24
+ %t4 = trunc i32 %sh3 to i8
+ store i8 %t1, i8* %p, align 1
+ %p1 = getelementptr inbounds i8, i8* %p, i64 1
+ store i8 %t2, i8* %p1, align 1
+ %p3 = getelementptr inbounds i8, i8* %p, i64 3
+ store i8 %t4, i8* %p3, align 1
+ ret void
+}
+
+define dso_local void @different_base_reg(i16 %x, i8* %p, i8 *%p2) {
+; CHECK-LABEL: different_base_reg:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: lsr w8, w8, #8
+; CHECK-NEXT: strb w0, [x1]
+; CHECK-NEXT: strb w8, [x2, #1]
+; CHECK-NEXT: ret
+ %t1 = trunc i16 %x to i8
+ %sh = lshr i16 %x, 8
+ %t2 = trunc i16 %sh to i8
+ store i8 %t1, i8* %p, align 1
+ %p1 = getelementptr inbounds i8, i8* %p2, i64 1
+ store i8 %t2, i8* %p1, align 1
+ ret void
+}
+
+define dso_local void @second_store_is_volatile(i16 %x, i8* %p) {
+; CHECK-LABEL: second_store_is_volatile:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: lsr w8, w8, #8
+; CHECK-NEXT: strb w0, [x1]
+; CHECK-NEXT: strb w8, [x1, #1]
+; CHECK-NEXT: ret
+ %t1 = trunc i16 %x to i8
+ %sh = lshr i16 %x, 8
+ %t2 = trunc i16 %sh to i8
+ store volatile i8 %t1, i8* %p, align 1
+ %p1 = getelementptr inbounds i8, i8* %p, i64 1
+ store i8 %t2, i8* %p1, align 1
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir
new file mode 100644
index 0000000000000..eea181d2db5ef
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir
@@ -0,0 +1,737 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: trunc_i16_to_i8
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: trunc_i16_to_i8
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: G_STORE [[TRUNC]](s16), [[COPY1]](p0) :: (store (s16), align 1)
+ ; CHECK: RET_ReallyLR
+ %2:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %2(s32)
+ %1:_(p0) = COPY $x1
+ %4:_(s16) = G_CONSTANT i16 8
+ %3:_(s8) = G_TRUNC %0(s16)
+ %5:_(s16) = G_LSHR %0, %4(s16)
+ %6:_(s8) = G_TRUNC %5(s16)
+ G_STORE %3(s8), %1(p0) :: (store (s8))
+ %7:_(s64) = G_CONSTANT i64 1
+ %8:_(p0) = G_PTR_ADD %1, %7(s64)
+ G_STORE %6(s8), %8(p0) :: (store (s8))
+ RET_ReallyLR
+
+...
+---
+name: trunc_i32_to_i8
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: trunc_i32_to_i8
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32), align 1)
+ ; CHECK: RET_ReallyLR
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = COPY $x1
+ %3:_(s32) = G_CONSTANT i32 8
+ %6:_(s32) = G_CONSTANT i32 16
+ %9:_(s32) = G_CONSTANT i32 24
+ %2:_(s8) = G_TRUNC %0(s32)
+ %4:_(s32) = G_LSHR %0, %3(s32)
+ %5:_(s8) = G_TRUNC %4(s32)
+ %7:_(s32) = G_LSHR %0, %6(s32)
+ %8:_(s8) = G_TRUNC %7(s32)
+ %10:_(s32) = G_LSHR %0, %9(s32)
+ %11:_(s8) = G_TRUNC %10(s32)
+ G_STORE %2(s8), %1(p0) :: (store (s8))
+ %12:_(s64) = G_CONSTANT i64 1
+ %13:_(p0) = G_PTR_ADD %1, %12(s64)
+ G_STORE %5(s8), %13(p0) :: (store (s8))
+ %14:_(s64) = G_CONSTANT i64 2
+ %15:_(p0) = G_PTR_ADD %1, %14(s64)
+ G_STORE %8(s8), %15(p0) :: (store (s8))
+ %16:_(s64) = G_CONSTANT i64 3
+ %17:_(p0) = G_PTR_ADD %1, %16(s64)
+ G_STORE %11(s8), %17(p0) :: (store (s8))
+ RET_ReallyLR
+
+...
+---
+name: trunc_i32_to_i16
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: trunc_i32_to_i16
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32), align 2)
+ ; CHECK: RET_ReallyLR
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = COPY $x1
+ %3:_(s32) = G_CONSTANT i32 16
+ %2:_(s16) = G_TRUNC %0(s32)
+ %4:_(s32) = G_LSHR %0, %3(s32)
+ %5:_(s16) = G_TRUNC %4(s32)
+ G_STORE %2(s16), %1(p0) :: (store (s16))
+ %6:_(s64) = G_CONSTANT i64 2
+ %7:_(p0) = G_PTR_ADD %1, %6(s64)
+ G_STORE %5(s16), %7(p0) :: (store (s16))
+ RET_ReallyLR
+
+...
+---
+name: be_i32_to_i16
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: be_i32_to_i16
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[C]](s32)
+ ; CHECK: G_STORE [[ROTR]](s32), [[COPY1]](p0) :: (store (s32), align 2)
+ ; CHECK: RET_ReallyLR
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = COPY $x1
+ %2:_(s32) = G_CONSTANT i32 16
+ %3:_(s32) = G_LSHR %0, %2(s32)
+ %4:_(s16) = G_TRUNC %0(s32)
+ %5:_(s16) = G_TRUNC %3(s32)
+ %6:_(s64) = G_CONSTANT i64 2
+ %7:_(p0) = G_PTR_ADD %1, %6(s64)
+ G_STORE %4(s16), %7(p0) :: (store (s16))
+ G_STORE %5(s16), %1(p0) :: (store (s16))
+ RET_ReallyLR
+
+...
+---
+name: be_i32_to_i16_order
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: be_i32_to_i16_order
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[C]](s32)
+ ; CHECK: G_STORE [[ROTR]](s32), [[COPY1]](p0) :: (store (s32), align 2)
+ ; CHECK: RET_ReallyLR
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = COPY $x1
+ %2:_(s32) = G_CONSTANT i32 16
+ %3:_(s32) = G_LSHR %0, %2(s32)
+ %4:_(s16) = G_TRUNC %0(s32)
+ %5:_(s16) = G_TRUNC %3(s32)
+ %6:_(s64) = G_CONSTANT i64 2
+ %7:_(p0) = G_PTR_ADD %1, %6(s64)
+ G_STORE %5(s16), %1(p0) :: (store (s16))
+ G_STORE %4(s16), %7(p0) :: (store (s16))
+ RET_ReallyLR
+
+...
+---
+name: trunc_i64_to_i8
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: trunc_i64_to_i8
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64), align 1)
+ ; CHECK: RET_ReallyLR
+ %0:_(s64) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %3:_(s64) = G_CONSTANT i64 8
+ %6:_(s64) = G_CONSTANT i64 16
+ %9:_(s64) = G_CONSTANT i64 24
+ %12:_(s64) = G_CONSTANT i64 32
+ %15:_(s64) = G_CONSTANT i64 40
+ %18:_(s64) = G_CONSTANT i64 48
+ %21:_(s64) = G_CONSTANT i64 56
+ %2:_(s8) = G_TRUNC %0(s64)
+ %4:_(s64) = G_LSHR %0, %3(s64)
+ %5:_(s8) = G_TRUNC %4(s64)
+ %7:_(s64) = G_LSHR %0, %6(s64)
+ %8:_(s8) = G_TRUNC %7(s64)
+ %10:_(s64) = G_LSHR %0, %9(s64)
+ %11:_(s8) = G_TRUNC %10(s64)
+ %13:_(s64) = G_LSHR %0, %12(s64)
+ %14:_(s8) = G_TRUNC %13(s64)
+ %16:_(s64) = G_LSHR %0, %15(s64)
+ %17:_(s8) = G_TRUNC %16(s64)
+ %19:_(s64) = G_LSHR %0, %18(s64)
+ %20:_(s8) = G_TRUNC %19(s64)
+ %22:_(s64) = G_LSHR %0, %21(s64)
+ %23:_(s8) = G_TRUNC %22(s64)
+ G_STORE %2(s8), %1(p0) :: (store (s8))
+ %24:_(s64) = G_CONSTANT i64 1
+ %25:_(p0) = G_PTR_ADD %1, %24(s64)
+ G_STORE %5(s8), %25(p0) :: (store (s8))
+ %26:_(s64) = G_CONSTANT i64 2
+ %27:_(p0) = G_PTR_ADD %1, %26(s64)
+ G_STORE %8(s8), %27(p0) :: (store (s8))
+ %28:_(s64) = G_CONSTANT i64 3
+ %29:_(p0) = G_PTR_ADD %1, %28(s64)
+ G_STORE %11(s8), %29(p0) :: (store (s8))
+ %30:_(s64) = G_CONSTANT i64 4
+ %31:_(p0) = G_PTR_ADD %1, %30(s64)
+ G_STORE %14(s8), %31(p0) :: (store (s8))
+ %32:_(s64) = G_CONSTANT i64 5
+ %33:_(p0) = G_PTR_ADD %1, %32(s64)
+ G_STORE %17(s8), %33(p0) :: (store (s8))
+ %34:_(s64) = G_CONSTANT i64 6
+ %35:_(p0) = G_PTR_ADD %1, %34(s64)
+ G_STORE %20(s8), %35(p0) :: (store (s8))
+ %36:_(s64) = G_CONSTANT i64 7
+ %37:_(p0) = G_PTR_ADD %1, %36(s64)
+ G_STORE %23(s8), %37(p0) :: (store (s8))
+ RET_ReallyLR
+
+...
+---
+name: trunc_i64_to_i16
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: trunc_i64_to_i16
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64), align 2)
+ ; CHECK: RET_ReallyLR
+ %0:_(s64) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %3:_(s64) = G_CONSTANT i64 16
+ %6:_(s64) = G_CONSTANT i64 32
+ %9:_(s64) = G_CONSTANT i64 48
+ %2:_(s16) = G_TRUNC %0(s64)
+ %4:_(s64) = G_LSHR %0, %3(s64)
+ %5:_(s16) = G_TRUNC %4(s64)
+ %7:_(s64) = G_LSHR %0, %6(s64)
+ %8:_(s16) = G_TRUNC %7(s64)
+ %10:_(s64) = G_LSHR %0, %9(s64)
+ %11:_(s16) = G_TRUNC %10(s64)
+ G_STORE %2(s16), %1(p0) :: (store (s16))
+ %12:_(s64) = G_CONSTANT i64 2
+ %13:_(p0) = G_PTR_ADD %1, %12(s64)
+ G_STORE %5(s16), %13(p0) :: (store (s16))
+ %14:_(s64) = G_CONSTANT i64 4
+ %15:_(p0) = G_PTR_ADD %1, %14(s64)
+ G_STORE %8(s16), %15(p0) :: (store (s16))
+ %16:_(s64) = G_CONSTANT i64 6
+ %17:_(p0) = G_PTR_ADD %1, %16(s64)
+ G_STORE %11(s16), %17(p0) :: (store (s16))
+ RET_ReallyLR
+
+...
+---
+name: trunc_i64_to_i32
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: trunc_i64_to_i32
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64), align 4)
+ ; CHECK: RET_ReallyLR
+ %0:_(s64) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %3:_(s64) = G_CONSTANT i64 32
+ %2:_(s32) = G_TRUNC %0(s64)
+ %4:_(s64) = G_LSHR %0, %3(s64)
+ %5:_(s32) = G_TRUNC %4(s64)
+ G_STORE %2(s32), %1(p0) :: (store (s32))
+ %6:_(s64) = G_CONSTANT i64 4
+ %7:_(p0) = G_PTR_ADD %1, %6(s64)
+ G_STORE %5(s32), %7(p0) :: (store (s32))
+ RET_ReallyLR
+
+...
+---
+name: be_i64_to_i32
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: be_i64_to_i32
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK: [[ROTR:%[0-9]+]]:_(s64) = G_ROTR [[COPY]], [[C]](s64)
+ ; CHECK: G_STORE [[ROTR]](s64), [[COPY1]](p0) :: (store (s64), align 4)
+ ; CHECK: RET_ReallyLR
+ %0:_(s64) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 32
+ %3:_(s64) = G_LSHR %0, %2(s64)
+ %4:_(s32) = G_TRUNC %0(s64)
+ %5:_(s32) = G_TRUNC %3(s64)
+ %6:_(s64) = G_CONSTANT i64 4
+ %7:_(p0) = G_PTR_ADD %1, %6(s64)
+ G_STORE %4(s32), %7(p0) :: (store (s32))
+ G_STORE %5(s32), %1(p0) :: (store (s32))
+ RET_ReallyLR
+
+...
+---
+name: be_i64_to_i32_order
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: be_i64_to_i32_order
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK: [[ROTR:%[0-9]+]]:_(s64) = G_ROTR [[COPY]], [[C]](s64)
+ ; CHECK: G_STORE [[ROTR]](s64), [[COPY1]](p0) :: (store (s64), align 4)
+ ; CHECK: RET_ReallyLR
+ %0:_(s64) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 32
+ %3:_(s64) = G_LSHR %0, %2(s64)
+ %4:_(s32) = G_TRUNC %0(s64)
+ %5:_(s32) = G_TRUNC %3(s64)
+ %6:_(s64) = G_CONSTANT i64 4
+ %7:_(p0) = G_PTR_ADD %1, %6(s64)
+ G_STORE %5(s32), %1(p0) :: (store (s32))
+ G_STORE %4(s32), %7(p0) :: (store (s32))
+ RET_ReallyLR
+
+...
+---
+name: merge_hole
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: merge_hole
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK: G_STORE [[TRUNC]](s8), [[COPY1]](p0) :: (store (s8))
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; CHECK: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16), align 1)
+ ; CHECK: RET_ReallyLR
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = COPY $x1
+ %5:_(s32) = G_CONSTANT i32 16
+ %2:_(s64) = G_CONSTANT i64 2
+ %3:_(p0) = G_PTR_ADD %1, %2(s64)
+ %4:_(s8) = G_TRUNC %0(s32)
+ G_STORE %4(s8), %1(p0) :: (store (s8))
+ %6:_(s32) = G_LSHR %0, %5(s32)
+ %7:_(s16) = G_TRUNC %6(s32)
+ G_STORE %7(s16), %3(p0) :: (store (s16), align 1)
+ RET_ReallyLR
+
+...
+---
+name: merge_hole2
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: merge_hole2
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; CHECK: G_STORE [[TRUNC]](s16), [[PTR_ADD]](p0) :: (store (s16), align 1)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY1]](p0) :: (store (s8))
+ ; CHECK: RET_ReallyLR
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = COPY $x1
+ %4:_(s32) = G_CONSTANT i32 16
+ %2:_(s64) = G_CONSTANT i64 2
+ %3:_(p0) = G_PTR_ADD %1, %2(s64)
+ %5:_(s32) = G_LSHR %0, %4(s32)
+ %6:_(s16) = G_TRUNC %5(s32)
+ G_STORE %6(s16), %3(p0) :: (store (s16), align 1)
+ %7:_(s8) = G_TRUNC %0(s32)
+ G_STORE %7(s8), %1(p0) :: (store (s8))
+ RET_ReallyLR
+
+...
+---
+name: merge_hole3
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: merge_hole3
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK: G_STORE [[TRUNC]](s8), [[PTR_ADD]](p0) :: (store (s8))
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; CHECK: G_STORE [[TRUNC1]](s16), [[PTR_ADD1]](p0) :: (store (s16), align 1)
+ ; CHECK: RET_ReallyLR
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = COPY $x1
+ %7:_(s32) = G_CONSTANT i32 16
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(p0) = G_PTR_ADD %1, %2(s64)
+ %4:_(s64) = G_CONSTANT i64 2
+ %5:_(p0) = G_PTR_ADD %1, %4(s64)
+ %6:_(s8) = G_TRUNC %0(s32)
+ G_STORE %6(s8), %3(p0) :: (store (s8))
+ %8:_(s32) = G_LSHR %0, %7(s32)
+ %9:_(s16) = G_TRUNC %8(s32)
+ G_STORE %9(s16), %5(p0) :: (store (s16), align 1)
+ RET_ReallyLR
+
+...
+---
+name: merge_hole4
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: merge_hole4
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK: G_STORE [[TRUNC]](s8), [[PTR_ADD]](p0) :: (store (s8))
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; CHECK: G_STORE [[TRUNC1]](s16), [[COPY1]](p0) :: (store (s16), align 1)
+ ; CHECK: RET_ReallyLR
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = COPY $x1
+ %5:_(s32) = G_CONSTANT i32 16
+ %2:_(s64) = G_CONSTANT i64 2
+ %3:_(p0) = G_PTR_ADD %1, %2(s64)
+ %4:_(s8) = G_TRUNC %0(s32)
+ G_STORE %4(s8), %3(p0) :: (store (s8))
+ %6:_(s32) = G_LSHR %0, %5(s32)
+ %7:_(s16) = G_TRUNC %6(s32)
+ G_STORE %7(s16), %1(p0) :: (store (s16), align 1)
+ RET_ReallyLR
+
+...
+---
+name: load_between_stores
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+body: |
+ bb.1:
+ liveins: $w0, $x1, $x2
+
+ ; CHECK-LABEL: name: load_between_stores
+ ; CHECK: liveins: $w0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; CHECK: G_STORE [[TRUNC]](s16), [[COPY1]](p0) :: (store (s16))
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32))
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = COPY $x1
+ %2:_(p0) = COPY $x2
+ %4:_(s32) = G_CONSTANT i32 16
+ %3:_(s16) = G_TRUNC %0(s32)
+ %5:_(s32) = G_LSHR %0, %4(s32)
+ %6:_(s16) = G_TRUNC %5(s32)
+ G_STORE %3(s16), %1(p0) :: (store (s16))
+ %7:_(s32) = G_LOAD %2(p0) :: (load (s32))
+ %8:_(s64) = G_CONSTANT i64 2
+ %9:_(p0) = G_PTR_ADD %1, %8(s64)
+ G_STORE %6(s16), %9(p0) :: (store (s16))
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: invalid_shift
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: invalid_shift
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
+ ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16)
+ ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY1]](p0) :: (store (s8))
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK: G_STORE [[TRUNC2]](s8), [[PTR_ADD]](p0) :: (store (s8))
+ ; CHECK: RET_ReallyLR
+ %2:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %2(s32)
+ %1:_(p0) = COPY $x1
+ %4:_(s16) = G_CONSTANT i16 4
+ %3:_(s8) = G_TRUNC %0(s16)
+ %5:_(s16) = G_LSHR %0, %4(s16)
+ %6:_(s8) = G_TRUNC %5(s16)
+ G_STORE %3(s8), %1(p0) :: (store (s8))
+ %7:_(s64) = G_CONSTANT i64 1
+ %8:_(p0) = G_PTR_ADD %1, %7(s64)
+ G_STORE %6(s8), %8(p0) :: (store (s8))
+ RET_ReallyLR
+
+...
+---
+name: missing_store
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: missing_store
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32)
+ ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+ ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32)
+ ; CHECK: G_STORE [[TRUNC]](s8), [[COPY1]](p0) :: (store (s8))
+ ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+ ; CHECK: G_STORE [[TRUNC1]](s8), [[PTR_ADD]](p0) :: (store (s8))
+ ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+ ; CHECK: G_STORE [[TRUNC2]](s8), [[PTR_ADD1]](p0) :: (store (s8))
+ ; CHECK: RET_ReallyLR
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = COPY $x1
+ %3:_(s32) = G_CONSTANT i32 8
+ %6:_(s32) = G_CONSTANT i32 24
+ %2:_(s8) = G_TRUNC %0(s32)
+ %4:_(s32) = G_LSHR %0, %3(s32)
+ %5:_(s8) = G_TRUNC %4(s32)
+ %7:_(s32) = G_LSHR %0, %6(s32)
+ %8:_(s8) = G_TRUNC %7(s32)
+ G_STORE %2(s8), %1(p0) :: (store (s8))
+ %9:_(s64) = G_CONSTANT i64 1
+ %10:_(p0) = G_PTR_ADD %1, %9(s64)
+ G_STORE %5(s8), %10(p0) :: (store (s8))
+ %11:_(s64) = G_CONSTANT i64 3
+ %12:_(p0) = G_PTR_ADD %1, %11(s64)
+ G_STORE %8(s8), %12(p0) :: (store (s8))
+ RET_ReallyLR
+
+...
+---
+name: different_base_reg
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+body: |
+ bb.1:
+ liveins: $w0, $x1, $x2
+
+ ; CHECK-LABEL: name: different_base_reg
+ ; CHECK: liveins: $w0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
+ ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16)
+ ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY1]](p0) :: (store (s8))
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C1]](s64)
+ ; CHECK: G_STORE [[TRUNC2]](s8), [[PTR_ADD]](p0) :: (store (s8))
+ ; CHECK: RET_ReallyLR
+ %3:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %3(s32)
+ %1:_(p0) = COPY $x1
+ %2:_(p0) = COPY $x2
+ %5:_(s16) = G_CONSTANT i16 8
+ %4:_(s8) = G_TRUNC %0(s16)
+ %6:_(s16) = G_LSHR %0, %5(s16)
+ %7:_(s8) = G_TRUNC %6(s16)
+ G_STORE %4(s8), %1(p0) :: (store (s8))
+ %8:_(s64) = G_CONSTANT i64 1
+ %9:_(p0) = G_PTR_ADD %2, %8(s64)
+ G_STORE %7(s8), %9(p0) :: (store (s8))
+ RET_ReallyLR
+
+...
+---
+name: second_store_is_volatile
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+body: |
+ bb.1:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: second_store_is_volatile
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
+ ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16)
+ ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY1]](p0) :: (volatile store (s8))
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK: G_STORE [[TRUNC2]](s8), [[PTR_ADD]](p0) :: (store (s8))
+ ; CHECK: RET_ReallyLR
+ %2:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %2(s32)
+ %1:_(p0) = COPY $x1
+ %4:_(s16) = G_CONSTANT i16 8
+ %3:_(s8) = G_TRUNC %0(s16)
+ %5:_(s16) = G_LSHR %0, %4(s16)
+ %6:_(s8) = G_TRUNC %5(s16)
+ G_STORE %3(s8), %1(p0) :: (volatile store (s8))
+ %7:_(s64) = G_CONSTANT i64 1
+ %8:_(p0) = G_PTR_ADD %1, %7(s64)
+ G_STORE %6(s8), %8(p0) :: (store (s8))
+ RET_ReallyLR
+
+...