[llvm] 3c70c8c - [AggressiveInstCombine] Combine consecutive loads which are being merged to form a wider load.
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 23 02:20:20 PDT 2022
Author: bipmis
Date: 2022-09-23T10:19:50+01:00
New Revision: 3c70c8c1df66500f67f77596b1e76cf0a8447ee5
URL: https://github.com/llvm/llvm-project/commit/3c70c8c1df66500f67f77596b1e76cf0a8447ee5
DIFF: https://github.com/llvm/llvm-project/commit/3c70c8c1df66500f67f77596b1e76cf0a8447ee5.diff
LOG: [AggressiveInstCombine] Combine consecutive loads which are being merged to form a wider load.
This patch simplifies patterns of the following form:
1. (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
2. (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
These patterns indicate that the loads are being merged into a wider load, and that the wider value is the pattern's only use. In that case, for non-atomic, non-volatile loads, reduce the pattern to a single combined load, which improves the cost of inlining, unrolling, vectorization, etc.
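As a minimal sketch of the fold on a little-endian target (this mirrors the loadCombine_2consecutive test updated below; the fold only fires when TTI reports the wider integer type as legal and the misaligned access as fast), IR such as

  %p1 = getelementptr i8, ptr %p, i32 1
  %l1 = load i8, ptr %p
  %l2 = load i8, ptr %p1
  %e1 = zext i8 %l1 to i16
  %e2 = zext i8 %l2 to i16
  %s2 = shl i16 %e2, 8
  %o1 = or i16 %e1, %s2

is reduced to a single wider load; the new load takes the name of the first load and uses of %o1 are rewritten to it:

  %l1 = load i16, ptr %p, align 1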
Differential Revision: https://reviews.llvm.org/D127392
Added:
Modified:
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 704b530baa4b0..2996561cefb20 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -50,6 +50,10 @@ STATISTIC(NumGuardedFunnelShifts,
"Number of guarded funnel shifts transformed into funnel shifts");
STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
+static cl::opt<unsigned> MaxInstrsToScan(
+ "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
+ cl::desc("Max number of instructions to scan for aggressive instcombine."));
+
namespace {
/// Contains expression pattern combiner logic.
/// This class provides both the logic to combine expression patterns and
@@ -635,18 +639,208 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
return true;
}
+/// This is used by foldLoadsRecursive() to capture a Root Load node which is
+/// of type or(load, load) and recursively build the wide load. Also capture the
+/// shift amount, zero extend type and loadSize.
+struct LoadOps {
+ LoadInst *Root = nullptr;
+ bool FoundRoot = false;
+ uint64_t LoadSize = 0;
+ Value *Shift = nullptr;
+ Type *ZextType;
+ AAMDNodes AATags;
+};
+
+// Identify and Merge consecutive loads recursively which is of the form
+// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
+// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
+static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
+ AliasAnalysis &AA) {
+ Value *ShAmt2 = nullptr;
+ Value *X;
+ Instruction *L1, *L2;
+
+ // Go to the last node with loads.
+ if (match(V, m_OneUse(m_c_Or(
+ m_Value(X),
+ m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
+ m_Value(ShAmt2)))))) ||
+ match(V, m_OneUse(m_Or(m_Value(X),
+ m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2))))))))
+ foldLoadsRecursive(X, LOps, DL, AA);
+ else
+ return false;
+
+ // Check if the pattern has loads
+ LoadInst *LI1 = LOps.Root;
+ Value *ShAmt1 = LOps.Shift;
+ if (LOps.FoundRoot == false &&
+ (match(X, m_OneUse(m_ZExt(m_Instruction(L1)))) ||
+ match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
+ m_Value(ShAmt1)))))) {
+ LI1 = dyn_cast<LoadInst>(L1);
+ }
+ LoadInst *LI2 = dyn_cast<LoadInst>(L2);
+
+ // Check if loads are same, atomic, volatile and having same address space.
+ if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
+ LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())
+ return false;
+
+ // Check if Loads come from same BB.
+ if (LI1->getParent() != LI2->getParent())
+ return false;
+
+ // Swap loads if LI1 comes later as we handle only forward loads.
+ if (!LI1->comesBefore(LI2)) {
+ std::swap(LI1, LI2);
+ std::swap(ShAmt1, ShAmt2);
+ }
+
+ // Find the data layout
+ bool IsBigEndian = DL.isBigEndian();
+
+ // Check if loads are consecutive and same size.
+ Value *Load1Ptr = LI1->getPointerOperand();
+ APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
+ Load1Ptr =
+ Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
+ /* AllowNonInbounds */ true);
+
+ Value *Load2Ptr = LI2->getPointerOperand();
+ APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
+ Load2Ptr =
+ Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
+ /* AllowNonInbounds */ true);
+
+ // Verify if both loads have same base pointers and load sizes are same.
+ uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
+ uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
+ if (Load1Ptr != Load2Ptr || LoadSize1 != LoadSize2)
+ return false;
+
+ // Support Loadsizes greater or equal to 8bits and only power of 2.
+ if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1))
+ return false;
+
+ // Alias Analysis to check for store b/w the loads.
+ MemoryLocation Loc = MemoryLocation::get(LI2);
+ unsigned NumScanned = 0;
+ for (Instruction &Inst : make_range(LI1->getIterator(), LI2->getIterator())) {
+ if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
+ return false;
+ if (++NumScanned > MaxInstrsToScan)
+ return false;
+ }
+
+ // Big endian swap the shifts
+ if (IsBigEndian)
+ std::swap(ShAmt1, ShAmt2);
+
+ // Find Shifts values.
+ const APInt *Temp;
+ uint64_t Shift1 = 0, Shift2 = 0;
+ if (ShAmt1 && match(ShAmt1, m_APInt(Temp)))
+ Shift1 = Temp->getZExtValue();
+ if (ShAmt2 && match(ShAmt2, m_APInt(Temp)))
+ Shift2 = Temp->getZExtValue();
+
+ // First load is always LI1. This is where we put the new load.
+ // Use the merged load size available from LI1, if we already combined loads.
+ if (LOps.FoundRoot)
+ LoadSize1 = LOps.LoadSize;
+
+ // Verify if shift amount and load index aligns and verifies that loads
+ // are consecutive.
+ uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
+ uint64_t PrevSize =
+ DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
+ if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
+ return false;
+
+ // Update LOps
+ AAMDNodes AATags1 = LOps.AATags;
+ AAMDNodes AATags2 = LI2->getAAMetadata();
+ if (LOps.FoundRoot == false) {
+ LOps.FoundRoot = true;
+ LOps.LoadSize = LoadSize1 + LoadSize2;
+ AATags1 = LI1->getAAMetadata();
+ } else
+ LOps.LoadSize = LOps.LoadSize + LoadSize2;
+
+ // Concatenate the AATags of the Merged Loads.
+ LOps.AATags = AATags1.concat(AATags2);
+
+ LOps.Root = LI1;
+ LOps.Shift = ShAmt1;
+ LOps.ZextType = X->getType();
+ return true;
+}
+
+// For a given BB instruction, evaluate all loads in the chain that form a
+// pattern which suggests that the loads can be combined. The one and only use
+// of the loads is to form a wider load.
+static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
+ TargetTransformInfo &TTI, AliasAnalysis &AA) {
+ LoadOps LOps;
+ if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot)
+ return false;
+
+ IRBuilder<> Builder(&I);
+ LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;
+
+ // TTI based checks if we want to proceed with wider load
+ bool Allowed =
+ TTI.isTypeLegal(IntegerType::get(I.getContext(), LOps.LoadSize));
+ if (!Allowed)
+ return false;
+
+ unsigned AS = LI1->getPointerAddressSpace();
+ bool Fast = false;
+ Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
+ AS, LI1->getAlign(), &Fast);
+ if (!Allowed || !Fast)
+ return false;
+
+ // New load can be generated
+ Value *Load1Ptr = LI1->getPointerOperand();
+ Builder.SetInsertPoint(LI1);
+ NewLoad = Builder.CreateAlignedLoad(
+ IntegerType::get(Load1Ptr->getContext(), LOps.LoadSize), Load1Ptr,
+ LI1->getAlign(), LI1->isVolatile(), "");
+ NewLoad->takeName(LI1);
+ // Set the New Load AATags Metadata.
+ if (LOps.AATags)
+ NewLoad->setAAMetadata(LOps.AATags);
+
+ Value *NewOp = NewLoad;
+ // Check if zero extend needed.
+ if (LOps.ZextType)
+ NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);
+
+ // Check if shift needed. We need to shift with the amount of load1
+ // shift if not zero.
+ if (LOps.Shift)
+ NewOp = Builder.CreateShl(NewOp, LOps.Shift);
+ I.replaceAllUsesWith(NewOp);
+
+ return true;
+}
+
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
TargetTransformInfo &TTI,
- TargetLibraryInfo &TLI) {
+ TargetLibraryInfo &TLI, AliasAnalysis &AA) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
if (!DT.isReachableFromEntry(&BB))
continue;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
// Walk the block backwards for efficiency. We're matching a chain of
// use->defs, so we're more likely to succeed by starting from the bottom.
// Also, we want to avoid matching partial patterns.
@@ -658,6 +852,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
MadeChange |= tryToRecognizePopCount(I);
MadeChange |= tryToFPToSat(I, TTI);
MadeChange |= tryToRecognizeTableBasedCttz(I);
+ MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA);
// NOTE: This function introduces erasing of the instruction `I`, so it
// needs to be called at the end of this sequence, otherwise we may make
// bugs.
@@ -676,12 +871,13 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
- TargetLibraryInfo &TLI, DominatorTree &DT) {
+ TargetLibraryInfo &TLI, DominatorTree &DT,
+ AliasAnalysis &AA) {
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
- MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI);
+ MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA);
return MadeChange;
}
@@ -696,6 +892,7 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
}
bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
@@ -703,7 +900,8 @@ bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- return runImpl(F, AC, TTI, TLI, DT);
+ auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ return runImpl(F, AC, TTI, TLI, DT, AA);
}
PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
@@ -712,7 +910,8 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- if (!runImpl(F, AC, TTI, TLI, DT)) {
+ auto &AA = AM.getResult<AAManager>(F);
+ if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index 59a459e8243a0..0caafa8dc3442 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="e-n64" | FileCheck %s --check-prefixes=ALL
-; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="E-n64" | FileCheck %s --check-prefixes=ALL
+; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="e-n64" | FileCheck %s --check-prefixes=ALL,LE
+; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="E-n64" | FileCheck %s --check-prefixes=ALL,BE
define i16 @loadCombine_2consecutive(ptr %p) {
;
@@ -46,25 +46,29 @@ define i16 @loadCombine_2consecutive_BE(ptr %p) {
}
define i32 @loadCombine_4consecutive(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -90,25 +94,29 @@ define i32 @loadCombine_4consecutive(ptr %p) {
}
define i32 @loadCombine_4consecutive_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_BE(
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; BE-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -134,26 +142,31 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) {
}
define i32 @loadCombine_4consecutive_alias(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: store i8 10, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: store i8 10, ptr [[P]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: store i8 10, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -180,26 +193,31 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) {
}
define i32 @loadCombine_4consecutive_alias_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: store i8 10, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT: store i8 10, ptr [[P]], align 1
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias_BE(
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; BE-NEXT: store i8 10, ptr [[P]], align 1
+; BE-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -711,25 +729,40 @@ define i32 @loadCombine_parLoad1(ptr %p) {
}
define i128 @loadCombine_i128(ptr %p) {
-; ALL-LABEL: @loadCombine_i128(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4
-; ALL-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4
-; ALL-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
-; ALL-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
-; ALL-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128
-; ALL-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128
-; ALL-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
-; ALL-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
-; ALL-NEXT: [[S2:%.*]] = shl i128 [[E2]], 32
-; ALL-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64
-; ALL-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96
-; ALL-NEXT: [[O1:%.*]] = or i128 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]]
-; ALL-NEXT: ret i128 [[O3]]
+; LE-LABEL: @loadCombine_i128(
+; LE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4
+; LE-NEXT: [[TMP1:%.*]] = zext i64 [[L1]] to i128
+; LE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; LE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; LE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64
+; LE-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96
+; LE-NEXT: [[O2:%.*]] = or i128 [[TMP1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]]
+; LE-NEXT: ret i128 [[O3]]
+;
+; BE-LABEL: @loadCombine_i128(
+; BE-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4
+; BE-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4
+; BE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128
+; BE-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128
+; BE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; BE-NEXT: [[S2:%.*]] = shl i128 [[E2]], 32
+; BE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64
+; BE-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96
+; BE-NEXT: [[O1:%.*]] = or i128 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]]
+; BE-NEXT: ret i128 [[O3]]
;
%p1 = getelementptr i32, ptr %p, i32 1
%p2 = getelementptr i32, ptr %p, i32 2
@@ -755,25 +788,40 @@ define i128 @loadCombine_i128(ptr %p) {
}
define i128 @loadCombine_i128_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_i128_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4
-; ALL-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4
-; ALL-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
-; ALL-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
-; ALL-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128
-; ALL-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128
-; ALL-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
-; ALL-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
-; ALL-NEXT: [[S1:%.*]] = shl i128 [[E1]], 96
-; ALL-NEXT: [[S2:%.*]] = shl i128 [[E2]], 64
-; ALL-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32
-; ALL-NEXT: [[O1:%.*]] = or i128 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]]
-; ALL-NEXT: ret i128 [[O3]]
+; LE-LABEL: @loadCombine_i128_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4
+; LE-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4
+; LE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; LE-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128
+; LE-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128
+; LE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; LE-NEXT: [[S1:%.*]] = shl i128 [[E1]], 96
+; LE-NEXT: [[S2:%.*]] = shl i128 [[E2]], 64
+; LE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32
+; LE-NEXT: [[O1:%.*]] = or i128 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]]
+; LE-NEXT: ret i128 [[O3]]
+;
+; BE-LABEL: @loadCombine_i128_BE(
+; BE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4
+; BE-NEXT: [[TMP1:%.*]] = zext i64 [[L1]] to i128
+; BE-NEXT: [[TMP2:%.*]] = shl i128 [[TMP1]], 64
+; BE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; BE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32
+; BE-NEXT: [[O2:%.*]] = or i128 [[TMP2]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]]
+; BE-NEXT: ret i128 [[O3]]
;
%p1 = getelementptr i32, ptr %p, i32 1
%p2 = getelementptr i32, ptr %p, i32 2
@@ -799,25 +847,29 @@ define i128 @loadCombine_i128_BE(ptr %p) {
}
define i64 @loadCombine_i64(ptr %p) {
-; ALL-LABEL: @loadCombine_i64(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; ALL-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; ALL-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2
-; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64
-; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64
-; ALL-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
-; ALL-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64
-; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 16
-; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 32
-; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 48
-; ALL-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
-; ALL-NEXT: ret i64 [[O3]]
+; LE-LABEL: @loadCombine_i64(
+; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2
+; LE-NEXT: ret i64 [[L1]]
+;
+; BE-LABEL: @loadCombine_i64(
+; BE-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; BE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; BE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; BE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2
+; BE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64
+; BE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64
+; BE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
+; BE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64
+; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 16
+; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 32
+; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 48
+; BE-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
+; BE-NEXT: ret i64 [[O3]]
;
%p1 = getelementptr i16, ptr %p, i32 1
%p2 = getelementptr i16, ptr %p, i32 2
@@ -843,25 +895,29 @@ define i64 @loadCombine_i64(ptr %p) {
}
define i64 @loadCombine_i64_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_i64_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; ALL-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; ALL-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2
-; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64
-; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64
-; ALL-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
-; ALL-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64
-; ALL-NEXT: [[S1:%.*]] = shl i64 [[E1]], 48
-; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 32
-; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
-; ALL-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i64 [[O2]], [[E4]]
-; ALL-NEXT: ret i64 [[O3]]
+; LE-LABEL: @loadCombine_i64_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; LE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; LE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2
+; LE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64
+; LE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64
+; LE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
+; LE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64
+; LE-NEXT: [[S1:%.*]] = shl i64 [[E1]], 48
+; LE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 32
+; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
+; LE-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[E4]]
+; LE-NEXT: ret i64 [[O3]]
+;
+; BE-LABEL: @loadCombine_i64_BE(
+; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2
+; BE-NEXT: ret i64 [[L1]]
;
%p1 = getelementptr i16, ptr %p, i32 1
%p2 = getelementptr i16, ptr %p, i32 2
@@ -975,46 +1031,51 @@ define i16 @loadCombine_2consecutive_separateptr(ptr %p, ptr %p2) {
}
define i64 @load64_farLoads(ptr %ptr) {
-; ALL-LABEL: @load64_farLoads(
-; ALL-NEXT: entry:
-; ALL-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
-; ALL-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64
-; ALL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
-; ALL-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
-; ALL-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i64
-; ALL-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 8
-; ALL-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[SHL]]
-; ALL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
-; ALL-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
-; ALL-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i64
-; ALL-NEXT: [[SHL5:%.*]] = shl i64 [[CONV4]], 16
-; ALL-NEXT: [[OR6:%.*]] = or i64 [[OR]], [[SHL5]]
-; ALL-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
-; ALL-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1
-; ALL-NEXT: [[CONV8:%.*]] = zext i8 [[TMP3]] to i64
-; ALL-NEXT: [[SHL9:%.*]] = shl i64 [[CONV8]], 24
-; ALL-NEXT: [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]]
-; ALL-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
-; ALL-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1
-; ALL-NEXT: [[CONV12:%.*]] = zext i8 [[TMP4]] to i64
-; ALL-NEXT: [[SHL13:%.*]] = shl i64 [[CONV12]], 32
-; ALL-NEXT: [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]]
-; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
-; ALL-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1
-; ALL-NEXT: [[CONV16:%.*]] = zext i8 [[TMP5]] to i64
-; ALL-NEXT: [[SHL17:%.*]] = shl i64 [[CONV16]], 40
-; ALL-NEXT: [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]]
-; ALL-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
-; ALL-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1
-; ALL-NEXT: [[CONV20:%.*]] = zext i8 [[TMP6]] to i64
-; ALL-NEXT: [[SHL21:%.*]] = shl i64 [[CONV20]], 48
-; ALL-NEXT: [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]]
-; ALL-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7
-; ALL-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1
-; ALL-NEXT: [[CONV24:%.*]] = zext i8 [[TMP7]] to i64
-; ALL-NEXT: [[SHL25:%.*]] = shl i64 [[CONV24]], 56
-; ALL-NEXT: [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]]
-; ALL-NEXT: ret i64 [[OR26]]
+; LE-LABEL: @load64_farLoads(
+; LE-NEXT: entry:
+; LE-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR:%.*]], align 1
+; LE-NEXT: ret i64 [[TMP0]]
+;
+; BE-LABEL: @load64_farLoads(
+; BE-NEXT: entry:
+; BE-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
+; BE-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64
+; BE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; BE-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; BE-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i64
+; BE-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 8
+; BE-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[SHL]]
+; BE-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
+; BE-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
+; BE-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i64
+; BE-NEXT: [[SHL5:%.*]] = shl i64 [[CONV4]], 16
+; BE-NEXT: [[OR6:%.*]] = or i64 [[OR]], [[SHL5]]
+; BE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
+; BE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1
+; BE-NEXT: [[CONV8:%.*]] = zext i8 [[TMP3]] to i64
+; BE-NEXT: [[SHL9:%.*]] = shl i64 [[CONV8]], 24
+; BE-NEXT: [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]]
+; BE-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
+; BE-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1
+; BE-NEXT: [[CONV12:%.*]] = zext i8 [[TMP4]] to i64
+; BE-NEXT: [[SHL13:%.*]] = shl i64 [[CONV12]], 32
+; BE-NEXT: [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]]
+; BE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
+; BE-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1
+; BE-NEXT: [[CONV16:%.*]] = zext i8 [[TMP5]] to i64
+; BE-NEXT: [[SHL17:%.*]] = shl i64 [[CONV16]], 40
+; BE-NEXT: [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]]
+; BE-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
+; BE-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1
+; BE-NEXT: [[CONV20:%.*]] = zext i8 [[TMP6]] to i64
+; BE-NEXT: [[SHL21:%.*]] = shl i64 [[CONV20]], 48
+; BE-NEXT: [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]]
+; BE-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7
+; BE-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1
+; BE-NEXT: [[CONV24:%.*]] = zext i8 [[TMP7]] to i64
+; BE-NEXT: [[SHL25:%.*]] = shl i64 [[CONV24]], 56
+; BE-NEXT: [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]]
+; BE-NEXT: ret i64 [[OR26]]
;
entry:
%0 = load i8, ptr %ptr, align 1
@@ -1058,26 +1119,31 @@ entry:
}
define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) {
-; ALL-LABEL: @loadCombine_4consecutive_metadata(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0
-; ALL-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_metadata(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0
+; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_metadata(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0
+; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 9258fa4f59e82..5494b6f8d4c7e 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1,18 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="e-n64" -S | FileCheck %s --check-prefixes=ALL
-; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="E-n64" -S | FileCheck %s --check-prefixes=ALL
+; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="e-n64" -S | FileCheck %s --check-prefixes=ALL,LE
+; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="E-n64" -S | FileCheck %s --check-prefixes=ALL,BE
define i16 @loadCombine_2consecutive(ptr %p) {
;
-; ALL-LABEL: @loadCombine_2consecutive(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16
-; ALL-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8
-; ALL-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]]
-; ALL-NEXT: ret i16 [[O1]]
+; LE-LABEL: @loadCombine_2consecutive(
+; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i16 [[L1]]
+;
+; BE-LABEL: @loadCombine_2consecutive(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16
+; BE-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8
+; BE-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]]
+; BE-NEXT: ret i16 [[O1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%l1 = load i8, ptr %p
@@ -25,15 +29,19 @@ define i16 @loadCombine_2consecutive(ptr %p) {
}
define i16 @loadCombine_2consecutive_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16
-; ALL-NEXT: [[S1:%.*]] = shl i16 [[E1]], 8
-; ALL-NEXT: [[O1:%.*]] = or i16 [[S1]], [[E2]]
-; ALL-NEXT: ret i16 [[O1]]
+; LE-LABEL: @loadCombine_2consecutive_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16
+; LE-NEXT: [[S1:%.*]] = shl i16 [[E1]], 8
+; LE-NEXT: [[O1:%.*]] = or i16 [[S1]], [[E2]]
+; LE-NEXT: ret i16 [[O1]]
+;
+; BE-LABEL: @loadCombine_2consecutive_BE(
+; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P:%.*]], align 1
+; BE-NEXT: ret i16 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%l1 = load i8, ptr %p
@@ -46,25 +54,29 @@ define i16 @loadCombine_2consecutive_BE(ptr %p) {
}
define i32 @loadCombine_4consecutive(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -90,25 +102,29 @@ define i32 @loadCombine_4consecutive(ptr %p) {
}
define i32 @loadCombine_4consecutive_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_BE(
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; BE-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -134,26 +150,31 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) {
}
define i32 @loadCombine_4consecutive_alias(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: store i8 10, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: store i8 10, ptr [[P]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: store i8 10, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -180,26 +201,31 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) {
}
define i32 @loadCombine_4consecutive_alias_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: store i8 10, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT: store i8 10, ptr [[P]], align 1
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias_BE(
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; BE-NEXT: store i8 10, ptr [[P]], align 1
+; BE-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -226,26 +252,42 @@ define i32 @loadCombine_4consecutive_alias_BE(ptr %p) {
}
define i32 @loadCombine_4consecutive_alias2(ptr %p, ptr %pstr) {
-; ALL-LABEL: @loadCombine_4consecutive_alias2(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias2(
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1
+; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; LE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; LE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias2(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -272,26 +314,42 @@ define i32 @loadCombine_4consecutive_alias2(ptr %p, ptr %pstr) {
}
define i32 @loadCombine_4consecutive_alias2_BE(ptr %p, ptr %pstr) {
-; ALL-LABEL: @loadCombine_4consecutive_alias2_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias2_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias2_BE(
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1
+; BE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32
+; BE-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 16
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
+; BE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -318,27 +376,44 @@ define i32 @loadCombine_4consecutive_alias2_BE(ptr %p, ptr %pstr) {
}
define i32 @loadCombine_4consecutive_alias3(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias3(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: store i8 10, ptr [[P3]], align 1
-; ALL-NEXT: store i8 5, ptr [[P]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias3(
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1
+; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: store i8 10, ptr [[P3]], align 1
+; LE-NEXT: store i8 5, ptr [[P]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; LE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; LE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias3(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: store i8 10, ptr [[P3]], align 1
+; BE-NEXT: store i8 5, ptr [[P]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -366,27 +441,44 @@ define i32 @loadCombine_4consecutive_alias3(ptr %p) {
}
define i32 @loadCombine_4consecutive_alias3_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias3_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: store i8 10, ptr [[P3]], align 1
-; ALL-NEXT: store i8 5, ptr [[P]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias3_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: store i8 10, ptr [[P3]], align 1
+; LE-NEXT: store i8 5, ptr [[P]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias3_BE(
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1
+; BE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32
+; BE-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 16
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: store i8 10, ptr [[P3]], align 1
+; BE-NEXT: store i8 5, ptr [[P]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
+; BE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -677,20 +769,30 @@ define i32 @loadCombine_4consecutive_hasOneUse4(ptr %p) {
}
define i32 @loadCombine_parLoad1(ptr %p) {
-; ALL-LABEL: @loadCombine_parLoad1(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: ret i32 [[O2]]
+; LE-LABEL: @loadCombine_parLoad1(
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1
+; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; LE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S3]]
+; LE-NEXT: ret i32 [[O2]]
+;
+; BE-LABEL: @loadCombine_parLoad1(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: ret i32 [[O2]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -711,25 +813,40 @@ define i32 @loadCombine_parLoad1(ptr %p) {
}
define i128 @loadCombine_i128(ptr %p) {
-; ALL-LABEL: @loadCombine_i128(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4
-; ALL-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4
-; ALL-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
-; ALL-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
-; ALL-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128
-; ALL-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128
-; ALL-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
-; ALL-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
-; ALL-NEXT: [[S2:%.*]] = shl i128 [[E2]], 32
-; ALL-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64
-; ALL-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96
-; ALL-NEXT: [[O1:%.*]] = or i128 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]]
-; ALL-NEXT: ret i128 [[O3]]
+; LE-LABEL: @loadCombine_i128(
+; LE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4
+; LE-NEXT: [[TMP1:%.*]] = zext i64 [[L1]] to i128
+; LE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; LE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; LE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64
+; LE-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96
+; LE-NEXT: [[O2:%.*]] = or i128 [[TMP1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]]
+; LE-NEXT: ret i128 [[O3]]
+;
+; BE-LABEL: @loadCombine_i128(
+; BE-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4
+; BE-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4
+; BE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128
+; BE-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128
+; BE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; BE-NEXT: [[S2:%.*]] = shl i128 [[E2]], 32
+; BE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64
+; BE-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96
+; BE-NEXT: [[O1:%.*]] = or i128 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]]
+; BE-NEXT: ret i128 [[O3]]
;
%p1 = getelementptr i32, ptr %p, i32 1
%p2 = getelementptr i32, ptr %p, i32 2
@@ -755,25 +872,40 @@ define i128 @loadCombine_i128(ptr %p) {
}
define i128 @loadCombine_i128_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_i128_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4
-; ALL-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4
-; ALL-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
-; ALL-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
-; ALL-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128
-; ALL-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128
-; ALL-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
-; ALL-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
-; ALL-NEXT: [[S1:%.*]] = shl i128 [[E1]], 96
-; ALL-NEXT: [[S2:%.*]] = shl i128 [[E2]], 64
-; ALL-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32
-; ALL-NEXT: [[O1:%.*]] = or i128 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]]
-; ALL-NEXT: ret i128 [[O3]]
+; LE-LABEL: @loadCombine_i128_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4
+; LE-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4
+; LE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; LE-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128
+; LE-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128
+; LE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; LE-NEXT: [[S1:%.*]] = shl i128 [[E1]], 96
+; LE-NEXT: [[S2:%.*]] = shl i128 [[E2]], 64
+; LE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32
+; LE-NEXT: [[O1:%.*]] = or i128 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]]
+; LE-NEXT: ret i128 [[O3]]
+;
+; BE-LABEL: @loadCombine_i128_BE(
+; BE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4
+; BE-NEXT: [[TMP1:%.*]] = zext i64 [[L1]] to i128
+; BE-NEXT: [[TMP2:%.*]] = shl i128 [[TMP1]], 64
+; BE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; BE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32
+; BE-NEXT: [[O2:%.*]] = or i128 [[TMP2]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]]
+; BE-NEXT: ret i128 [[O3]]
;
%p1 = getelementptr i32, ptr %p, i32 1
%p2 = getelementptr i32, ptr %p, i32 2
@@ -799,25 +931,29 @@ define i128 @loadCombine_i128_BE(ptr %p) {
}
define i64 @loadCombine_i64(ptr %p) {
-; ALL-LABEL: @loadCombine_i64(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; ALL-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; ALL-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2
-; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64
-; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64
-; ALL-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
-; ALL-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64
-; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 16
-; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 32
-; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 48
-; ALL-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
-; ALL-NEXT: ret i64 [[O3]]
+; LE-LABEL: @loadCombine_i64(
+; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2
+; LE-NEXT: ret i64 [[L1]]
+;
+; BE-LABEL: @loadCombine_i64(
+; BE-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; BE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; BE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; BE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2
+; BE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64
+; BE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64
+; BE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
+; BE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64
+; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 16
+; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 32
+; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 48
+; BE-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
+; BE-NEXT: ret i64 [[O3]]
;
%p1 = getelementptr i16, ptr %p, i32 1
%p2 = getelementptr i16, ptr %p, i32 2
@@ -843,25 +979,29 @@ define i64 @loadCombine_i64(ptr %p) {
}
define i64 @loadCombine_i64_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_i64_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; ALL-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; ALL-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2
-; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64
-; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64
-; ALL-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
-; ALL-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64
-; ALL-NEXT: [[S1:%.*]] = shl i64 [[E1]], 48
-; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 32
-; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
-; ALL-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i64 [[O2]], [[E4]]
-; ALL-NEXT: ret i64 [[O3]]
+; LE-LABEL: @loadCombine_i64_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; LE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; LE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2
+; LE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64
+; LE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64
+; LE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
+; LE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64
+; LE-NEXT: [[S1:%.*]] = shl i64 [[E1]], 48
+; LE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 32
+; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
+; LE-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[E4]]
+; LE-NEXT: ret i64 [[O3]]
+;
+; BE-LABEL: @loadCombine_i64_BE(
+; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2
+; BE-NEXT: ret i64 [[L1]]
;
%p1 = getelementptr i16, ptr %p, i32 1
%p2 = getelementptr i16, ptr %p, i32 2
@@ -976,46 +1116,51 @@ define i16 @loadCombine_2consecutive_separateptr(ptr %p, ptr %p2) {
}
define i64 @load64_farLoads(ptr %ptr) {
-; ALL-LABEL: @load64_farLoads(
-; ALL-NEXT: entry:
-; ALL-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
-; ALL-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64
-; ALL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
-; ALL-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
-; ALL-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i64
-; ALL-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 8
-; ALL-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[SHL]]
-; ALL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
-; ALL-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
-; ALL-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i64
-; ALL-NEXT: [[SHL5:%.*]] = shl i64 [[CONV4]], 16
-; ALL-NEXT: [[OR6:%.*]] = or i64 [[OR]], [[SHL5]]
-; ALL-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
-; ALL-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1
-; ALL-NEXT: [[CONV8:%.*]] = zext i8 [[TMP3]] to i64
-; ALL-NEXT: [[SHL9:%.*]] = shl i64 [[CONV8]], 24
-; ALL-NEXT: [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]]
-; ALL-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
-; ALL-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1
-; ALL-NEXT: [[CONV12:%.*]] = zext i8 [[TMP4]] to i64
-; ALL-NEXT: [[SHL13:%.*]] = shl i64 [[CONV12]], 32
-; ALL-NEXT: [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]]
-; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
-; ALL-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1
-; ALL-NEXT: [[CONV16:%.*]] = zext i8 [[TMP5]] to i64
-; ALL-NEXT: [[SHL17:%.*]] = shl i64 [[CONV16]], 40
-; ALL-NEXT: [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]]
-; ALL-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
-; ALL-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1
-; ALL-NEXT: [[CONV20:%.*]] = zext i8 [[TMP6]] to i64
-; ALL-NEXT: [[SHL21:%.*]] = shl i64 [[CONV20]], 48
-; ALL-NEXT: [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]]
-; ALL-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7
-; ALL-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1
-; ALL-NEXT: [[CONV24:%.*]] = zext i8 [[TMP7]] to i64
-; ALL-NEXT: [[SHL25:%.*]] = shl i64 [[CONV24]], 56
-; ALL-NEXT: [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]]
-; ALL-NEXT: ret i64 [[OR26]]
+; LE-LABEL: @load64_farLoads(
+; LE-NEXT: entry:
+; LE-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR:%.*]], align 1
+; LE-NEXT: ret i64 [[TMP0]]
+;
+; BE-LABEL: @load64_farLoads(
+; BE-NEXT: entry:
+; BE-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
+; BE-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64
+; BE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; BE-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; BE-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i64
+; BE-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 8
+; BE-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[SHL]]
+; BE-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
+; BE-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
+; BE-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i64
+; BE-NEXT: [[SHL5:%.*]] = shl i64 [[CONV4]], 16
+; BE-NEXT: [[OR6:%.*]] = or i64 [[OR]], [[SHL5]]
+; BE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
+; BE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1
+; BE-NEXT: [[CONV8:%.*]] = zext i8 [[TMP3]] to i64
+; BE-NEXT: [[SHL9:%.*]] = shl i64 [[CONV8]], 24
+; BE-NEXT: [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]]
+; BE-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
+; BE-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1
+; BE-NEXT: [[CONV12:%.*]] = zext i8 [[TMP4]] to i64
+; BE-NEXT: [[SHL13:%.*]] = shl i64 [[CONV12]], 32
+; BE-NEXT: [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]]
+; BE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
+; BE-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1
+; BE-NEXT: [[CONV16:%.*]] = zext i8 [[TMP5]] to i64
+; BE-NEXT: [[SHL17:%.*]] = shl i64 [[CONV16]], 40
+; BE-NEXT: [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]]
+; BE-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
+; BE-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1
+; BE-NEXT: [[CONV20:%.*]] = zext i8 [[TMP6]] to i64
+; BE-NEXT: [[SHL21:%.*]] = shl i64 [[CONV20]], 48
+; BE-NEXT: [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]]
+; BE-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7
+; BE-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1
+; BE-NEXT: [[CONV24:%.*]] = zext i8 [[TMP7]] to i64
+; BE-NEXT: [[SHL25:%.*]] = shl i64 [[CONV24]], 56
+; BE-NEXT: [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]]
+; BE-NEXT: ret i64 [[OR26]]
;
entry:
%0 = load i8, ptr %ptr, align 1
@@ -1059,26 +1204,31 @@ entry:
}
define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) {
-; ALL-LABEL: @loadCombine_4consecutive_metadata(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0
-; ALL-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_metadata(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0
+; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_metadata(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0
+; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2