[llvm] 3c70c8c - [AggressiveInstCombine] Combine consecutive loads which are being merged to form a wider load.

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 23 02:20:20 PDT 2022


Author: bipmis
Date: 2022-09-23T10:19:50+01:00
New Revision: 3c70c8c1df66500f67f77596b1e76cf0a8447ee5

URL: https://github.com/llvm/llvm-project/commit/3c70c8c1df66500f67f77596b1e76cf0a8447ee5
DIFF: https://github.com/llvm/llvm-project/commit/3c70c8c1df66500f67f77596b1e76cf0a8447ee5.diff

LOG: [AggressiveInstCombine] Combine consecutive loads which are being merged to form a wider load.

The patch simplifies some of the patterns as below

1. (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
2. (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)

The pattern is indicative of the fact that the loads are being merged to a wider load and the only use of this pattern is with a wider load. In this case, for non-atomic/non-volatile loads, reduce the pattern to a combined load, which would improve the cost of inlining, unrolling, vectorization etc.

Differential Revision: https://reviews.llvm.org/D127392

Added: 
    

Modified: 
    llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
    llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
    llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 704b530baa4b0..2996561cefb20 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -50,6 +50,10 @@ STATISTIC(NumGuardedFunnelShifts,
           "Number of guarded funnel shifts transformed into funnel shifts");
 STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
 
+static cl::opt<unsigned> MaxInstrsToScan(
+    "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
+    cl::desc("Max number of instructions to scan for aggressive instcombine."));
+
 namespace {
 /// Contains expression pattern combiner logic.
 /// This class provides both the logic to combine expression patterns and
@@ -635,18 +639,208 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
   return true;
 }
 
+/// This is used by foldLoadsRecursive() to capture a Root Load node which is
+/// of type or(load, load) and recursively build the wide load. Also capture the
+/// shift amount, zero extend type and loadSize.
+struct LoadOps {
+  LoadInst *Root = nullptr;
+  bool FoundRoot = false;
+  uint64_t LoadSize = 0;
+  Value *Shift = nullptr;
+  Type *ZextType;
+  AAMDNodes AATags;
+};
+
+// Identify and Merge consecutive loads recursively which is of the form
+// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
+// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
+static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
+                               AliasAnalysis &AA) {
+  Value *ShAmt2 = nullptr;
+  Value *X;
+  Instruction *L1, *L2;
+
+  // Go to the last node with loads.
+  if (match(V, m_OneUse(m_c_Or(
+                   m_Value(X),
+                   m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
+                                  m_Value(ShAmt2)))))) ||
+      match(V, m_OneUse(m_Or(m_Value(X),
+                             m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2))))))))
+    foldLoadsRecursive(X, LOps, DL, AA);
+  else
+    return false;
+
+  // Check if the pattern has loads
+  LoadInst *LI1 = LOps.Root;
+  Value *ShAmt1 = LOps.Shift;
+  if (LOps.FoundRoot == false &&
+      (match(X, m_OneUse(m_ZExt(m_Instruction(L1)))) ||
+       match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
+                               m_Value(ShAmt1)))))) {
+    LI1 = dyn_cast<LoadInst>(L1);
+  }
+  LoadInst *LI2 = dyn_cast<LoadInst>(L2);
+
+  // Check if loads are same, atomic, volatile and having same address space.
+  if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
+      LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())
+    return false;
+
+  // Check if Loads come from same BB.
+  if (LI1->getParent() != LI2->getParent())
+    return false;
+
+  // Swap loads if LI1 comes later as we handle only forward loads.
+  if (!LI1->comesBefore(LI2)) {
+    std::swap(LI1, LI2);
+    std::swap(ShAmt1, ShAmt2);
+  }
+
+  // Find the data layout
+  bool IsBigEndian = DL.isBigEndian();
+
+  // Check if loads are consecutive and same size.
+  Value *Load1Ptr = LI1->getPointerOperand();
+  APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
+  Load1Ptr =
+      Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
+                                                  /* AllowNonInbounds */ true);
+
+  Value *Load2Ptr = LI2->getPointerOperand();
+  APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
+  Load2Ptr =
+      Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
+                                                  /* AllowNonInbounds */ true);
+
+  // Verify if both loads have same base pointers and load sizes are same.
+  uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
+  uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
+  if (Load1Ptr != Load2Ptr || LoadSize1 != LoadSize2)
+    return false;
+
+  // Support Loadsizes greater or equal to 8bits and only power of 2.
+  if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1))
+    return false;
+
+  // Alias Analysis to check for store b/w the loads.
+  MemoryLocation Loc = MemoryLocation::get(LI2);
+  unsigned NumScanned = 0;
+  for (Instruction &Inst : make_range(LI1->getIterator(), LI2->getIterator())) {
+    if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
+      return false;
+    if (++NumScanned > MaxInstrsToScan)
+      return false;
+  }
+
+  // Big endian swap the shifts
+  if (IsBigEndian)
+    std::swap(ShAmt1, ShAmt2);
+
+  // Find Shifts values.
+  const APInt *Temp;
+  uint64_t Shift1 = 0, Shift2 = 0;
+  if (ShAmt1 && match(ShAmt1, m_APInt(Temp)))
+    Shift1 = Temp->getZExtValue();
+  if (ShAmt2 && match(ShAmt2, m_APInt(Temp)))
+    Shift2 = Temp->getZExtValue();
+
+  // First load is always LI1. This is where we put the new load.
+  // Use the merged load size available from LI1, if we already combined loads.
+  if (LOps.FoundRoot)
+    LoadSize1 = LOps.LoadSize;
+
+  // Verify if shift amount and load index aligns and verifies that loads
+  // are consecutive.
+  uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
+  uint64_t PrevSize =
+      DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
+  if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
+    return false;
+
+  // Update LOps
+  AAMDNodes AATags1 = LOps.AATags;
+  AAMDNodes AATags2 = LI2->getAAMetadata();
+  if (LOps.FoundRoot == false) {
+    LOps.FoundRoot = true;
+    LOps.LoadSize = LoadSize1 + LoadSize2;
+    AATags1 = LI1->getAAMetadata();
+  } else
+    LOps.LoadSize = LOps.LoadSize + LoadSize2;
+
+  // Concatenate the AATags of the Merged Loads.
+  LOps.AATags = AATags1.concat(AATags2);
+
+  LOps.Root = LI1;
+  LOps.Shift = ShAmt1;
+  LOps.ZextType = X->getType();
+  return true;
+}
+
+// For a given BB instruction, evaluate all loads in the chain that form a
+// pattern which suggests that the loads can be combined. The one and only use
+// of the loads is to form a wider load.
+static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
+                                 TargetTransformInfo &TTI, AliasAnalysis &AA) {
+  LoadOps LOps;
+  if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot)
+    return false;
+
+  IRBuilder<> Builder(&I);
+  LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;
+
+  // TTI based checks if we want to proceed with wider load
+  bool Allowed =
+      TTI.isTypeLegal(IntegerType::get(I.getContext(), LOps.LoadSize));
+  if (!Allowed)
+    return false;
+
+  unsigned AS = LI1->getPointerAddressSpace();
+  bool Fast = false;
+  Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
+                                               AS, LI1->getAlign(), &Fast);
+  if (!Allowed || !Fast)
+    return false;
+
+  // New load can be generated
+  Value *Load1Ptr = LI1->getPointerOperand();
+  Builder.SetInsertPoint(LI1);
+  NewLoad = Builder.CreateAlignedLoad(
+      IntegerType::get(Load1Ptr->getContext(), LOps.LoadSize), Load1Ptr,
+      LI1->getAlign(), LI1->isVolatile(), "");
+  NewLoad->takeName(LI1);
+  // Set the New Load AATags Metadata.
+  if (LOps.AATags)
+    NewLoad->setAAMetadata(LOps.AATags);
+
+  Value *NewOp = NewLoad;
+  // Check if zero extend needed.
+  if (LOps.ZextType)
+    NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);
+
+  // Check if shift needed. We need to shift with the amount of load1
+  // shift if not zero.
+  if (LOps.Shift)
+    NewOp = Builder.CreateShl(NewOp, LOps.Shift);
+  I.replaceAllUsesWith(NewOp);
+
+  return true;
+}
+
 /// This is the entry point for folds that could be implemented in regular
 /// InstCombine, but they are separated because they are not expected to
 /// occur frequently and/or have more than a constant-length pattern match.
 static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
                                 TargetTransformInfo &TTI,
-                                TargetLibraryInfo &TLI) {
+                                TargetLibraryInfo &TLI, AliasAnalysis &AA) {
   bool MadeChange = false;
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
     if (!DT.isReachableFromEntry(&BB))
       continue;
 
+    const DataLayout &DL = F.getParent()->getDataLayout();
+
     // Walk the block backwards for efficiency. We're matching a chain of
     // use->defs, so we're more likely to succeed by starting from the bottom.
     // Also, we want to avoid matching partial patterns.
@@ -658,6 +852,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
       MadeChange |= tryToRecognizePopCount(I);
       MadeChange |= tryToFPToSat(I, TTI);
       MadeChange |= tryToRecognizeTableBasedCttz(I);
+      MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA);
       // NOTE: This function introduces erasing of the instruction `I`, so it
       // needs to be called at the end of this sequence, otherwise we may make
       // bugs.
@@ -676,12 +871,13 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
 /// This is the entry point for all transforms. Pass manager differences are
 /// handled in the callers of this function.
 static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
-                    TargetLibraryInfo &TLI, DominatorTree &DT) {
+                    TargetLibraryInfo &TLI, DominatorTree &DT,
+                    AliasAnalysis &AA) {
   bool MadeChange = false;
   const DataLayout &DL = F.getParent()->getDataLayout();
   TruncInstCombine TIC(AC, TLI, DL, DT);
   MadeChange |= TIC.run(F);
-  MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI);
+  MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA);
   return MadeChange;
 }
 
@@ -696,6 +892,7 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
   AU.addPreserved<BasicAAWrapperPass>();
   AU.addPreserved<DominatorTreeWrapperPass>();
   AU.addPreserved<GlobalsAAWrapperPass>();
+  AU.addRequired<AAResultsWrapperPass>();
 }
 
 bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
@@ -703,7 +900,8 @@ bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-  return runImpl(F, AC, TTI, TLI, DT);
+  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+  return runImpl(F, AC, TTI, TLI, DT, AA);
 }
 
 PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
@@ -712,7 +910,8 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
-  if (!runImpl(F, AC, TTI, TLI, DT)) {
+  auto &AA = AM.getResult<AAManager>(F);
+  if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
   }

diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index 59a459e8243a0..0caafa8dc3442 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="e-n64" | FileCheck %s --check-prefixes=ALL
-; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="E-n64" | FileCheck %s --check-prefixes=ALL
+; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="e-n64" | FileCheck %s --check-prefixes=ALL,LE
+; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="E-n64" | FileCheck %s --check-prefixes=ALL,BE
 
 define i16 @loadCombine_2consecutive(ptr %p) {
 ;
@@ -46,25 +46,29 @@ define i16 @loadCombine_2consecutive_BE(ptr %p) {
 }
 
 define i32 @loadCombine_4consecutive(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive(
+; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT:    ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT:    ret i32 [[O3]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -90,25 +94,29 @@ define i32 @loadCombine_4consecutive(ptr %p) {
 }
 
 define i32 @loadCombine_4consecutive_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_BE(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_BE(
+; LE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT:    ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_BE(
+; BE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; BE-NEXT:    ret i32 [[L1]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -134,26 +142,31 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) {
 }
 
 define i32 @loadCombine_4consecutive_alias(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    store i8 10, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias(
+; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT:    store i8 10, ptr [[P]], align 1
+; LE-NEXT:    ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT:    store i8 10, ptr [[P]], align 1
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT:    ret i32 [[O3]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -180,26 +193,31 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) {
 }
 
 define i32 @loadCombine_4consecutive_alias_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias_BE(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    store i8 10, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias_BE(
+; LE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT:    store i8 10, ptr [[P]], align 1
+; LE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT:    ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias_BE(
+; BE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; BE-NEXT:    store i8 10, ptr [[P]], align 1
+; BE-NEXT:    ret i32 [[L1]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -711,25 +729,40 @@ define i32 @loadCombine_parLoad1(ptr %p) {
 }
 
 define i128 @loadCombine_i128(ptr %p) {
-; ALL-LABEL: @loadCombine_i128(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i32, ptr [[P]], align 4
-; ALL-NEXT:    [[L2:%.*]] = load i32, ptr [[P1]], align 4
-; ALL-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
-; ALL-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
-; ALL-NEXT:    [[E1:%.*]] = zext i32 [[L1]] to i128
-; ALL-NEXT:    [[E2:%.*]] = zext i32 [[L2]] to i128
-; ALL-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
-; ALL-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
-; ALL-NEXT:    [[S2:%.*]] = shl i128 [[E2]], 32
-; ALL-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 64
-; ALL-NEXT:    [[S4:%.*]] = shl i128 [[E4]], 96
-; ALL-NEXT:    [[O1:%.*]] = or i128 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i128 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[S4]]
-; ALL-NEXT:    ret i128 [[O3]]
+; LE-LABEL: @loadCombine_i128(
+; LE-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i64, ptr [[P]], align 4
+; LE-NEXT:    [[TMP1:%.*]] = zext i64 [[L1]] to i128
+; LE-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; LE-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; LE-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
+; LE-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
+; LE-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 64
+; LE-NEXT:    [[S4:%.*]] = shl i128 [[E4]], 96
+; LE-NEXT:    [[O2:%.*]] = or i128 [[TMP1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[S4]]
+; LE-NEXT:    ret i128 [[O3]]
+;
+; BE-LABEL: @loadCombine_i128(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i32, ptr [[P]], align 4
+; BE-NEXT:    [[L2:%.*]] = load i32, ptr [[P1]], align 4
+; BE-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; BE-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT:    [[E1:%.*]] = zext i32 [[L1]] to i128
+; BE-NEXT:    [[E2:%.*]] = zext i32 [[L2]] to i128
+; BE-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
+; BE-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
+; BE-NEXT:    [[S2:%.*]] = shl i128 [[E2]], 32
+; BE-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 64
+; BE-NEXT:    [[S4:%.*]] = shl i128 [[E4]], 96
+; BE-NEXT:    [[O1:%.*]] = or i128 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i128 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[S4]]
+; BE-NEXT:    ret i128 [[O3]]
 ;
   %p1 = getelementptr i32, ptr %p, i32 1
   %p2 = getelementptr i32, ptr %p, i32 2
@@ -755,25 +788,40 @@ define i128 @loadCombine_i128(ptr %p) {
 }
 
 define i128 @loadCombine_i128_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_i128_BE(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i32, ptr [[P]], align 4
-; ALL-NEXT:    [[L2:%.*]] = load i32, ptr [[P1]], align 4
-; ALL-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
-; ALL-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
-; ALL-NEXT:    [[E1:%.*]] = zext i32 [[L1]] to i128
-; ALL-NEXT:    [[E2:%.*]] = zext i32 [[L2]] to i128
-; ALL-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
-; ALL-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
-; ALL-NEXT:    [[S1:%.*]] = shl i128 [[E1]], 96
-; ALL-NEXT:    [[S2:%.*]] = shl i128 [[E2]], 64
-; ALL-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 32
-; ALL-NEXT:    [[O1:%.*]] = or i128 [[S1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i128 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[E4]]
-; ALL-NEXT:    ret i128 [[O3]]
+; LE-LABEL: @loadCombine_i128_BE(
+; LE-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P]], align 4
+; LE-NEXT:    [[L2:%.*]] = load i32, ptr [[P1]], align 4
+; LE-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; LE-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; LE-NEXT:    [[E1:%.*]] = zext i32 [[L1]] to i128
+; LE-NEXT:    [[E2:%.*]] = zext i32 [[L2]] to i128
+; LE-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
+; LE-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
+; LE-NEXT:    [[S1:%.*]] = shl i128 [[E1]], 96
+; LE-NEXT:    [[S2:%.*]] = shl i128 [[E2]], 64
+; LE-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 32
+; LE-NEXT:    [[O1:%.*]] = or i128 [[S1]], [[S2]]
+; LE-NEXT:    [[O2:%.*]] = or i128 [[O1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[E4]]
+; LE-NEXT:    ret i128 [[O3]]
+;
+; BE-LABEL: @loadCombine_i128_BE(
+; BE-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i64, ptr [[P]], align 4
+; BE-NEXT:    [[TMP1:%.*]] = zext i64 [[L1]] to i128
+; BE-NEXT:    [[TMP2:%.*]] = shl i128 [[TMP1]], 64
+; BE-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; BE-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
+; BE-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
+; BE-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 32
+; BE-NEXT:    [[O2:%.*]] = or i128 [[TMP2]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[E4]]
+; BE-NEXT:    ret i128 [[O3]]
 ;
   %p1 = getelementptr i32, ptr %p, i32 1
   %p2 = getelementptr i32, ptr %p, i32 2
@@ -799,25 +847,29 @@ define i128 @loadCombine_i128_BE(ptr %p) {
 }
 
 define i64 @loadCombine_i64(ptr %p) {
-; ALL-LABEL: @loadCombine_i64(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT:    [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; ALL-NEXT:    [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; ALL-NEXT:    [[L4:%.*]] = load i16, ptr [[P3]], align 2
-; ALL-NEXT:    [[E1:%.*]] = zext i16 [[L1]] to i64
-; ALL-NEXT:    [[E2:%.*]] = zext i16 [[L2]] to i64
-; ALL-NEXT:    [[E3:%.*]] = zext i16 [[L3]] to i64
-; ALL-NEXT:    [[E4:%.*]] = zext i16 [[L4]] to i64
-; ALL-NEXT:    [[S2:%.*]] = shl i64 [[E2]], 16
-; ALL-NEXT:    [[S3:%.*]] = shl i64 [[E3]], 32
-; ALL-NEXT:    [[S4:%.*]] = shl i64 [[E4]], 48
-; ALL-NEXT:    [[O1:%.*]] = or i64 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i64 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i64 [[O2]], [[S4]]
-; ALL-NEXT:    ret i64 [[O3]]
+; LE-LABEL: @loadCombine_i64(
+; LE-NEXT:    [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2
+; LE-NEXT:    ret i64 [[L1]]
+;
+; BE-LABEL: @loadCombine_i64(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 2
+; BE-NEXT:    [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; BE-NEXT:    [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; BE-NEXT:    [[L4:%.*]] = load i16, ptr [[P3]], align 2
+; BE-NEXT:    [[E1:%.*]] = zext i16 [[L1]] to i64
+; BE-NEXT:    [[E2:%.*]] = zext i16 [[L2]] to i64
+; BE-NEXT:    [[E3:%.*]] = zext i16 [[L3]] to i64
+; BE-NEXT:    [[E4:%.*]] = zext i16 [[L4]] to i64
+; BE-NEXT:    [[S2:%.*]] = shl i64 [[E2]], 16
+; BE-NEXT:    [[S3:%.*]] = shl i64 [[E3]], 32
+; BE-NEXT:    [[S4:%.*]] = shl i64 [[E4]], 48
+; BE-NEXT:    [[O1:%.*]] = or i64 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i64 [[O2]], [[S4]]
+; BE-NEXT:    ret i64 [[O3]]
 ;
   %p1 = getelementptr i16, ptr %p, i32 1
   %p2 = getelementptr i16, ptr %p, i32 2
@@ -843,25 +895,29 @@ define i64 @loadCombine_i64(ptr %p) {
 }
 
 define i64 @loadCombine_i64_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_i64_BE(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT:    [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; ALL-NEXT:    [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; ALL-NEXT:    [[L4:%.*]] = load i16, ptr [[P3]], align 2
-; ALL-NEXT:    [[E1:%.*]] = zext i16 [[L1]] to i64
-; ALL-NEXT:    [[E2:%.*]] = zext i16 [[L2]] to i64
-; ALL-NEXT:    [[E3:%.*]] = zext i16 [[L3]] to i64
-; ALL-NEXT:    [[E4:%.*]] = zext i16 [[L4]] to i64
-; ALL-NEXT:    [[S1:%.*]] = shl i64 [[E1]], 48
-; ALL-NEXT:    [[S2:%.*]] = shl i64 [[E2]], 32
-; ALL-NEXT:    [[S3:%.*]] = shl i64 [[E3]], 16
-; ALL-NEXT:    [[O1:%.*]] = or i64 [[S1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i64 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i64 [[O2]], [[E4]]
-; ALL-NEXT:    ret i64 [[O3]]
+; LE-LABEL: @loadCombine_i64_BE(
+; LE-NEXT:    [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 2
+; LE-NEXT:    [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; LE-NEXT:    [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; LE-NEXT:    [[L4:%.*]] = load i16, ptr [[P3]], align 2
+; LE-NEXT:    [[E1:%.*]] = zext i16 [[L1]] to i64
+; LE-NEXT:    [[E2:%.*]] = zext i16 [[L2]] to i64
+; LE-NEXT:    [[E3:%.*]] = zext i16 [[L3]] to i64
+; LE-NEXT:    [[E4:%.*]] = zext i16 [[L4]] to i64
+; LE-NEXT:    [[S1:%.*]] = shl i64 [[E1]], 48
+; LE-NEXT:    [[S2:%.*]] = shl i64 [[E2]], 32
+; LE-NEXT:    [[S3:%.*]] = shl i64 [[E3]], 16
+; LE-NEXT:    [[O1:%.*]] = or i64 [[S1]], [[S2]]
+; LE-NEXT:    [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i64 [[O2]], [[E4]]
+; LE-NEXT:    ret i64 [[O3]]
+;
+; BE-LABEL: @loadCombine_i64_BE(
+; BE-NEXT:    [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2
+; BE-NEXT:    ret i64 [[L1]]
 ;
   %p1 = getelementptr i16, ptr %p, i32 1
   %p2 = getelementptr i16, ptr %p, i32 2
@@ -975,46 +1031,51 @@ define i16 @loadCombine_2consecutive_separateptr(ptr %p, ptr %p2) {
 }
 
 define i64 @load64_farLoads(ptr %ptr) {
-; ALL-LABEL: @load64_farLoads(
-; ALL-NEXT:  entry:
-; ALL-NEXT:    [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
-; ALL-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i64
-; ALL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
-; ALL-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
-; ALL-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP1]] to i64
-; ALL-NEXT:    [[SHL:%.*]] = shl i64 [[CONV2]], 8
-; ALL-NEXT:    [[OR:%.*]] = or i64 [[CONV]], [[SHL]]
-; ALL-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
-; ALL-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
-; ALL-NEXT:    [[CONV4:%.*]] = zext i8 [[TMP2]] to i64
-; ALL-NEXT:    [[SHL5:%.*]] = shl i64 [[CONV4]], 16
-; ALL-NEXT:    [[OR6:%.*]] = or i64 [[OR]], [[SHL5]]
-; ALL-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
-; ALL-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1
-; ALL-NEXT:    [[CONV8:%.*]] = zext i8 [[TMP3]] to i64
-; ALL-NEXT:    [[SHL9:%.*]] = shl i64 [[CONV8]], 24
-; ALL-NEXT:    [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]]
-; ALL-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
-; ALL-NEXT:    [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1
-; ALL-NEXT:    [[CONV12:%.*]] = zext i8 [[TMP4]] to i64
-; ALL-NEXT:    [[SHL13:%.*]] = shl i64 [[CONV12]], 32
-; ALL-NEXT:    [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]]
-; ALL-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
-; ALL-NEXT:    [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1
-; ALL-NEXT:    [[CONV16:%.*]] = zext i8 [[TMP5]] to i64
-; ALL-NEXT:    [[SHL17:%.*]] = shl i64 [[CONV16]], 40
-; ALL-NEXT:    [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]]
-; ALL-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
-; ALL-NEXT:    [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1
-; ALL-NEXT:    [[CONV20:%.*]] = zext i8 [[TMP6]] to i64
-; ALL-NEXT:    [[SHL21:%.*]] = shl i64 [[CONV20]], 48
-; ALL-NEXT:    [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]]
-; ALL-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7
-; ALL-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1
-; ALL-NEXT:    [[CONV24:%.*]] = zext i8 [[TMP7]] to i64
-; ALL-NEXT:    [[SHL25:%.*]] = shl i64 [[CONV24]], 56
-; ALL-NEXT:    [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]]
-; ALL-NEXT:    ret i64 [[OR26]]
+; LE-LABEL: @load64_farLoads(
+; LE-NEXT:  entry:
+; LE-NEXT:    [[TMP0:%.*]] = load i64, ptr [[PTR:%.*]], align 1
+; LE-NEXT:    ret i64 [[TMP0]]
+;
+; BE-LABEL: @load64_farLoads(
+; BE-NEXT:  entry:
+; BE-NEXT:    [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
+; BE-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i64
+; BE-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; BE-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; BE-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP1]] to i64
+; BE-NEXT:    [[SHL:%.*]] = shl i64 [[CONV2]], 8
+; BE-NEXT:    [[OR:%.*]] = or i64 [[CONV]], [[SHL]]
+; BE-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
+; BE-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
+; BE-NEXT:    [[CONV4:%.*]] = zext i8 [[TMP2]] to i64
+; BE-NEXT:    [[SHL5:%.*]] = shl i64 [[CONV4]], 16
+; BE-NEXT:    [[OR6:%.*]] = or i64 [[OR]], [[SHL5]]
+; BE-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
+; BE-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1
+; BE-NEXT:    [[CONV8:%.*]] = zext i8 [[TMP3]] to i64
+; BE-NEXT:    [[SHL9:%.*]] = shl i64 [[CONV8]], 24
+; BE-NEXT:    [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]]
+; BE-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
+; BE-NEXT:    [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1
+; BE-NEXT:    [[CONV12:%.*]] = zext i8 [[TMP4]] to i64
+; BE-NEXT:    [[SHL13:%.*]] = shl i64 [[CONV12]], 32
+; BE-NEXT:    [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]]
+; BE-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
+; BE-NEXT:    [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1
+; BE-NEXT:    [[CONV16:%.*]] = zext i8 [[TMP5]] to i64
+; BE-NEXT:    [[SHL17:%.*]] = shl i64 [[CONV16]], 40
+; BE-NEXT:    [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]]
+; BE-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
+; BE-NEXT:    [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1
+; BE-NEXT:    [[CONV20:%.*]] = zext i8 [[TMP6]] to i64
+; BE-NEXT:    [[SHL21:%.*]] = shl i64 [[CONV20]], 48
+; BE-NEXT:    [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]]
+; BE-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7
+; BE-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1
+; BE-NEXT:    [[CONV24:%.*]] = zext i8 [[TMP7]] to i64
+; BE-NEXT:    [[SHL25:%.*]] = shl i64 [[CONV24]], 56
+; BE-NEXT:    [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]]
+; BE-NEXT:    ret i64 [[OR26]]
 ;
 entry:
   %0 = load i8, ptr %ptr, align 1
@@ -1058,26 +1119,31 @@ entry:
 }
 
 define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) {
-; ALL-LABEL: @loadCombine_4consecutive_metadata(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0
-; ALL-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_metadata(
+; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0
+; LE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; LE-NEXT:    ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_metadata(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0
+; BE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT:    ret i32 [[O3]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2

diff  --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 9258fa4f59e82..5494b6f8d4c7e 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1,18 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="e-n64" -S | FileCheck %s --check-prefixes=ALL
-; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="E-n64" -S | FileCheck %s --check-prefixes=ALL
+; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="e-n64" -S | FileCheck %s --check-prefixes=ALL,LE
+; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="E-n64" -S | FileCheck %s --check-prefixes=ALL,BE
 
 define i16 @loadCombine_2consecutive(ptr %p) {
 ;
-; ALL-LABEL: @loadCombine_2consecutive(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i16
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i16
-; ALL-NEXT:    [[S2:%.*]] = shl i16 [[E2]], 8
-; ALL-NEXT:    [[O1:%.*]] = or i16 [[E1]], [[S2]]
-; ALL-NEXT:    ret i16 [[O1]]
+; LE-LABEL: @loadCombine_2consecutive(
+; LE-NEXT:    [[L1:%.*]] = load i16, ptr [[P:%.*]], align 1
+; LE-NEXT:    ret i16 [[L1]]
+;
+; BE-LABEL: @loadCombine_2consecutive(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i16
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i16
+; BE-NEXT:    [[S2:%.*]] = shl i16 [[E2]], 8
+; BE-NEXT:    [[O1:%.*]] = or i16 [[E1]], [[S2]]
+; BE-NEXT:    ret i16 [[O1]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %l1 = load i8, ptr %p
@@ -25,15 +29,19 @@ define i16 @loadCombine_2consecutive(ptr %p) {
 }
 
 define i16 @loadCombine_2consecutive_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_BE(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i16
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i16
-; ALL-NEXT:    [[S1:%.*]] = shl i16 [[E1]], 8
-; ALL-NEXT:    [[O1:%.*]] = or i16 [[S1]], [[E2]]
-; ALL-NEXT:    ret i16 [[O1]]
+; LE-LABEL: @loadCombine_2consecutive_BE(
+; LE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i16
+; LE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i16
+; LE-NEXT:    [[S1:%.*]] = shl i16 [[E1]], 8
+; LE-NEXT:    [[O1:%.*]] = or i16 [[S1]], [[E2]]
+; LE-NEXT:    ret i16 [[O1]]
+;
+; BE-LABEL: @loadCombine_2consecutive_BE(
+; BE-NEXT:    [[L1:%.*]] = load i16, ptr [[P:%.*]], align 1
+; BE-NEXT:    ret i16 [[L1]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %l1 = load i8, ptr %p
@@ -46,25 +54,29 @@ define i16 @loadCombine_2consecutive_BE(ptr %p) {
 }
 
 define i32 @loadCombine_4consecutive(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive(
+; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT:    ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT:    ret i32 [[O3]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -90,25 +102,29 @@ define i32 @loadCombine_4consecutive(ptr %p) {
 }
 
 define i32 @loadCombine_4consecutive_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_BE(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_BE(
+; LE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT:    ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_BE(
+; BE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; BE-NEXT:    ret i32 [[L1]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -134,26 +150,31 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) {
 }
 
 define i32 @loadCombine_4consecutive_alias(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    store i8 10, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias(
+; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT:    store i8 10, ptr [[P]], align 1
+; LE-NEXT:    ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT:    store i8 10, ptr [[P]], align 1
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT:    ret i32 [[O3]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -180,26 +201,31 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) {
 }
 
 define i32 @loadCombine_4consecutive_alias_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias_BE(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    store i8 10, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias_BE(
+; LE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT:    store i8 10, ptr [[P]], align 1
+; LE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT:    ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias_BE(
+; BE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; BE-NEXT:    store i8 10, ptr [[P]], align 1
+; BE-NEXT:    ret i32 [[L1]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -226,26 +252,42 @@ define i32 @loadCombine_4consecutive_alias_BE(ptr %p) {
 }
 
 define i32 @loadCombine_4consecutive_alias2(ptr %p, ptr %pstr) {
-; ALL-LABEL: @loadCombine_4consecutive_alias2(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    store i8 10, ptr [[PSTR:%.*]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias2(
+; LE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 1
+; LE-NEXT:    [[TMP1:%.*]] = zext i16 [[L1]] to i32
+; LE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT:    store i8 10, ptr [[PSTR:%.*]], align 1
+; LE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; LE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; LE-NEXT:    [[O2:%.*]] = or i32 [[TMP1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; LE-NEXT:    ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias2(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT:    store i8 10, ptr [[PSTR:%.*]], align 1
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT:    ret i32 [[O3]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -272,26 +314,42 @@ define i32 @loadCombine_4consecutive_alias2(ptr %p, ptr %pstr) {
 }
 
 define i32 @loadCombine_4consecutive_alias2_BE(ptr %p, ptr %pstr) {
-; ALL-LABEL: @loadCombine_4consecutive_alias2_BE(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    store i8 10, ptr [[PSTR:%.*]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias2_BE(
+; LE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT:    store i8 10, ptr [[PSTR:%.*]], align 1
+; LE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT:    ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias2_BE(
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 1
+; BE-NEXT:    [[TMP1:%.*]] = zext i16 [[L1]] to i32
+; BE-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 16
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT:    store i8 10, ptr [[PSTR:%.*]], align 1
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
+; BE-NEXT:    [[O2:%.*]] = or i32 [[TMP2]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; BE-NEXT:    ret i32 [[O3]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -318,27 +376,44 @@ define i32 @loadCombine_4consecutive_alias2_BE(ptr %p, ptr %pstr) {
 }
 
 define i32 @loadCombine_4consecutive_alias3(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias3(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    store i8 10, ptr [[P3]], align 1
-; ALL-NEXT:    store i8 5, ptr [[P]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias3(
+; LE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 1
+; LE-NEXT:    [[TMP1:%.*]] = zext i16 [[L1]] to i32
+; LE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT:    store i8 10, ptr [[P3]], align 1
+; LE-NEXT:    store i8 5, ptr [[P]], align 1
+; LE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; LE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; LE-NEXT:    [[O2:%.*]] = or i32 [[TMP1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; LE-NEXT:    ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias3(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT:    store i8 10, ptr [[P3]], align 1
+; BE-NEXT:    store i8 5, ptr [[P]], align 1
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT:    ret i32 [[O3]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -366,27 +441,44 @@ define i32 @loadCombine_4consecutive_alias3(ptr %p) {
 }
 
 define i32 @loadCombine_4consecutive_alias3_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_alias3_BE(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    store i8 10, ptr [[P3]], align 1
-; ALL-NEXT:    store i8 5, ptr [[P]], align 1
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_alias3_BE(
+; LE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT:    store i8 10, ptr [[P3]], align 1
+; LE-NEXT:    store i8 5, ptr [[P]], align 1
+; LE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT:    [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT:    ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias3_BE(
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 1
+; BE-NEXT:    [[TMP1:%.*]] = zext i16 [[L1]] to i32
+; BE-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 16
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT:    store i8 10, ptr [[P3]], align 1
+; BE-NEXT:    store i8 5, ptr [[P]], align 1
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 8
+; BE-NEXT:    [[O2:%.*]] = or i32 [[TMP2]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; BE-NEXT:    ret i32 [[O3]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -677,20 +769,30 @@ define i32 @loadCombine_4consecutive_hasOneUse4(ptr %p) {
 }
 
 define i32 @loadCombine_parLoad1(ptr %p) {
-; ALL-LABEL: @loadCombine_parLoad1(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    ret i32 [[O2]]
+; LE-LABEL: @loadCombine_parLoad1(
+; LE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; LE-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 1
+; LE-NEXT:    [[TMP1:%.*]] = zext i16 [[L1]] to i32
+; LE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; LE-NEXT:    [[O2:%.*]] = or i32 [[TMP1]], [[S3]]
+; LE-NEXT:    ret i32 [[O2]]
+;
+; BE-LABEL: @loadCombine_parLoad1(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT:    ret i32 [[O2]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2
@@ -711,25 +813,40 @@ define i32 @loadCombine_parLoad1(ptr %p) {
 }
 
 define i128 @loadCombine_i128(ptr %p) {
-; ALL-LABEL: @loadCombine_i128(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i32, ptr [[P]], align 4
-; ALL-NEXT:    [[L2:%.*]] = load i32, ptr [[P1]], align 4
-; ALL-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
-; ALL-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
-; ALL-NEXT:    [[E1:%.*]] = zext i32 [[L1]] to i128
-; ALL-NEXT:    [[E2:%.*]] = zext i32 [[L2]] to i128
-; ALL-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
-; ALL-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
-; ALL-NEXT:    [[S2:%.*]] = shl i128 [[E2]], 32
-; ALL-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 64
-; ALL-NEXT:    [[S4:%.*]] = shl i128 [[E4]], 96
-; ALL-NEXT:    [[O1:%.*]] = or i128 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i128 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[S4]]
-; ALL-NEXT:    ret i128 [[O3]]
+; LE-LABEL: @loadCombine_i128(
+; LE-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i64, ptr [[P]], align 4
+; LE-NEXT:    [[TMP1:%.*]] = zext i64 [[L1]] to i128
+; LE-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; LE-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; LE-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
+; LE-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
+; LE-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 64
+; LE-NEXT:    [[S4:%.*]] = shl i128 [[E4]], 96
+; LE-NEXT:    [[O2:%.*]] = or i128 [[TMP1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[S4]]
+; LE-NEXT:    ret i128 [[O3]]
+;
+; BE-LABEL: @loadCombine_i128(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i32, ptr [[P]], align 4
+; BE-NEXT:    [[L2:%.*]] = load i32, ptr [[P1]], align 4
+; BE-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; BE-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT:    [[E1:%.*]] = zext i32 [[L1]] to i128
+; BE-NEXT:    [[E2:%.*]] = zext i32 [[L2]] to i128
+; BE-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
+; BE-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
+; BE-NEXT:    [[S2:%.*]] = shl i128 [[E2]], 32
+; BE-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 64
+; BE-NEXT:    [[S4:%.*]] = shl i128 [[E4]], 96
+; BE-NEXT:    [[O1:%.*]] = or i128 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i128 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[S4]]
+; BE-NEXT:    ret i128 [[O3]]
 ;
   %p1 = getelementptr i32, ptr %p, i32 1
   %p2 = getelementptr i32, ptr %p, i32 2
@@ -755,25 +872,40 @@ define i128 @loadCombine_i128(ptr %p) {
 }
 
 define i128 @loadCombine_i128_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_i128_BE(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i32, ptr [[P]], align 4
-; ALL-NEXT:    [[L2:%.*]] = load i32, ptr [[P1]], align 4
-; ALL-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
-; ALL-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
-; ALL-NEXT:    [[E1:%.*]] = zext i32 [[L1]] to i128
-; ALL-NEXT:    [[E2:%.*]] = zext i32 [[L2]] to i128
-; ALL-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
-; ALL-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
-; ALL-NEXT:    [[S1:%.*]] = shl i128 [[E1]], 96
-; ALL-NEXT:    [[S2:%.*]] = shl i128 [[E2]], 64
-; ALL-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 32
-; ALL-NEXT:    [[O1:%.*]] = or i128 [[S1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i128 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[E4]]
-; ALL-NEXT:    ret i128 [[O3]]
+; LE-LABEL: @loadCombine_i128_BE(
+; LE-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P]], align 4
+; LE-NEXT:    [[L2:%.*]] = load i32, ptr [[P1]], align 4
+; LE-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; LE-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; LE-NEXT:    [[E1:%.*]] = zext i32 [[L1]] to i128
+; LE-NEXT:    [[E2:%.*]] = zext i32 [[L2]] to i128
+; LE-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
+; LE-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
+; LE-NEXT:    [[S1:%.*]] = shl i128 [[E1]], 96
+; LE-NEXT:    [[S2:%.*]] = shl i128 [[E2]], 64
+; LE-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 32
+; LE-NEXT:    [[O1:%.*]] = or i128 [[S1]], [[S2]]
+; LE-NEXT:    [[O2:%.*]] = or i128 [[O1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[E4]]
+; LE-NEXT:    ret i128 [[O3]]
+;
+; BE-LABEL: @loadCombine_i128_BE(
+; BE-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i64, ptr [[P]], align 4
+; BE-NEXT:    [[TMP1:%.*]] = zext i64 [[L1]] to i128
+; BE-NEXT:    [[TMP2:%.*]] = shl i128 [[TMP1]], 64
+; BE-NEXT:    [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; BE-NEXT:    [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT:    [[E3:%.*]] = zext i32 [[L3]] to i128
+; BE-NEXT:    [[E4:%.*]] = zext i32 [[L4]] to i128
+; BE-NEXT:    [[S3:%.*]] = shl i128 [[E3]], 32
+; BE-NEXT:    [[O2:%.*]] = or i128 [[TMP2]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i128 [[O2]], [[E4]]
+; BE-NEXT:    ret i128 [[O3]]
 ;
   %p1 = getelementptr i32, ptr %p, i32 1
   %p2 = getelementptr i32, ptr %p, i32 2
@@ -799,25 +931,29 @@ define i128 @loadCombine_i128_BE(ptr %p) {
 }
 
 define i64 @loadCombine_i64(ptr %p) {
-; ALL-LABEL: @loadCombine_i64(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT:    [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; ALL-NEXT:    [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; ALL-NEXT:    [[L4:%.*]] = load i16, ptr [[P3]], align 2
-; ALL-NEXT:    [[E1:%.*]] = zext i16 [[L1]] to i64
-; ALL-NEXT:    [[E2:%.*]] = zext i16 [[L2]] to i64
-; ALL-NEXT:    [[E3:%.*]] = zext i16 [[L3]] to i64
-; ALL-NEXT:    [[E4:%.*]] = zext i16 [[L4]] to i64
-; ALL-NEXT:    [[S2:%.*]] = shl i64 [[E2]], 16
-; ALL-NEXT:    [[S3:%.*]] = shl i64 [[E3]], 32
-; ALL-NEXT:    [[S4:%.*]] = shl i64 [[E4]], 48
-; ALL-NEXT:    [[O1:%.*]] = or i64 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i64 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i64 [[O2]], [[S4]]
-; ALL-NEXT:    ret i64 [[O3]]
+; LE-LABEL: @loadCombine_i64(
+; LE-NEXT:    [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2
+; LE-NEXT:    ret i64 [[L1]]
+;
+; BE-LABEL: @loadCombine_i64(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 2
+; BE-NEXT:    [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; BE-NEXT:    [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; BE-NEXT:    [[L4:%.*]] = load i16, ptr [[P3]], align 2
+; BE-NEXT:    [[E1:%.*]] = zext i16 [[L1]] to i64
+; BE-NEXT:    [[E2:%.*]] = zext i16 [[L2]] to i64
+; BE-NEXT:    [[E3:%.*]] = zext i16 [[L3]] to i64
+; BE-NEXT:    [[E4:%.*]] = zext i16 [[L4]] to i64
+; BE-NEXT:    [[S2:%.*]] = shl i64 [[E2]], 16
+; BE-NEXT:    [[S3:%.*]] = shl i64 [[E3]], 32
+; BE-NEXT:    [[S4:%.*]] = shl i64 [[E4]], 48
+; BE-NEXT:    [[O1:%.*]] = or i64 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i64 [[O2]], [[S4]]
+; BE-NEXT:    ret i64 [[O3]]
 ;
   %p1 = getelementptr i16, ptr %p, i32 1
   %p2 = getelementptr i16, ptr %p, i32 2
@@ -843,25 +979,29 @@ define i64 @loadCombine_i64(ptr %p) {
 }
 
 define i64 @loadCombine_i64_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_i64_BE(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT:    [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; ALL-NEXT:    [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; ALL-NEXT:    [[L4:%.*]] = load i16, ptr [[P3]], align 2
-; ALL-NEXT:    [[E1:%.*]] = zext i16 [[L1]] to i64
-; ALL-NEXT:    [[E2:%.*]] = zext i16 [[L2]] to i64
-; ALL-NEXT:    [[E3:%.*]] = zext i16 [[L3]] to i64
-; ALL-NEXT:    [[E4:%.*]] = zext i16 [[L4]] to i64
-; ALL-NEXT:    [[S1:%.*]] = shl i64 [[E1]], 48
-; ALL-NEXT:    [[S2:%.*]] = shl i64 [[E2]], 32
-; ALL-NEXT:    [[S3:%.*]] = shl i64 [[E3]], 16
-; ALL-NEXT:    [[O1:%.*]] = or i64 [[S1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i64 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i64 [[O2]], [[E4]]
-; ALL-NEXT:    ret i64 [[O3]]
+; LE-LABEL: @loadCombine_i64_BE(
+; LE-NEXT:    [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; LE-NEXT:    [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
+; LE-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 2
+; LE-NEXT:    [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; LE-NEXT:    [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; LE-NEXT:    [[L4:%.*]] = load i16, ptr [[P3]], align 2
+; LE-NEXT:    [[E1:%.*]] = zext i16 [[L1]] to i64
+; LE-NEXT:    [[E2:%.*]] = zext i16 [[L2]] to i64
+; LE-NEXT:    [[E3:%.*]] = zext i16 [[L3]] to i64
+; LE-NEXT:    [[E4:%.*]] = zext i16 [[L4]] to i64
+; LE-NEXT:    [[S1:%.*]] = shl i64 [[E1]], 48
+; LE-NEXT:    [[S2:%.*]] = shl i64 [[E2]], 32
+; LE-NEXT:    [[S3:%.*]] = shl i64 [[E3]], 16
+; LE-NEXT:    [[O1:%.*]] = or i64 [[S1]], [[S2]]
+; LE-NEXT:    [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; LE-NEXT:    [[O3:%.*]] = or i64 [[O2]], [[E4]]
+; LE-NEXT:    ret i64 [[O3]]
+;
+; BE-LABEL: @loadCombine_i64_BE(
+; BE-NEXT:    [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2
+; BE-NEXT:    ret i64 [[L1]]
 ;
   %p1 = getelementptr i16, ptr %p, i32 1
   %p2 = getelementptr i16, ptr %p, i32 2
@@ -976,46 +1116,51 @@ define i16 @loadCombine_2consecutive_separateptr(ptr %p, ptr %p2) {
 }
 
 define i64 @load64_farLoads(ptr %ptr) {
-; ALL-LABEL: @load64_farLoads(
-; ALL-NEXT:  entry:
-; ALL-NEXT:    [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
-; ALL-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i64
-; ALL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
-; ALL-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
-; ALL-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP1]] to i64
-; ALL-NEXT:    [[SHL:%.*]] = shl i64 [[CONV2]], 8
-; ALL-NEXT:    [[OR:%.*]] = or i64 [[CONV]], [[SHL]]
-; ALL-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
-; ALL-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
-; ALL-NEXT:    [[CONV4:%.*]] = zext i8 [[TMP2]] to i64
-; ALL-NEXT:    [[SHL5:%.*]] = shl i64 [[CONV4]], 16
-; ALL-NEXT:    [[OR6:%.*]] = or i64 [[OR]], [[SHL5]]
-; ALL-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
-; ALL-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1
-; ALL-NEXT:    [[CONV8:%.*]] = zext i8 [[TMP3]] to i64
-; ALL-NEXT:    [[SHL9:%.*]] = shl i64 [[CONV8]], 24
-; ALL-NEXT:    [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]]
-; ALL-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
-; ALL-NEXT:    [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1
-; ALL-NEXT:    [[CONV12:%.*]] = zext i8 [[TMP4]] to i64
-; ALL-NEXT:    [[SHL13:%.*]] = shl i64 [[CONV12]], 32
-; ALL-NEXT:    [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]]
-; ALL-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
-; ALL-NEXT:    [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1
-; ALL-NEXT:    [[CONV16:%.*]] = zext i8 [[TMP5]] to i64
-; ALL-NEXT:    [[SHL17:%.*]] = shl i64 [[CONV16]], 40
-; ALL-NEXT:    [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]]
-; ALL-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
-; ALL-NEXT:    [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1
-; ALL-NEXT:    [[CONV20:%.*]] = zext i8 [[TMP6]] to i64
-; ALL-NEXT:    [[SHL21:%.*]] = shl i64 [[CONV20]], 48
-; ALL-NEXT:    [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]]
-; ALL-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7
-; ALL-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1
-; ALL-NEXT:    [[CONV24:%.*]] = zext i8 [[TMP7]] to i64
-; ALL-NEXT:    [[SHL25:%.*]] = shl i64 [[CONV24]], 56
-; ALL-NEXT:    [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]]
-; ALL-NEXT:    ret i64 [[OR26]]
+; LE-LABEL: @load64_farLoads(
+; LE-NEXT:  entry:
+; LE-NEXT:    [[TMP0:%.*]] = load i64, ptr [[PTR:%.*]], align 1
+; LE-NEXT:    ret i64 [[TMP0]]
+;
+; BE-LABEL: @load64_farLoads(
+; BE-NEXT:  entry:
+; BE-NEXT:    [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
+; BE-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i64
+; BE-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; BE-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; BE-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP1]] to i64
+; BE-NEXT:    [[SHL:%.*]] = shl i64 [[CONV2]], 8
+; BE-NEXT:    [[OR:%.*]] = or i64 [[CONV]], [[SHL]]
+; BE-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
+; BE-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
+; BE-NEXT:    [[CONV4:%.*]] = zext i8 [[TMP2]] to i64
+; BE-NEXT:    [[SHL5:%.*]] = shl i64 [[CONV4]], 16
+; BE-NEXT:    [[OR6:%.*]] = or i64 [[OR]], [[SHL5]]
+; BE-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
+; BE-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1
+; BE-NEXT:    [[CONV8:%.*]] = zext i8 [[TMP3]] to i64
+; BE-NEXT:    [[SHL9:%.*]] = shl i64 [[CONV8]], 24
+; BE-NEXT:    [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]]
+; BE-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
+; BE-NEXT:    [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1
+; BE-NEXT:    [[CONV12:%.*]] = zext i8 [[TMP4]] to i64
+; BE-NEXT:    [[SHL13:%.*]] = shl i64 [[CONV12]], 32
+; BE-NEXT:    [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]]
+; BE-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
+; BE-NEXT:    [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1
+; BE-NEXT:    [[CONV16:%.*]] = zext i8 [[TMP5]] to i64
+; BE-NEXT:    [[SHL17:%.*]] = shl i64 [[CONV16]], 40
+; BE-NEXT:    [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]]
+; BE-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
+; BE-NEXT:    [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1
+; BE-NEXT:    [[CONV20:%.*]] = zext i8 [[TMP6]] to i64
+; BE-NEXT:    [[SHL21:%.*]] = shl i64 [[CONV20]], 48
+; BE-NEXT:    [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]]
+; BE-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7
+; BE-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1
+; BE-NEXT:    [[CONV24:%.*]] = zext i8 [[TMP7]] to i64
+; BE-NEXT:    [[SHL25:%.*]] = shl i64 [[CONV24]], 56
+; BE-NEXT:    [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]]
+; BE-NEXT:    ret i64 [[OR26]]
 ;
 entry:
   %0 = load i8, ptr %ptr, align 1
@@ -1059,26 +1204,31 @@ entry:
 }
 
 define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) {
-; ALL-LABEL: @loadCombine_4consecutive_metadata(
-; ALL-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0
-; ALL-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0
-; ALL-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0
-; ALL-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0
-; ALL-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
-; ALL-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT:    ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_metadata(
+; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0
+; LE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; LE-NEXT:    ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_metadata(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0
+; BE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT:    ret i32 [[O3]]
 ;
   %p1 = getelementptr i8, ptr %p, i32 1
   %p2 = getelementptr i8, ptr %p, i32 2


        


More information about the llvm-commits mailing list