[llvm] 38f3e44 - [AggressiveInstCombine] Load merge the reverse load pattern of consecutive loads.
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 19 03:23:40 PDT 2022
Author: bipmis
Date: 2022-10-19T11:22:58+01:00
New Revision: 38f3e44997f06b4a0d176aff4ec09839a99d5f51
URL: https://github.com/llvm/llvm-project/commit/38f3e44997f06b4a0d176aff4ec09839a99d5f51
DIFF: https://github.com/llvm/llvm-project/commit/38f3e44997f06b4a0d176aff4ec09839a99d5f51.diff
LOG: [AggressiveInstCombine] Load merge the reverse load pattern of consecutive loads.
This patch extends the load merging/widening in AggressiveInstCombine to also handle reverse load patterns.
Differential Revision: https://reviews.llvm.org/D135137
Added:
Modified:
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
llvm/test/Transforms/PhaseOrdering/X86/loadcombine.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 0ac9881a5a710..52d0defae63c9 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -666,9 +666,11 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
m_Value(ShAmt2)))))) ||
match(V, m_OneUse(m_Or(m_Value(X),
- m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2))))))))
- foldLoadsRecursive(X, LOps, DL, AA);
- else
+ m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))))))) {
+ if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
+ // Avoid Partial chain merge.
+ return false;
+ } else
return false;
// Check if the pattern has loads
@@ -691,18 +693,6 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
if (LI1->getParent() != LI2->getParent())
return false;
- // Swap loads if LI1 comes later as we handle only forward loads.
- // This is done as InstCombine folds lowest node forward loads to reverse.
- // The implementation will be subsequently extended to handle all reverse
- // loads.
- if (!LI1->comesBefore(LI2)) {
- if (LOps.FoundRoot == false) {
- std::swap(LI1, LI2);
- std::swap(ShAmt1, ShAmt2);
- } else
- return false;
- }
-
// Find the data layout
bool IsBigEndian = DL.isBigEndian();
@@ -719,6 +709,16 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
/* AllowNonInbounds */ true);
+ // Make sure Load with lower Offset is at LI1
+ bool Reverse = false;
+ if (Offset2.slt(Offset1)) {
+ std::swap(LI1, LI2);
+ std::swap(ShAmt1, ShAmt2);
+ std::swap(Offset1, Offset2);
+ std::swap(Load1Ptr, Load2Ptr);
+ Reverse = true;
+ }
+
// Verify if both loads have same base pointers and load sizes are same.
uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
@@ -730,9 +730,13 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
return false;
// Alias Analysis to check for store b/w the loads.
- MemoryLocation Loc = MemoryLocation::get(LI2);
+ LoadInst *Start = LI1, *End = LI2;
+ if (!LI1->comesBefore(LI2))
+ std::swap(Start, End);
+ MemoryLocation Loc = MemoryLocation::get(End);
unsigned NumScanned = 0;
- for (Instruction &Inst : make_range(LI1->getIterator(), LI2->getIterator())) {
+ for (Instruction &Inst :
+ make_range(Start->getIterator(), End->getIterator())) {
if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
return false;
if (++NumScanned > MaxInstrsToScan)
@@ -752,9 +756,13 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
Shift2 = Temp->getZExtValue();
// First load is always LI1. This is where we put the new load.
- // Use the merged load size available from LI1, if we already combined loads.
- if (LOps.FoundRoot)
- LoadSize1 = LOps.LoadSize;
+ // Use the merged load size available from LI1 for forward loads.
+ if (LOps.FoundRoot) {
+ if (!Reverse)
+ LoadSize1 = LOps.LoadSize;
+ else
+ LoadSize2 = LOps.LoadSize;
+ }
// Verify if shift amount and load index aligns and verifies that loads
// are consecutive.
@@ -769,10 +777,9 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
AAMDNodes AATags2 = LI2->getAAMetadata();
if (LOps.FoundRoot == false) {
LOps.FoundRoot = true;
- LOps.LoadSize = LoadSize1 + LoadSize2;
AATags1 = LI1->getAAMetadata();
- } else
- LOps.LoadSize = LOps.LoadSize + LoadSize2;
+ }
+ LOps.LoadSize = LoadSize1 + LoadSize2;
// Concatenate the AATags of the Merged Loads.
LOps.AATags = AATags1.concat(AATags2);
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index f3258b83ab187..0be4a7dc792c4 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1220,25 +1220,29 @@ define i16 @loadCombine_4consecutive_4bit(ptr %p) {
}
define i32 @loadCombine_4consecutive_rev(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_rev(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[S4]], [[S3]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_rev(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_rev(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[S4]], [[S3]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -1264,45 +1268,49 @@ define i32 @loadCombine_4consecutive_rev(ptr %p) {
}
define i64 @loadCombine_8consecutive_rev(ptr %p) {
-; ALL-LABEL: @loadCombine_8consecutive_rev(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4
-; ALL-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5
-; ALL-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6
-; ALL-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1
-; ALL-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1
-; ALL-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1
-; ALL-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64
-; ALL-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64
-; ALL-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64
-; ALL-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64
-; ALL-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64
-; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 24
-; ALL-NEXT: [[S5:%.*]] = shl i64 [[E5]], 32
-; ALL-NEXT: [[S6:%.*]] = shl i64 [[E6]], 40
-; ALL-NEXT: [[S7:%.*]] = shl i64 [[E7]], 48
-; ALL-NEXT: [[S8:%.*]] = shl i64 [[E8]], 56
-; ALL-NEXT: [[O7:%.*]] = or i64 [[S8]], [[S7]]
-; ALL-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]]
-; ALL-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]]
-; ALL-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]]
-; ALL-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]]
-; ALL-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]]
-; ALL-NEXT: [[O1:%.*]] = or i64 [[O2]], [[E1]]
-; ALL-NEXT: ret i64 [[O1]]
+; LE-LABEL: @loadCombine_8consecutive_rev(
+; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i64 [[L1]]
+;
+; BE-LABEL: @loadCombine_8consecutive_rev(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; BE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5
+; BE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6
+; BE-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1
+; BE-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1
+; BE-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1
+; BE-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64
+; BE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64
+; BE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64
+; BE-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64
+; BE-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64
+; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 24
+; BE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 32
+; BE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 40
+; BE-NEXT: [[S7:%.*]] = shl i64 [[E7]], 48
+; BE-NEXT: [[S8:%.*]] = shl i64 [[E8]], 56
+; BE-NEXT: [[O7:%.*]] = or i64 [[S8]], [[S7]]
+; BE-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]]
+; BE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]]
+; BE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]]
+; BE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]]
+; BE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]]
+; BE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[E1]]
+; BE-NEXT: ret i64 [[O1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -1348,45 +1356,49 @@ define i64 @loadCombine_8consecutive_rev(ptr %p) {
}
define i64 @loadCombine_8consecutive_rev_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_8consecutive_rev_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4
-; ALL-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5
-; ALL-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6
-; ALL-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1
-; ALL-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1
-; ALL-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1
-; ALL-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64
-; ALL-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64
-; ALL-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64
-; ALL-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64
-; ALL-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64
-; ALL-NEXT: [[S1:%.*]] = shl i64 [[E1]], 56
-; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 48
-; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 40
-; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
-; ALL-NEXT: [[S5:%.*]] = shl i64 [[E5]], 24
-; ALL-NEXT: [[S6:%.*]] = shl i64 [[E6]], 16
-; ALL-NEXT: [[S7:%.*]] = shl i64 [[E7]], 8
-; ALL-NEXT: [[O7:%.*]] = or i64 [[E8]], [[S7]]
-; ALL-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]]
-; ALL-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]]
-; ALL-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]]
-; ALL-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]]
-; ALL-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]]
-; ALL-NEXT: [[O1:%.*]] = or i64 [[O2]], [[S1]]
-; ALL-NEXT: ret i64 [[O1]]
+; LE-LABEL: @loadCombine_8consecutive_rev_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; LE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5
+; LE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6
+; LE-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7
+; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1
+; LE-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1
+; LE-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1
+; LE-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64
+; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64
+; LE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64
+; LE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64
+; LE-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64
+; LE-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64
+; LE-NEXT: [[S1:%.*]] = shl i64 [[E1]], 56
+; LE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 48
+; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 40
+; LE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
+; LE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 24
+; LE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 16
+; LE-NEXT: [[S7:%.*]] = shl i64 [[E7]], 8
+; LE-NEXT: [[O7:%.*]] = or i64 [[E8]], [[S7]]
+; LE-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]]
+; LE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]]
+; LE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]]
+; LE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]]
+; LE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]]
+; LE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[S1]]
+; LE-NEXT: ret i64 [[O1]]
+;
+; BE-LABEL: @loadCombine_8consecutive_rev_BE(
+; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 1
+; BE-NEXT: ret i64 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -1432,45 +1444,49 @@ define i64 @loadCombine_8consecutive_rev_BE(ptr %p) {
}
define i64 @eggs(ptr noundef readonly %arg) {
-; ALL-LABEL: @eggs(
-; ALL-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARG:%.*]], align 1
-; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
-; ALL-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
-; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
-; ALL-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
-; ALL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 3
-; ALL-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
-; ALL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 4
-; ALL-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
-; ALL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 5
-; ALL-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
-; ALL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 6
-; ALL-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1
-; ALL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 7
-; ALL-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
-; ALL-NEXT: [[TMP18:%.*]] = zext i8 [[TMP17]] to i64
-; ALL-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 56
-; ALL-NEXT: [[TMP20:%.*]] = zext i8 [[TMP15]] to i64
-; ALL-NEXT: [[TMP21:%.*]] = shl nuw nsw i64 [[TMP20]], 48
-; ALL-NEXT: [[TMP22:%.*]] = or i64 [[TMP19]], [[TMP21]]
-; ALL-NEXT: [[TMP23:%.*]] = zext i8 [[TMP13]] to i64
-; ALL-NEXT: [[TMP24:%.*]] = shl nuw nsw i64 [[TMP23]], 40
-; ALL-NEXT: [[TMP25:%.*]] = or i64 [[TMP22]], [[TMP24]]
-; ALL-NEXT: [[TMP26:%.*]] = zext i8 [[TMP11]] to i64
-; ALL-NEXT: [[TMP27:%.*]] = shl nuw nsw i64 [[TMP26]], 32
-; ALL-NEXT: [[TMP28:%.*]] = or i64 [[TMP25]], [[TMP27]]
-; ALL-NEXT: [[TMP29:%.*]] = zext i8 [[TMP9]] to i64
-; ALL-NEXT: [[TMP30:%.*]] = shl nuw nsw i64 [[TMP29]], 24
-; ALL-NEXT: [[TMP31:%.*]] = or i64 [[TMP28]], [[TMP30]]
-; ALL-NEXT: [[TMP32:%.*]] = zext i8 [[TMP7]] to i64
-; ALL-NEXT: [[TMP33:%.*]] = shl nuw nsw i64 [[TMP32]], 16
-; ALL-NEXT: [[TMP34:%.*]] = zext i8 [[TMP5]] to i64
-; ALL-NEXT: [[TMP35:%.*]] = shl nuw nsw i64 [[TMP34]], 8
-; ALL-NEXT: [[TMP36:%.*]] = or i64 [[TMP31]], [[TMP33]]
-; ALL-NEXT: [[TMP37:%.*]] = zext i8 [[TMP3]] to i64
-; ALL-NEXT: [[TMP38:%.*]] = or i64 [[TMP36]], [[TMP35]]
-; ALL-NEXT: [[TMP39:%.*]] = or i64 [[TMP38]], [[TMP37]]
-; ALL-NEXT: ret i64 [[TMP39]]
+; LE-LABEL: @eggs(
+; LE-NEXT: [[TMP3:%.*]] = load i64, ptr [[ARG:%.*]], align 1
+; LE-NEXT: ret i64 [[TMP3]]
+;
+; BE-LABEL: @eggs(
+; BE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARG:%.*]], align 1
+; BE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
+; BE-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
+; BE-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
+; BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 3
+; BE-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
+; BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 4
+; BE-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
+; BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 5
+; BE-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
+; BE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 6
+; BE-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1
+; BE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 7
+; BE-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
+; BE-NEXT: [[TMP18:%.*]] = zext i8 [[TMP17]] to i64
+; BE-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 56
+; BE-NEXT: [[TMP20:%.*]] = zext i8 [[TMP15]] to i64
+; BE-NEXT: [[TMP21:%.*]] = shl nuw nsw i64 [[TMP20]], 48
+; BE-NEXT: [[TMP22:%.*]] = or i64 [[TMP19]], [[TMP21]]
+; BE-NEXT: [[TMP23:%.*]] = zext i8 [[TMP13]] to i64
+; BE-NEXT: [[TMP24:%.*]] = shl nuw nsw i64 [[TMP23]], 40
+; BE-NEXT: [[TMP25:%.*]] = or i64 [[TMP22]], [[TMP24]]
+; BE-NEXT: [[TMP26:%.*]] = zext i8 [[TMP11]] to i64
+; BE-NEXT: [[TMP27:%.*]] = shl nuw nsw i64 [[TMP26]], 32
+; BE-NEXT: [[TMP28:%.*]] = or i64 [[TMP25]], [[TMP27]]
+; BE-NEXT: [[TMP29:%.*]] = zext i8 [[TMP9]] to i64
+; BE-NEXT: [[TMP30:%.*]] = shl nuw nsw i64 [[TMP29]], 24
+; BE-NEXT: [[TMP31:%.*]] = or i64 [[TMP28]], [[TMP30]]
+; BE-NEXT: [[TMP32:%.*]] = zext i8 [[TMP7]] to i64
+; BE-NEXT: [[TMP33:%.*]] = shl nuw nsw i64 [[TMP32]], 16
+; BE-NEXT: [[TMP34:%.*]] = zext i8 [[TMP5]] to i64
+; BE-NEXT: [[TMP35:%.*]] = shl nuw nsw i64 [[TMP34]], 8
+; BE-NEXT: [[TMP36:%.*]] = or i64 [[TMP31]], [[TMP33]]
+; BE-NEXT: [[TMP37:%.*]] = zext i8 [[TMP3]] to i64
+; BE-NEXT: [[TMP38:%.*]] = or i64 [[TMP36]], [[TMP35]]
+; BE-NEXT: [[TMP39:%.*]] = or i64 [[TMP38]], [[TMP37]]
+; BE-NEXT: ret i64 [[TMP39]]
;
%tmp3 = load i8, ptr %arg, align 1
%tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
@@ -1683,25 +1699,29 @@ define i32 @loadCombine_4consecutive_mixsize2(ptr %p) {
}
define i32 @loadCombine_4consecutive_lower_index_comes_before(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_lower_index_comes_before(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_lower_index_comes_before(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_lower_index_comes_before(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index b5bed52c899a3..4588929209383 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1305,19 +1305,8 @@ define i16 @loadCombine_4consecutive_4bit(ptr %p) {
define i32 @loadCombine_4consecutive_rev(ptr %p) {
; LE-LABEL: @loadCombine_4consecutive_rev(
-; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 1
-; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L3]] to i32
-; LE-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 16
-; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; LE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S2]]
-; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]]
-; LE-NEXT: ret i32 [[O3]]
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
;
; BE-LABEL: @loadCombine_4consecutive_rev(
; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
@@ -1364,39 +1353,8 @@ define i32 @loadCombine_4consecutive_rev(ptr %p) {
define i64 @loadCombine_8consecutive_rev(ptr %p) {
; LE-LABEL: @loadCombine_8consecutive_rev(
-; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; LE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4
-; LE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5
-; LE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6
-; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; LE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1
-; LE-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1
-; LE-NEXT: [[L7:%.*]] = load i16, ptr [[P6]], align 1
-; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L7]] to i64
-; LE-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 48
-; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
-; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
-; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64
-; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64
-; LE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64
-; LE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64
-; LE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8
-; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
-; LE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 24
-; LE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 32
-; LE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 40
-; LE-NEXT: [[O6:%.*]] = or i64 [[TMP2]], [[S6]]
-; LE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]]
-; LE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]]
-; LE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]]
-; LE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]]
-; LE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[E1]]
-; LE-NEXT: ret i64 [[O1]]
+; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i64 [[L1]]
;
; BE-LABEL: @loadCombine_8consecutive_rev(
; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
@@ -1523,39 +1481,8 @@ define i64 @loadCombine_8consecutive_rev_BE(ptr %p) {
; LE-NEXT: ret i64 [[O1]]
;
; BE-LABEL: @loadCombine_8consecutive_rev_BE(
-; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; BE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4
-; BE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5
-; BE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6
-; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; BE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1
-; BE-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1
-; BE-NEXT: [[L7:%.*]] = load i16, ptr [[P6]], align 1
-; BE-NEXT: [[TMP1:%.*]] = zext i16 [[L7]] to i64
-; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
-; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
-; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64
-; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64
-; BE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64
-; BE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64
-; BE-NEXT: [[S1:%.*]] = shl i64 [[E1]], 56
-; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 48
-; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 40
-; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
-; BE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 24
-; BE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 16
-; BE-NEXT: [[O6:%.*]] = or i64 [[TMP1]], [[S6]]
-; BE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]]
-; BE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]]
-; BE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]]
-; BE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]]
-; BE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[S1]]
-; BE-NEXT: ret i64 [[O1]]
+; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 1
+; BE-NEXT: ret i64 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -1602,39 +1529,8 @@ define i64 @loadCombine_8consecutive_rev_BE(ptr %p) {
define i64 @eggs(ptr noundef readonly %arg) {
; LE-LABEL: @eggs(
-; LE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARG:%.*]], align 1
-; LE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
-; LE-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
-; LE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
-; LE-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
-; LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 3
-; LE-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
-; LE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 4
-; LE-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
-; LE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 5
-; LE-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
-; LE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 6
-; LE-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP14]], align 1
-; LE-NEXT: [[TMP1:%.*]] = zext i16 [[TMP15]] to i64
-; LE-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 48
-; LE-NEXT: [[TMP23:%.*]] = zext i8 [[TMP13]] to i64
-; LE-NEXT: [[TMP24:%.*]] = shl nuw nsw i64 [[TMP23]], 40
-; LE-NEXT: [[TMP25:%.*]] = or i64 [[TMP2]], [[TMP24]]
-; LE-NEXT: [[TMP26:%.*]] = zext i8 [[TMP11]] to i64
-; LE-NEXT: [[TMP27:%.*]] = shl nuw nsw i64 [[TMP26]], 32
-; LE-NEXT: [[TMP28:%.*]] = or i64 [[TMP25]], [[TMP27]]
-; LE-NEXT: [[TMP29:%.*]] = zext i8 [[TMP9]] to i64
-; LE-NEXT: [[TMP30:%.*]] = shl nuw nsw i64 [[TMP29]], 24
-; LE-NEXT: [[TMP31:%.*]] = or i64 [[TMP28]], [[TMP30]]
-; LE-NEXT: [[TMP32:%.*]] = zext i8 [[TMP7]] to i64
-; LE-NEXT: [[TMP33:%.*]] = shl nuw nsw i64 [[TMP32]], 16
-; LE-NEXT: [[TMP34:%.*]] = zext i8 [[TMP5]] to i64
-; LE-NEXT: [[TMP35:%.*]] = shl nuw nsw i64 [[TMP34]], 8
-; LE-NEXT: [[TMP36:%.*]] = or i64 [[TMP31]], [[TMP33]]
-; LE-NEXT: [[TMP37:%.*]] = zext i8 [[TMP3]] to i64
-; LE-NEXT: [[TMP38:%.*]] = or i64 [[TMP36]], [[TMP35]]
-; LE-NEXT: [[TMP39:%.*]] = or i64 [[TMP38]], [[TMP37]]
-; LE-NEXT: ret i64 [[TMP39]]
+; LE-NEXT: [[TMP3:%.*]] = load i64, ptr [[ARG:%.*]], align 1
+; LE-NEXT: ret i64 [[TMP3]]
;
; BE-LABEL: @eggs(
; BE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARG:%.*]], align 1
@@ -1917,25 +1813,29 @@ define i32 @loadCombine_4consecutive_mixsize2(ptr %p) {
}
define i32 @loadCombine_4consecutive_lower_index_comes_before(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_lower_index_comes_before(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive_lower_index_comes_before(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_lower_index_comes_before(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/loadcombine.ll b/llvm/test/Transforms/PhaseOrdering/X86/loadcombine.ll
index db4b6072cf595..524688343726b 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/loadcombine.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/loadcombine.ll
@@ -313,19 +313,8 @@ define i32 @loadCombine_4consecutive_2143(ptr %p) {
define i32 @loadCombine_4consecutive_2314(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_2314(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[L2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 8
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[E1]]
-; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; CHECK-NEXT: ret i32 [[O3]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; CHECK-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -352,19 +341,8 @@ define i32 @loadCombine_4consecutive_2314(ptr %p) {
define i32 @loadCombine_4consecutive_2341(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_2341(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[L2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 8
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[E1]]
-; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; CHECK-NEXT: ret i32 [[O3]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; CHECK-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -567,19 +545,8 @@ define i32 @loadCombine_4consecutive_3142(ptr %p) {
define i32 @loadCombine_4consecutive_3214(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_3214(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[L2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 8
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[E1]]
-; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; CHECK-NEXT: ret i32 [[O3]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; CHECK-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -606,19 +573,8 @@ define i32 @loadCombine_4consecutive_3214(ptr %p) {
define i32 @loadCombine_4consecutive_3241(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_3241(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[L2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 8
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[E1]]
-; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; CHECK-NEXT: ret i32 [[O3]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; CHECK-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -684,19 +640,8 @@ define i32 @loadCombine_4consecutive_3412(ptr %p) {
define i32 @loadCombine_4consecutive_3421(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_3421(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[L3]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 16
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8
-; CHECK-NEXT: [[O2:%.*]] = or i32 [[S2]], [[E1]]
-; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[TMP2]]
-; CHECK-NEXT: ret i32 [[O3]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; CHECK-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -938,19 +883,8 @@ define i32 @loadCombine_4consecutive_4312(ptr %p) {
define i32 @loadCombine_4consecutive_4321(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_4321(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[L3]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 16
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8
-; CHECK-NEXT: [[O2:%.*]] = or i32 [[S2]], [[E1]]
-; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[TMP2]]
-; CHECK-NEXT: ret i32 [[O3]]
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; CHECK-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
More information about the llvm-commits
mailing list