[llvm] [AggressiveInstCombine] Merge consecutive loads of mixed sizes (PR #129263)
Zhenyang Xu via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 00:32:42 PST 2025
https://github.com/Cancelll updated https://github.com/llvm/llvm-project/pull/129263
>From 343b383dc4ad620f2c8bbbb77250405ab08f348b Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Fri, 28 Feb 2025 23:45:23 +0800
Subject: [PATCH 01/12] [InstCombine] Add pre-commit tests. NFC.
---
.../AggressiveInstCombine/AArch64/or-load.ll | 112 +++++++++++++++
.../AggressiveInstCombine/X86/or-load.ll | 127 ++++++++++++++++++
2 files changed, 239 insertions(+)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index 10c4c9b0ca4c9..4f7579658da89 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1698,6 +1698,118 @@ define i32 @loadCombine_4consecutive_mixsize2(ptr %p) {
ret i32 %o2
}
+define i32 @loadCombine_4consecutive_mixsize3(ptr %p) {
+; ALL-LABEL: @loadCombine_4consecutive_mixsize3(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i32
+; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; ALL-NEXT: ret i32 [[O2]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %p2 = getelementptr i8, ptr %p, i32 3
+ %l1 = load i8, ptr %p
+ %l2 = load i16, ptr %p1
+ %l3 = load i8, ptr %p2
+
+ %e1 = zext i8 %l1 to i32
+ %e2 = zext i16 %l2 to i32
+ %e3 = zext i8 %l3 to i32
+
+ %s2 = shl i32 %e2, 8
+ %s3 = shl i32 %e3, 24
+
+ %o1 = or i32 %e1, %s2
+ %o2 = or i32 %o1, %s3
+ ret i32 %o2
+}
+
+define i16 @loadCombine_mixsize_4bit(ptr %p) {
+; ALL-LABEL: @loadCombine_mixsize_4bit(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 2
+; ALL-NEXT: [[P2:%.*]] = getelementptr i4, ptr [[P]], i32 3
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i4, ptr [[P1]], align 1
+; ALL-NEXT: [[L3:%.*]] = load i4, ptr [[P2]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16
+; ALL-NEXT: [[E2:%.*]] = zext i4 [[L2]] to i16
+; ALL-NEXT: [[E3:%.*]] = zext i4 [[L3]] to i16
+; ALL-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8
+; ALL-NEXT: [[S3:%.*]] = shl i16 [[E3]], 12
+; ALL-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]]
+; ALL-NEXT: [[O2:%.*]] = or i16 [[O1]], [[S3]]
+; ALL-NEXT: ret i16 [[O2]]
+;
+ %p1 = getelementptr i4, ptr %p, i32 2
+ %p2 = getelementptr i4, ptr %p, i32 3
+ %l1 = load i8, ptr %p
+ %l2 = load i4, ptr %p1
+ %l3 = load i4, ptr %p2
+
+ %e1 = zext i8 %l1 to i16
+ %e2 = zext i4 %l2 to i16
+ %e3 = zext i4 %l3 to i16
+
+ %s2 = shl i16 %e2, 8
+ %s3 = shl i16 %e3, 12
+
+ %o1 = or i16 %e1, %s2
+ %o2 = or i16 %o1, %s3
+ ret i16 %o2
+}
+
+define i64 @loadCombine_8consecutive_mixsize(ptr %p) {
+; ALL-LABEL: @loadCombine_8consecutive_mixsize(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
+; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
+; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 4
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; ALL-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; ALL-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
+; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
+; ALL-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
+; ALL-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i64
+; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8
+; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
+; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
+; ALL-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]]
+; ALL-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; ALL-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
+; ALL-NEXT: ret i64 [[O3]]
+;
+ %p1 = getelementptr i8, ptr %p, i64 1
+ %p2 = getelementptr i8, ptr %p, i64 2
+ %p3 = getelementptr i8, ptr %p, i64 4
+ %l1 = load i8, ptr %p
+ %l2 = load i8, ptr %p1
+ %l3 = load i16, ptr %p2
+ %l4 = load i32, ptr %p3
+
+ %e1 = zext i8 %l1 to i64
+ %e2 = zext i8 %l2 to i64
+ %e3 = zext i16 %l3 to i64
+ %e4 = zext i32 %l4 to i64
+
+ %s2 = shl i64 %e2, 8
+ %s3 = shl i64 %e3, 16
+ %s4 = shl i64 %e4, 32
+
+ %o1 = or i64 %e1, %s2
+ %o2 = or i64 %o1, %s3
+ %o3 = or i64 %o2, %s4
+ ret i64 %o3
+}
+
define i32 @loadCombine_4consecutive_lower_index_comes_before(ptr %p) {
; LE-LABEL: @loadCombine_4consecutive_lower_index_comes_before(
; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 1b53c8f71222b..0fe27da6307bb 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1812,6 +1812,133 @@ define i32 @loadCombine_4consecutive_mixsize2(ptr %p) {
ret i32 %o2
}
+define i32 @loadCombine_4consecutive_mixsize3(ptr %p) {
+; ALL-LABEL: @loadCombine_4consecutive_mixsize3(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i32
+; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; ALL-NEXT: ret i32 [[O2]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %p2 = getelementptr i8, ptr %p, i32 3
+ %l1 = load i8, ptr %p
+ %l2 = load i16, ptr %p1
+ %l3 = load i8, ptr %p2
+
+ %e1 = zext i8 %l1 to i32
+ %e2 = zext i16 %l2 to i32
+ %e3 = zext i8 %l3 to i32
+
+ %s2 = shl i32 %e2, 8
+ %s3 = shl i32 %e3, 24
+
+ %o1 = or i32 %e1, %s2
+ %o2 = or i32 %o1, %s3
+ ret i32 %o2
+}
+
+define i16 @loadCombine_mixsize_4bit(ptr %p) {
+; ALL-LABEL: @loadCombine_mixsize_4bit(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 2
+; ALL-NEXT: [[P2:%.*]] = getelementptr i4, ptr [[P]], i32 3
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i4, ptr [[P1]], align 1
+; ALL-NEXT: [[L3:%.*]] = load i4, ptr [[P2]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16
+; ALL-NEXT: [[E2:%.*]] = zext i4 [[L2]] to i16
+; ALL-NEXT: [[E3:%.*]] = zext i4 [[L3]] to i16
+; ALL-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8
+; ALL-NEXT: [[S3:%.*]] = shl i16 [[E3]], 12
+; ALL-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]]
+; ALL-NEXT: [[O2:%.*]] = or i16 [[O1]], [[S3]]
+; ALL-NEXT: ret i16 [[O2]]
+;
+ %p1 = getelementptr i4, ptr %p, i32 2
+ %p2 = getelementptr i4, ptr %p, i32 3
+ %l1 = load i8, ptr %p
+ %l2 = load i4, ptr %p1
+ %l3 = load i4, ptr %p2
+
+ %e1 = zext i8 %l1 to i16
+ %e2 = zext i4 %l2 to i16
+ %e3 = zext i4 %l3 to i16
+
+ %s2 = shl i16 %e2, 8
+ %s3 = shl i16 %e3, 12
+
+ %o1 = or i16 %e1, %s2
+ %o2 = or i16 %o1, %s3
+ ret i16 %o2
+}
+
+define i64 @loadCombine_8consecutive_mixsize(ptr %p) {
+; LE-LABEL: @loadCombine_8consecutive_mixsize(
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 4
+; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1
+; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i64
+; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; LE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
+; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i64
+; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
+; LE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
+; LE-NEXT: [[O2:%.*]] = or i64 [[TMP1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
+; LE-NEXT: ret i64 [[O3]]
+;
+; BE-LABEL: @loadCombine_8consecutive_mixsize(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 4
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
+; BE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
+; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i64
+; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
+; BE-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
+; BE-NEXT: ret i64 [[O3]]
+;
+ %p1 = getelementptr i8, ptr %p, i64 1
+ %p2 = getelementptr i8, ptr %p, i64 2
+ %p3 = getelementptr i8, ptr %p, i64 4
+ %l1 = load i8, ptr %p
+ %l2 = load i8, ptr %p1
+ %l3 = load i16, ptr %p2
+ %l4 = load i32, ptr %p3
+
+ %e1 = zext i8 %l1 to i64
+ %e2 = zext i8 %l2 to i64
+ %e3 = zext i16 %l3 to i64
+ %e4 = zext i32 %l4 to i64
+
+ %s2 = shl i64 %e2, 8
+ %s3 = shl i64 %e3, 16
+ %s4 = shl i64 %e4, 32
+
+ %o1 = or i64 %e1, %s2
+ %o2 = or i64 %o1, %s3
+ %o3 = or i64 %o2, %s4
+ ret i64 %o3
+}
+
define i32 @loadCombine_4consecutive_lower_index_comes_before(ptr %p) {
; LE-LABEL: @loadCombine_4consecutive_lower_index_comes_before(
; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
>From 16243e2a05b9e13e0d5287825034c73d14b193aa Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Fri, 28 Feb 2025 23:49:04 +0800
Subject: [PATCH 02/12] [InstCombine] Handle mixsize consecutive loads
---
.../AggressiveInstCombine.cpp | 2 +-
.../AggressiveInstCombine/AArch64/or-load.ll | 234 ++++++++++--------
.../AggressiveInstCombine/X86/or-load.ll | 133 +++++-----
3 files changed, 190 insertions(+), 179 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index fe7b3b1676e08..6ea81b08d42c7 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -678,7 +678,7 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
// Verify if both loads have same base pointers and load sizes are same.
uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
- if (Load1Ptr != Load2Ptr || LoadSize1 != LoadSize2)
+ if (Load1Ptr != Load2Ptr)
return false;
// Support Loadsizes greater or equal to 8bits and only power of 2.
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index 4f7579658da89..3e8cb04349583 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1529,20 +1529,24 @@ define i64 @eggs(ptr noundef readonly %arg) {
}
define i32 @loadCombine_4consecutive_mixsize1(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_mixsize1(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: ret i32 [[O2]]
+; LE-LABEL: @loadCombine_4consecutive_mixsize1(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 2
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_mixsize1(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: ret i32 [[O2]]
;
%p1 = getelementptr i8, ptr %p, i32 2
%p2 = getelementptr i8, ptr %p, i32 3
@@ -1563,20 +1567,24 @@ define i32 @loadCombine_4consecutive_mixsize1(ptr %p) {
}
define i32 @loadCombine_4consecutive_mixsize1_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_mixsize1_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 16
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[E3]]
-; ALL-NEXT: ret i32 [[O2]]
+; LE-LABEL: @loadCombine_4consecutive_mixsize1_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 16
+; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[E3]]
+; LE-NEXT: ret i32 [[O2]]
+;
+; BE-LABEL: @loadCombine_4consecutive_mixsize1_BE(
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 2
+; BE-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 2
%p2 = getelementptr i8, ptr %p, i32 3
@@ -1597,20 +1605,24 @@ define i32 @loadCombine_4consecutive_mixsize1_BE(ptr %p) {
}
define i32 @loadCombine_4consecutive_rev_mixsize1(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_rev_mixsize1(
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[S3]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[E1]]
-; ALL-NEXT: ret i32 [[O2]]
+; LE-LABEL: @loadCombine_4consecutive_rev_mixsize1(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 2
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_rev_mixsize1(
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[S3]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[E1]]
+; BE-NEXT: ret i32 [[O2]]
;
%p2 = getelementptr i8, ptr %p, i32 2
%p3 = getelementptr i8, ptr %p, i32 3
@@ -1631,20 +1643,24 @@ define i32 @loadCombine_4consecutive_rev_mixsize1(ptr %p) {
}
define i32 @loadCombine_4consecutive_rev_mixsize1_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_rev_mixsize1_BE(
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 16
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E3]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S1]]
-; ALL-NEXT: ret i32 [[O2]]
+; LE-LABEL: @loadCombine_4consecutive_rev_mixsize1_BE(
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 16
+; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; LE-NEXT: [[O1:%.*]] = or i32 [[E3]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S1]]
+; LE-NEXT: ret i32 [[O2]]
+;
+; BE-LABEL: @loadCombine_4consecutive_rev_mixsize1_BE(
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 2
+; BE-NEXT: ret i32 [[L1]]
;
%p2 = getelementptr i8, ptr %p, i32 2
%p3 = getelementptr i8, ptr %p, i32 3
@@ -1665,20 +1681,24 @@ define i32 @loadCombine_4consecutive_rev_mixsize1_BE(ptr %p) {
}
define i32 @loadCombine_4consecutive_mixsize2(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_mixsize2(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: ret i32 [[O2]]
+; LE-LABEL: @loadCombine_4consecutive_mixsize2(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_mixsize2(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: ret i32 [[O2]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 2
@@ -1699,20 +1719,24 @@ define i32 @loadCombine_4consecutive_mixsize2(ptr %p) {
}
define i32 @loadCombine_4consecutive_mixsize3(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_mixsize3(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: ret i32 [[O2]]
+; LE-LABEL: @loadCombine_4consecutive_mixsize3(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_mixsize3(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: ret i32 [[O2]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 3
@@ -1767,25 +1791,29 @@ define i16 @loadCombine_mixsize_4bit(ptr %p) {
}
define i64 @loadCombine_8consecutive_mixsize(ptr %p) {
-; ALL-LABEL: @loadCombine_8consecutive_mixsize(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 4
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; ALL-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
-; ALL-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
-; ALL-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i64
-; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
-; ALL-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
-; ALL-NEXT: ret i64 [[O3]]
+; LE-LABEL: @loadCombine_8consecutive_mixsize(
+; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i64 [[L1]]
+;
+; BE-LABEL: @loadCombine_8consecutive_mixsize(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 4
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
+; BE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
+; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i64
+; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
+; BE-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
+; BE-NEXT: ret i64 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i64 1
%p2 = getelementptr i8, ptr %p, i64 2
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 0fe27da6307bb..760342b085586 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1613,20 +1613,24 @@ define i64 @eggs(ptr noundef readonly %arg) {
}
define i32 @loadCombine_4consecutive_mixsize1(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_mixsize1(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: ret i32 [[O2]]
+; LE-LABEL: @loadCombine_4consecutive_mixsize1(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 2
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_mixsize1(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: ret i32 [[O2]]
;
%p1 = getelementptr i8, ptr %p, i32 2
%p2 = getelementptr i8, ptr %p, i32 3
@@ -1647,20 +1651,24 @@ define i32 @loadCombine_4consecutive_mixsize1(ptr %p) {
}
define i32 @loadCombine_4consecutive_mixsize1_BE(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_mixsize1_BE(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 16
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[E3]]
-; ALL-NEXT: ret i32 [[O2]]
+; LE-LABEL: @loadCombine_4consecutive_mixsize1_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 16
+; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[E3]]
+; LE-NEXT: ret i32 [[O2]]
+;
+; BE-LABEL: @loadCombine_4consecutive_mixsize1_BE(
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 2
+; BE-NEXT: ret i32 [[L1]]
;
%p1 = getelementptr i8, ptr %p, i32 2
%p2 = getelementptr i8, ptr %p, i32 3
@@ -1682,13 +1690,7 @@ define i32 @loadCombine_4consecutive_mixsize1_BE(ptr %p) {
define i32 @loadCombine_4consecutive_rev_mixsize1(ptr %p) {
; LE-LABEL: @loadCombine_4consecutive_rev_mixsize1(
-; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; LE-NEXT: [[L2:%.*]] = load i16, ptr [[P2]], align 1
-; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L2]] to i32
-; LE-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 16
-; LE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
-; LE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[E1]]
+; LE-NEXT: [[O2:%.*]] = load i32, ptr [[P:%.*]], align 2
; LE-NEXT: ret i32 [[O2]]
;
; BE-LABEL: @loadCombine_4consecutive_rev_mixsize1(
@@ -1741,13 +1743,7 @@ define i32 @loadCombine_4consecutive_rev_mixsize1_BE(ptr %p) {
; LE-NEXT: ret i32 [[O2]]
;
; BE-LABEL: @loadCombine_4consecutive_rev_mixsize1_BE(
-; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; BE-NEXT: [[L2:%.*]] = load i16, ptr [[P2]], align 1
-; BE-NEXT: [[TMP1:%.*]] = zext i16 [[L2]] to i32
-; BE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
-; BE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 16
-; BE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S1]]
+; BE-NEXT: [[O2:%.*]] = load i32, ptr [[P:%.*]], align 2
; BE-NEXT: ret i32 [[O2]]
;
%p2 = getelementptr i8, ptr %p, i32 2
@@ -1770,13 +1766,7 @@ define i32 @loadCombine_4consecutive_rev_mixsize1_BE(ptr %p) {
define i32 @loadCombine_4consecutive_mixsize2(ptr %p) {
; LE-LABEL: @loadCombine_4consecutive_mixsize2(
-; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1
-; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32
-; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; LE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i32
-; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; LE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S3]]
+; LE-NEXT: [[O2:%.*]] = load i32, ptr [[P:%.*]], align 1
; LE-NEXT: ret i32 [[O2]]
;
; BE-LABEL: @loadCombine_4consecutive_mixsize2(
@@ -1813,20 +1803,24 @@ define i32 @loadCombine_4consecutive_mixsize2(ptr %p) {
}
define i32 @loadCombine_4consecutive_mixsize3(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive_mixsize3(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: ret i32 [[O2]]
+; LE-LABEL: @loadCombine_4consecutive_mixsize3(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_mixsize3(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: ret i32 [[O2]]
;
%p1 = getelementptr i8, ptr %p, i32 1
%p2 = getelementptr i8, ptr %p, i32 3
@@ -1882,18 +1876,7 @@ define i16 @loadCombine_mixsize_4bit(ptr %p) {
define i64 @loadCombine_8consecutive_mixsize(ptr %p) {
; LE-LABEL: @loadCombine_8consecutive_mixsize(
-; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2
-; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 4
-; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1
-; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i64
-; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
-; LE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
-; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i64
-; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
-; LE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
-; LE-NEXT: [[O2:%.*]] = or i64 [[TMP1]], [[S3]]
-; LE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
+; LE-NEXT: [[O3:%.*]] = load i64, ptr [[P:%.*]], align 1
; LE-NEXT: ret i64 [[O3]]
;
; BE-LABEL: @loadCombine_8consecutive_mixsize(
>From 35098bfa0c49f84a61b35eea93c6470ac4f147df Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Sat, 1 Mar 2025 00:11:31 +0800
Subject: [PATCH 03/12] [InstCombine] Modify the comment accordingly. NFC.
---
.../Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 6ea81b08d42c7..d610fa8002654 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -675,7 +675,7 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
/* AllowNonInbounds */ true);
- // Verify if both loads have same base pointers and load sizes are same.
+ // Verify if both loads have same base pointers
uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
if (Load1Ptr != Load2Ptr)
>From f386a1d759f78082dc1dcf561104111a33c0f028 Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Sun, 2 Mar 2025 18:06:28 +0800
Subject: [PATCH 04/12] [AggressiveInstCombine] Add more tests. NFC.
---
.../AggressiveInstCombine/AArch64/or-load.ll | 130 ++++++++++++++++++
.../AggressiveInstCombine/X86/or-load.ll | 130 ++++++++++++++++++
2 files changed, 260 insertions(+)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index 3e8cb04349583..cdca55cf955a9 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1790,6 +1790,136 @@ define i16 @loadCombine_mixsize_4bit(ptr %p) {
ret i16 %o2
}
+define i32 @loadCombine_2consecutive_mixsize_4bit(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_4bit(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[L1:%.*]] = load i4, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i28, ptr [[P1]], align 4
+; ALL-NEXT: [[E1:%.*]] = zext i4 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i28 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 4
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i4, ptr %p, i32 1
+ %l1 = load i4, ptr %p
+ %l2 = load i28, ptr %p1
+ %e1 = zext i4 %l1 to i32
+ %e2 = zext i28 %l2 to i32
+ %s2 = shl i32 %e2, 4
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_mixsize_4bit2(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_4bit2(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 7
+; ALL-NEXT: [[L1:%.*]] = load i28, ptr [[P]], align 4
+; ALL-NEXT: [[L2:%.*]] = load i4, ptr [[P1]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i28 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i4 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 28
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i4, ptr %p, i32 7
+ %l1 = load i28, ptr %p
+ %l2 = load i4, ptr %p1
+ %e1 = zext i28 %l1 to i32
+ %e2 = zext i4 %l2 to i32
+ %s2 = shl i32 %e2, 28
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_mixsize_not_power_of_two(ptr %p) {
+; LE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i24, ptr [[P1]], align 4
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i24 [[L2]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %l1 = load i8, ptr %p
+ %l2 = load i24, ptr %p1
+ %e1 = zext i8 %l1 to i32
+ %e2 = zext i24 %l2 to i32
+ %s2 = shl i32 %e2, 8
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_mixsize_not_power_of_two2(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two2(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3
+; ALL-NEXT: [[L1:%.*]] = load i24, ptr [[P]], align 4
+; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i24 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 3
+ %l1 = load i24, ptr %p
+ %l2 = load i8, ptr %p1
+ %e1 = zext i24 %l1 to i32
+ %e2 = zext i8 %l2 to i32
+ %s2 = shl i32 %e2, 24
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_sum_size_not_legal(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_sum_size_not_legal(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 2
+ %l1 = load i16, ptr %p
+ %l2 = load i8, ptr %p1
+ %e1 = zext i16 %l1 to i32
+ %e2 = zext i8 %l2 to i32
+ %s2 = shl i32 %e2, 16
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_sum_size_not_legal2(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_sum_size_not_legal2(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %l1 = load i8, ptr %p
+ %l2 = load i16, ptr %p1
+ %e1 = zext i8 %l1 to i32
+ %e2 = zext i16 %l2 to i32
+ %s2 = shl i32 %e2, 8
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
define i64 @loadCombine_8consecutive_mixsize(ptr %p) {
; LE-LABEL: @loadCombine_8consecutive_mixsize(
; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 1
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 760342b085586..7fd3788bced3a 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1874,6 +1874,136 @@ define i16 @loadCombine_mixsize_4bit(ptr %p) {
ret i16 %o2
}
+define i32 @loadCombine_2consecutive_mixsize_4bit(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_4bit(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[L1:%.*]] = load i4, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i28, ptr [[P1]], align 4
+; ALL-NEXT: [[E1:%.*]] = zext i4 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i28 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 4
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i4, ptr %p, i32 1
+ %l1 = load i4, ptr %p
+ %l2 = load i28, ptr %p1
+ %e1 = zext i4 %l1 to i32
+ %e2 = zext i28 %l2 to i32
+ %s2 = shl i32 %e2, 4
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_mixsize_4bit2(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_4bit2(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 7
+; ALL-NEXT: [[L1:%.*]] = load i28, ptr [[P]], align 4
+; ALL-NEXT: [[L2:%.*]] = load i4, ptr [[P1]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i28 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i4 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 28
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i4, ptr %p, i32 7
+ %l1 = load i28, ptr %p
+ %l2 = load i4, ptr %p1
+ %e1 = zext i28 %l1 to i32
+ %e2 = zext i4 %l2 to i32
+ %s2 = shl i32 %e2, 28
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_mixsize_not_power_of_two(ptr %p) {
+; LE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i24, ptr [[P1]], align 4
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i24 [[L2]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %l1 = load i8, ptr %p
+ %l2 = load i24, ptr %p1
+ %e1 = zext i8 %l1 to i32
+ %e2 = zext i24 %l2 to i32
+ %s2 = shl i32 %e2, 8
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_mixsize_not_power_of_two2(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two2(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3
+; ALL-NEXT: [[L1:%.*]] = load i24, ptr [[P]], align 4
+; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i24 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 3
+ %l1 = load i24, ptr %p
+ %l2 = load i8, ptr %p1
+ %e1 = zext i24 %l1 to i32
+ %e2 = zext i8 %l2 to i32
+ %s2 = shl i32 %e2, 24
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_sum_size_not_legal(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_sum_size_not_legal(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 2
+ %l1 = load i16, ptr %p
+ %l2 = load i8, ptr %p1
+ %e1 = zext i16 %l1 to i32
+ %e2 = zext i8 %l2 to i32
+ %s2 = shl i32 %e2, 16
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_sum_size_not_legal2(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_sum_size_not_legal2(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %l1 = load i8, ptr %p
+ %l2 = load i16, ptr %p1
+ %e1 = zext i8 %l1 to i32
+ %e2 = zext i16 %l2 to i32
+ %s2 = shl i32 %e2, 8
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
define i64 @loadCombine_8consecutive_mixsize(ptr %p) {
; LE-LABEL: @loadCombine_8consecutive_mixsize(
; LE-NEXT: [[O3:%.*]] = load i64, ptr [[P:%.*]], align 1
>From 3ca730d256b39c3d96a3f177988d2a0933c33237 Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Sun, 2 Mar 2025 18:09:39 +0800
Subject: [PATCH 05/12] [AggressiveInstCombine] Add check for LoadSize2 and
remove the power of two check.
---
.../AggressiveInstCombine.cpp | 2 +-
.../AggressiveInstCombine/AArch64/or-load.ll | 22 +++++++++++--------
.../AggressiveInstCombine/X86/or-load.ll | 22 +++++++++++--------
3 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index d610fa8002654..35f63f8935256 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -682,7 +682,7 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
return false;
// Support Loadsizes greater or equal to 8bits and only power of 2.
- if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1))
+ if (LoadSize1 < 8 || LoadSize2 < 8)
return false;
// Alias Analysis to check for stores b/w the loads.
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index cdca55cf955a9..ac1763e631748 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1858,15 +1858,19 @@ define i32 @loadCombine_2consecutive_mixsize_not_power_of_two(ptr %p) {
}
define i32 @loadCombine_2consecutive_mixsize_not_power_of_two2(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two2(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i24, ptr [[P]], align 4
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i24 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: ret i32 [[O1]]
+; LE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two2(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two2(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i24, ptr [[P]], align 4
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i24 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: ret i32 [[O1]]
;
%p1 = getelementptr i8, ptr %p, i32 3
%l1 = load i24, ptr %p
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 7fd3788bced3a..68eeb7b1bf19f 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1942,15 +1942,19 @@ define i32 @loadCombine_2consecutive_mixsize_not_power_of_two(ptr %p) {
}
define i32 @loadCombine_2consecutive_mixsize_not_power_of_two2(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two2(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i24, ptr [[P]], align 4
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i24 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: ret i32 [[O1]]
+; LE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two2(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two2(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i24, ptr [[P]], align 4
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i24 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: ret i32 [[O1]]
;
%p1 = getelementptr i8, ptr %p, i32 3
%l1 = load i24, ptr %p
>From f45e2db4f26c69e645b6aae87190b922e8294066 Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Sun, 2 Mar 2025 18:12:02 +0800
Subject: [PATCH 06/12] [AggressiveInstCombine] Modify the comment accordingly.
NFC.
---
.../Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 35f63f8935256..d1ab9835fa95f 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -681,7 +681,7 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
if (Load1Ptr != Load2Ptr)
return false;
- // Support Loadsizes greater or equal to 8bits and only power of 2.
+ // Support Loadsizes greater or equal to 8bits.
if (LoadSize1 < 8 || LoadSize2 < 8)
return false;
>From b03986b0c0062406fadad1626d3780208dd59cd9 Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Mon, 3 Mar 2025 01:04:28 +0800
Subject: [PATCH 07/12] [AggressiveInstCombine] Rename two tests to more
clearly show the purpose. NFC.
---
.../Transforms/AggressiveInstCombine/AArch64/or-load.ll | 8 ++++----
llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll | 8 ++++----
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index ac1763e631748..20ffd6d0eb158 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1790,8 +1790,8 @@ define i16 @loadCombine_mixsize_4bit(ptr %p) {
ret i16 %o2
}
-define i32 @loadCombine_2consecutive_mixsize_4bit(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_mixsize_4bit(
+define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size(
; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[L1:%.*]] = load i4, ptr [[P]], align 1
; ALL-NEXT: [[L2:%.*]] = load i28, ptr [[P1]], align 4
@@ -1811,8 +1811,8 @@ define i32 @loadCombine_2consecutive_mixsize_4bit(ptr %p) {
ret i32 %o1
}
-define i32 @loadCombine_2consecutive_mixsize_4bit2(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_mixsize_4bit2(
+define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size2(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size2(
; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 7
; ALL-NEXT: [[L1:%.*]] = load i28, ptr [[P]], align 4
; ALL-NEXT: [[L2:%.*]] = load i4, ptr [[P1]], align 1
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 68eeb7b1bf19f..60173053e9816 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1874,8 +1874,8 @@ define i16 @loadCombine_mixsize_4bit(ptr %p) {
ret i16 %o2
}
-define i32 @loadCombine_2consecutive_mixsize_4bit(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_mixsize_4bit(
+define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size(
; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[L1:%.*]] = load i4, ptr [[P]], align 1
; ALL-NEXT: [[L2:%.*]] = load i28, ptr [[P1]], align 4
@@ -1895,8 +1895,8 @@ define i32 @loadCombine_2consecutive_mixsize_4bit(ptr %p) {
ret i32 %o1
}
-define i32 @loadCombine_2consecutive_mixsize_4bit2(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_mixsize_4bit2(
+define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size2(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size2(
; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 7
; ALL-NEXT: [[L1:%.*]] = load i28, ptr [[P]], align 4
; ALL-NEXT: [[L2:%.*]] = load i4, ptr [[P1]], align 1
>From 7c7c8fc5d16ae2c82dc91e08df6101eab76ce826 Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Mon, 3 Mar 2025 02:09:41 +0800
Subject: [PATCH 08/12] [AggressiveInstCombine] Add more negative tests. NFC.
---
.../AggressiveInstCombine/AArch64/or-load.ll | 42 +++++++++++++++++++
.../AggressiveInstCombine/X86/or-load.ll | 42 +++++++++++++++++++
2 files changed, 84 insertions(+)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index 20ffd6d0eb158..282ea82a0f47b 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1832,6 +1832,48 @@ define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size2(ptr %p) {
ret i32 %o1
}
+define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size3(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size3(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i1, ptr [[P:%.*]], i32 23
+; ALL-NEXT: [[L1:%.*]] = load i23, ptr [[P]], align 4
+; ALL-NEXT: [[L2:%.*]] = load i9, ptr [[P1]], align 2
+; ALL-NEXT: [[E1:%.*]] = zext i23 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i9 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i1, ptr %p, i32 23
+ %l1 = load i23, ptr %p
+ %l2 = load i9, ptr %p1
+ %e1 = zext i23 %l1 to i32
+ %e2 = zext i9 %l2 to i32
+ %s2 = shl i32 %e2, 24
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size4(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size4(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i1, ptr [[P:%.*]], i32 9
+; ALL-NEXT: [[L1:%.*]] = load i9, ptr [[P]], align 2
+; ALL-NEXT: [[L2:%.*]] = load i23, ptr [[P1]], align 4
+; ALL-NEXT: [[E1:%.*]] = zext i9 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i23 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i1, ptr %p, i32 9
+ %l1 = load i9, ptr %p
+ %l2 = load i23, ptr %p1
+ %e1 = zext i9 %l1 to i32
+ %e2 = zext i23 %l2 to i32
+ %s2 = shl i32 %e2, 24
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
define i32 @loadCombine_2consecutive_mixsize_not_power_of_two(ptr %p) {
; LE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two(
; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 60173053e9816..2e3bd4f7b1150 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1916,6 +1916,48 @@ define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size2(ptr %p) {
ret i32 %o1
}
+define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size3(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size3(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i1, ptr [[P:%.*]], i32 23
+; ALL-NEXT: [[L1:%.*]] = load i23, ptr [[P]], align 4
+; ALL-NEXT: [[L2:%.*]] = load i9, ptr [[P1]], align 2
+; ALL-NEXT: [[E1:%.*]] = zext i23 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i9 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i1, ptr %p, i32 23
+ %l1 = load i23, ptr %p
+ %l2 = load i9, ptr %p1
+ %e1 = zext i23 %l1 to i32
+ %e2 = zext i9 %l2 to i32
+ %s2 = shl i32 %e2, 24
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
+define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size4(ptr %p) {
+; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size4(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i1, ptr [[P:%.*]], i32 9
+; ALL-NEXT: [[L1:%.*]] = load i9, ptr [[P]], align 2
+; ALL-NEXT: [[L2:%.*]] = load i23, ptr [[P1]], align 4
+; ALL-NEXT: [[E1:%.*]] = zext i9 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i23 [[L2]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
+; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; ALL-NEXT: ret i32 [[O1]]
+;
+ %p1 = getelementptr i1, ptr %p, i32 9
+ %l1 = load i9, ptr %p
+ %l2 = load i23, ptr %p1
+ %e1 = zext i9 %l1 to i32
+ %e2 = zext i23 %l2 to i32
+ %s2 = shl i32 %e2, 24
+ %o1 = or i32 %e1, %s2
+ ret i32 %o1
+}
+
define i32 @loadCombine_2consecutive_mixsize_not_power_of_two(ptr %p) {
; LE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two(
; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
>From f1e291434d9792741690a17ccd90e20507193d25 Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Mon, 3 Mar 2025 02:30:28 +0800
Subject: [PATCH 09/12] [AggressiveInstCombine] Check load sizes equal to store
sizes.
---
.../AggressiveInstCombine/AggressiveInstCombine.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index d1ab9835fa95f..cbb0e22b66bb4 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -681,8 +681,8 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
if (Load1Ptr != Load2Ptr)
return false;
- // Support Loadsizes greater or equal to 8bits.
- if (LoadSize1 < 8 || LoadSize2 < 8)
+ // Support Loadsizes that equal to the corresponding store sizes.
+ if (!DL.typeSizeEqualsStoreSize(LI1->getType()) || !DL.typeSizeEqualsStoreSize(LI2->getType()))
return false;
// Alias Analysis to check for stores b/w the loads.
>From ad75d16ca30e11de384a4c555f6d9374d160e058 Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Mon, 3 Mar 2025 14:34:11 +0800
Subject: [PATCH 10/12] [AggressiveInstCombine] Address review comments. NFC.
---
.../AggressiveInstCombine/AggressiveInstCombine.cpp | 2 +-
.../AggressiveInstCombine/AArch64/or-load.ll | 12 ++++++------
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index cbb0e22b66bb4..2dd14e54b7568 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -681,7 +681,7 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
if (Load1Ptr != Load2Ptr)
return false;
- // Support Loadsizes that equal to the corresponding store sizes.
+ // Make sure that there are no padding bits.
if (!DL.typeSizeEqualsStoreSize(LI1->getType()) || !DL.typeSizeEqualsStoreSize(LI2->getType()))
return false;
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index 282ea82a0f47b..0e3fe56bb58de 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1813,7 +1813,7 @@ define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size(ptr %p) {
define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size2(ptr %p) {
; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size2(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 7
+; ALL-NEXT: [[P1:%.*]] = getelementptr i28, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[L1:%.*]] = load i28, ptr [[P]], align 4
; ALL-NEXT: [[L2:%.*]] = load i4, ptr [[P1]], align 1
; ALL-NEXT: [[E1:%.*]] = zext i28 [[L1]] to i32
@@ -1822,7 +1822,7 @@ define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size2(ptr %p) {
; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
; ALL-NEXT: ret i32 [[O1]]
;
- %p1 = getelementptr i4, ptr %p, i32 7
+ %p1 = getelementptr i28, ptr %p, i32 1
%l1 = load i28, ptr %p
%l2 = load i4, ptr %p1
%e1 = zext i28 %l1 to i32
@@ -1834,7 +1834,7 @@ define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size2(ptr %p) {
define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size3(ptr %p) {
; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size3(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i1, ptr [[P:%.*]], i32 23
+; ALL-NEXT: [[P1:%.*]] = getelementptr i23, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[L1:%.*]] = load i23, ptr [[P]], align 4
; ALL-NEXT: [[L2:%.*]] = load i9, ptr [[P1]], align 2
; ALL-NEXT: [[E1:%.*]] = zext i23 [[L1]] to i32
@@ -1843,7 +1843,7 @@ define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size3(ptr %p) {
; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
; ALL-NEXT: ret i32 [[O1]]
;
- %p1 = getelementptr i1, ptr %p, i32 23
+ %p1 = getelementptr i23, ptr %p, i32 1
%l1 = load i23, ptr %p
%l2 = load i9, ptr %p1
%e1 = zext i23 %l1 to i32
@@ -1855,7 +1855,7 @@ define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size3(ptr %p) {
define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size4(ptr %p) {
; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size4(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i1, ptr [[P:%.*]], i32 9
+; ALL-NEXT: [[P1:%.*]] = getelementptr i9, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[L1:%.*]] = load i9, ptr [[P]], align 2
; ALL-NEXT: [[L2:%.*]] = load i23, ptr [[P1]], align 4
; ALL-NEXT: [[E1:%.*]] = zext i9 [[L1]] to i32
@@ -1864,7 +1864,7 @@ define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size4(ptr %p) {
; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
; ALL-NEXT: ret i32 [[O1]]
;
- %p1 = getelementptr i1, ptr %p, i32 9
+ %p1 = getelementptr i9, ptr %p, i32 1
%l1 = load i9, ptr %p
%l2 = load i23, ptr %p1
%e1 = zext i9 %l1 to i32
>From 1541a9b81b0d8e5047f7efa625c32a38e2b31fe5 Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Mon, 3 Mar 2025 16:02:37 +0800
Subject: [PATCH 11/12] [AggressiveInstCombine] Remove unnecessarily repeated
tests. NFC.
---
.../AggressiveInstCombine/X86/or-load.ll | 296 ------------------
1 file changed, 296 deletions(-)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 2e3bd4f7b1150..f6716c817b739 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1802,302 +1802,6 @@ define i32 @loadCombine_4consecutive_mixsize2(ptr %p) {
ret i32 %o2
}
-define i32 @loadCombine_4consecutive_mixsize3(ptr %p) {
-; LE-LABEL: @loadCombine_4consecutive_mixsize3(
-; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
-; LE-NEXT: ret i32 [[L1]]
-;
-; BE-LABEL: @loadCombine_4consecutive_mixsize3(
-; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; BE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; BE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i32
-; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 24
-; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; BE-NEXT: ret i32 [[O2]]
-;
- %p1 = getelementptr i8, ptr %p, i32 1
- %p2 = getelementptr i8, ptr %p, i32 3
- %l1 = load i8, ptr %p
- %l2 = load i16, ptr %p1
- %l3 = load i8, ptr %p2
-
- %e1 = zext i8 %l1 to i32
- %e2 = zext i16 %l2 to i32
- %e3 = zext i8 %l3 to i32
-
- %s2 = shl i32 %e2, 8
- %s3 = shl i32 %e3, 24
-
- %o1 = or i32 %e1, %s2
- %o2 = or i32 %o1, %s3
- ret i32 %o2
-}
-
-define i16 @loadCombine_mixsize_4bit(ptr %p) {
-; ALL-LABEL: @loadCombine_mixsize_4bit(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 2
-; ALL-NEXT: [[P2:%.*]] = getelementptr i4, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i4, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i4, ptr [[P2]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16
-; ALL-NEXT: [[E2:%.*]] = zext i4 [[L2]] to i16
-; ALL-NEXT: [[E3:%.*]] = zext i4 [[L3]] to i16
-; ALL-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i16 [[E3]], 12
-; ALL-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i16 [[O1]], [[S3]]
-; ALL-NEXT: ret i16 [[O2]]
-;
- %p1 = getelementptr i4, ptr %p, i32 2
- %p2 = getelementptr i4, ptr %p, i32 3
- %l1 = load i8, ptr %p
- %l2 = load i4, ptr %p1
- %l3 = load i4, ptr %p2
-
- %e1 = zext i8 %l1 to i16
- %e2 = zext i4 %l2 to i16
- %e3 = zext i4 %l3 to i16
-
- %s2 = shl i16 %e2, 8
- %s3 = shl i16 %e3, 12
-
- %o1 = or i16 %e1, %s2
- %o2 = or i16 %o1, %s3
- ret i16 %o2
-}
-
-define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[L1:%.*]] = load i4, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i28, ptr [[P1]], align 4
-; ALL-NEXT: [[E1:%.*]] = zext i4 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i28 [[L2]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 4
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: ret i32 [[O1]]
-;
- %p1 = getelementptr i4, ptr %p, i32 1
- %l1 = load i4, ptr %p
- %l2 = load i28, ptr %p1
- %e1 = zext i4 %l1 to i32
- %e2 = zext i28 %l2 to i32
- %s2 = shl i32 %e2, 4
- %o1 = or i32 %e1, %s2
- ret i32 %o1
-}
-
-define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size2(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size2(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i4, ptr [[P:%.*]], i32 7
-; ALL-NEXT: [[L1:%.*]] = load i28, ptr [[P]], align 4
-; ALL-NEXT: [[L2:%.*]] = load i4, ptr [[P1]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i28 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i4 [[L2]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 28
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: ret i32 [[O1]]
-;
- %p1 = getelementptr i4, ptr %p, i32 7
- %l1 = load i28, ptr %p
- %l2 = load i4, ptr %p1
- %e1 = zext i28 %l1 to i32
- %e2 = zext i4 %l2 to i32
- %s2 = shl i32 %e2, 28
- %o1 = or i32 %e1, %s2
- ret i32 %o1
-}
-
-define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size3(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size3(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i1, ptr [[P:%.*]], i32 23
-; ALL-NEXT: [[L1:%.*]] = load i23, ptr [[P]], align 4
-; ALL-NEXT: [[L2:%.*]] = load i9, ptr [[P1]], align 2
-; ALL-NEXT: [[E1:%.*]] = zext i23 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i9 [[L2]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: ret i32 [[O1]]
-;
- %p1 = getelementptr i1, ptr %p, i32 23
- %l1 = load i23, ptr %p
- %l2 = load i9, ptr %p1
- %e1 = zext i23 %l1 to i32
- %e2 = zext i9 %l2 to i32
- %s2 = shl i32 %e2, 24
- %o1 = or i32 %e1, %s2
- ret i32 %o1
-}
-
-define i32 @loadCombine_2consecutive_mixsize_not_equal_store_size4(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_mixsize_not_equal_store_size4(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i1, ptr [[P:%.*]], i32 9
-; ALL-NEXT: [[L1:%.*]] = load i9, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i23, ptr [[P1]], align 4
-; ALL-NEXT: [[E1:%.*]] = zext i9 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i23 [[L2]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: ret i32 [[O1]]
-;
- %p1 = getelementptr i1, ptr %p, i32 9
- %l1 = load i9, ptr %p
- %l2 = load i23, ptr %p1
- %e1 = zext i9 %l1 to i32
- %e2 = zext i23 %l2 to i32
- %s2 = shl i32 %e2, 24
- %o1 = or i32 %e1, %s2
- ret i32 %o1
-}
-
-define i32 @loadCombine_2consecutive_mixsize_not_power_of_two(ptr %p) {
-; LE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two(
-; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
-; LE-NEXT: ret i32 [[L1]]
-;
-; BE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two(
-; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; BE-NEXT: [[L2:%.*]] = load i24, ptr [[P1]], align 4
-; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; BE-NEXT: [[E2:%.*]] = zext i24 [[L2]] to i32
-; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; BE-NEXT: ret i32 [[O1]]
-;
- %p1 = getelementptr i8, ptr %p, i32 1
- %l1 = load i8, ptr %p
- %l2 = load i24, ptr %p1
- %e1 = zext i8 %l1 to i32
- %e2 = zext i24 %l2 to i32
- %s2 = shl i32 %e2, 8
- %o1 = or i32 %e1, %s2
- ret i32 %o1
-}
-
-define i32 @loadCombine_2consecutive_mixsize_not_power_of_two2(ptr %p) {
-; LE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two2(
-; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 4
-; LE-NEXT: ret i32 [[L1]]
-;
-; BE-LABEL: @loadCombine_2consecutive_mixsize_not_power_of_two2(
-; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3
-; BE-NEXT: [[L1:%.*]] = load i24, ptr [[P]], align 4
-; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; BE-NEXT: [[E1:%.*]] = zext i24 [[L1]] to i32
-; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 24
-; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; BE-NEXT: ret i32 [[O1]]
-;
- %p1 = getelementptr i8, ptr %p, i32 3
- %l1 = load i24, ptr %p
- %l2 = load i8, ptr %p1
- %e1 = zext i24 %l1 to i32
- %e2 = zext i8 %l2 to i32
- %s2 = shl i32 %e2, 24
- %o1 = or i32 %e1, %s2
- ret i32 %o1
-}
-
-define i32 @loadCombine_2consecutive_sum_size_not_legal(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_sum_size_not_legal(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: ret i32 [[O1]]
-;
- %p1 = getelementptr i8, ptr %p, i32 2
- %l1 = load i16, ptr %p
- %l2 = load i8, ptr %p1
- %e1 = zext i16 %l1 to i32
- %e2 = zext i8 %l2 to i32
- %s2 = shl i32 %e2, 16
- %o1 = or i32 %e1, %s2
- ret i32 %o1
-}
-
-define i32 @loadCombine_2consecutive_sum_size_not_legal2(ptr %p) {
-; ALL-LABEL: @loadCombine_2consecutive_sum_size_not_legal2(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: ret i32 [[O1]]
-;
- %p1 = getelementptr i8, ptr %p, i32 1
- %l1 = load i8, ptr %p
- %l2 = load i16, ptr %p1
- %e1 = zext i8 %l1 to i32
- %e2 = zext i16 %l2 to i32
- %s2 = shl i32 %e2, 8
- %o1 = or i32 %e1, %s2
- ret i32 %o1
-}
-
-define i64 @loadCombine_8consecutive_mixsize(ptr %p) {
-; LE-LABEL: @loadCombine_8consecutive_mixsize(
-; LE-NEXT: [[O3:%.*]] = load i64, ptr [[P:%.*]], align 1
-; LE-NEXT: ret i64 [[O3]]
-;
-; BE-LABEL: @loadCombine_8consecutive_mixsize(
-; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 4
-; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; BE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
-; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
-; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
-; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
-; BE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
-; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i64
-; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8
-; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
-; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
-; BE-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]]
-; BE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
-; BE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
-; BE-NEXT: ret i64 [[O3]]
-;
- %p1 = getelementptr i8, ptr %p, i64 1
- %p2 = getelementptr i8, ptr %p, i64 2
- %p3 = getelementptr i8, ptr %p, i64 4
- %l1 = load i8, ptr %p
- %l2 = load i8, ptr %p1
- %l3 = load i16, ptr %p2
- %l4 = load i32, ptr %p3
-
- %e1 = zext i8 %l1 to i64
- %e2 = zext i8 %l2 to i64
- %e3 = zext i16 %l3 to i64
- %e4 = zext i32 %l4 to i64
-
- %s2 = shl i64 %e2, 8
- %s3 = shl i64 %e3, 16
- %s4 = shl i64 %e4, 32
-
- %o1 = or i64 %e1, %s2
- %o2 = or i64 %o1, %s3
- %o3 = or i64 %o2, %s4
- ret i64 %o3
-}
-
define i32 @loadCombine_4consecutive_lower_index_comes_before(ptr %p) {
; LE-LABEL: @loadCombine_4consecutive_lower_index_comes_before(
; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
>From 79aa2da67000ba608c0e5235c7913e466370e5a4 Mon Sep 17 00:00:00 2001
From: Cancelll <639610709 at qq.com>
Date: Wed, 5 Mar 2025 16:32:06 +0800
Subject: [PATCH 12/12] [AggressiveInstCombine] Fix the format issue. NFC.
---
.../Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 2dd14e54b7568..6b0f568864fd5 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -682,7 +682,8 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
return false;
// Make sure that there are no padding bits.
- if (!DL.typeSizeEqualsStoreSize(LI1->getType()) || !DL.typeSizeEqualsStoreSize(LI2->getType()))
+ if (!DL.typeSizeEqualsStoreSize(LI1->getType()) ||
+ !DL.typeSizeEqualsStoreSize(LI2->getType()))
return false;
// Alias Analysis to check for stores b/w the loads.
More information about the llvm-commits
mailing list