[llvm] 1dd7e57 - Add reverse load tests to test load combine patch
bipmis via llvm-commits <llvm-commits at lists.llvm.org>
Wed Sep 28 08:52:15 PDT 2022
Author: bipmis
Date: 2022-09-28T16:51:23+01:00
New Revision: 1dd7e576d77fb4a76b8fba265c8319adb7b9b7cb
URL: https://github.com/llvm/llvm-project/commit/1dd7e576d77fb4a76b8fba265c8319adb7b9b7cb
DIFF: https://github.com/llvm/llvm-project/commit/1dd7e576d77fb4a76b8fba265c8319adb7b9b7cb.diff
LOG: Add reverse load tests to test load combine patch
Added:
Modified:
llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
Removed:
################################################################################
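The tests in these files exercise the load-combine fold in AggressiveInstCombine: a chain of adjacent narrow loads that are zero-extended, shifted, and or'ed together is rewritten into a single wide load when the byte order implied by the shifts matches the target's endianness (the LE and BE check prefixes correspond to the little- and big-endian RUN lines). Below is a minimal sketch of the pattern, assuming a little-endian target; the function name @sketch_le is only illustrative, and the CHECK lines in the diff itself are the authoritative output.

; Four adjacent byte loads assembled into an i32, lowest-addressed byte in the low bits.
define i32 @sketch_le(ptr %p) {
  %p1 = getelementptr i8, ptr %p, i32 1
  %p2 = getelementptr i8, ptr %p, i32 2
  %p3 = getelementptr i8, ptr %p, i32 3
  %l1 = load i8, ptr %p, align 1
  %l2 = load i8, ptr %p1, align 1
  %l3 = load i8, ptr %p2, align 1
  %l4 = load i8, ptr %p3, align 1
  %e1 = zext i8 %l1 to i32
  %e2 = zext i8 %l2 to i32
  %e3 = zext i8 %l3 to i32
  %e4 = zext i8 %l4 to i32
  %s2 = shl i32 %e2, 8   ; byte 1 -> bits 8..15
  %s3 = shl i32 %e3, 16  ; byte 2 -> bits 16..23
  %s4 = shl i32 %e4, 24  ; byte 3 -> bits 24..31
  %o1 = or i32 %e1, %s2
  %o2 = or i32 %o1, %s3
  %o3 = or i32 %o2, %s4
  ret i32 %o3
}
; On a little-endian target AggressiveInstCombine folds this to a single load:
;   %l1 = load i32, ptr %p, align 1
;   ret i32 %l1
; On a big-endian target the chain is left as-is; the *_BE variants in the
; diff test the mirrored byte order.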
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index 59a459e8243a0..fc1de92b22bee 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -46,6 +46,30 @@ define i16 @loadCombine_2consecutive_BE(ptr %p) {
}
define i32 @loadCombine_4consecutive(ptr %p) {
+; LE-LABEL: @loadCombine_4consecutive(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
+;
; ALL-LABEL: @loadCombine_4consecutive(
; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
@@ -90,6 +114,30 @@ define i32 @loadCombine_4consecutive(ptr %p) {
}
define i32 @loadCombine_4consecutive_BE(ptr %p) {
+; LE-LABEL: @loadCombine_4consecutive_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_BE(
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; BE-NEXT: ret i32 [[L1]]
+;
; ALL-LABEL: @loadCombine_4consecutive_BE(
; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
@@ -134,6 +182,32 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) {
}
define i32 @loadCombine_4consecutive_alias(ptr %p) {
+; LE-LABEL: @loadCombine_4consecutive_alias(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: store i8 10, ptr [[P]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT: store i8 10, ptr [[P]], align 1
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
+;
; ALL-LABEL: @loadCombine_4consecutive_alias(
; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
@@ -180,6 +254,32 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) {
}
define i32 @loadCombine_4consecutive_alias_BE(ptr %p) {
+; LE-LABEL: @loadCombine_4consecutive_alias_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; LE-NEXT: store i8 10, ptr [[P]], align 1
+; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24
+; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16
+; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8
+; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]]
+; LE-NEXT: ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_alias_BE(
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; BE-NEXT: store i8 10, ptr [[P]], align 1
+; BE-NEXT: ret i32 [[L1]]
+;
; ALL-LABEL: @loadCombine_4consecutive_alias_BE(
; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
@@ -711,6 +811,41 @@ define i32 @loadCombine_parLoad1(ptr %p) {
}
define i128 @loadCombine_i128(ptr %p) {
+; LE-LABEL: @loadCombine_i128(
+; LE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4
+; LE-NEXT: [[TMP1:%.*]] = zext i64 [[L1]] to i128
+; LE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; LE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; LE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64
+; LE-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96
+; LE-NEXT: [[O2:%.*]] = or i128 [[TMP1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]]
+; LE-NEXT: ret i128 [[O3]]
+;
+; BE-LABEL: @loadCombine_i128(
+; BE-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4
+; BE-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4
+; BE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128
+; BE-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128
+; BE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; BE-NEXT: [[S2:%.*]] = shl i128 [[E2]], 32
+; BE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64
+; BE-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96
+; BE-NEXT: [[O1:%.*]] = or i128 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]]
+; BE-NEXT: ret i128 [[O3]]
+;
; ALL-LABEL: @loadCombine_i128(
; ALL-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
@@ -755,6 +890,41 @@ define i128 @loadCombine_i128(ptr %p) {
}
define i128 @loadCombine_i128_BE(ptr %p) {
+; LE-LABEL: @loadCombine_i128_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4
+; LE-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4
+; LE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; LE-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128
+; LE-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128
+; LE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; LE-NEXT: [[S1:%.*]] = shl i128 [[E1]], 96
+; LE-NEXT: [[S2:%.*]] = shl i128 [[E2]], 64
+; LE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32
+; LE-NEXT: [[O1:%.*]] = or i128 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]]
+; LE-NEXT: ret i128 [[O3]]
+;
+; BE-LABEL: @loadCombine_i128_BE(
+; BE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4
+; BE-NEXT: [[TMP1:%.*]] = zext i64 [[L1]] to i128
+; BE-NEXT: [[TMP2:%.*]] = shl i128 [[TMP1]], 64
+; BE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4
+; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4
+; BE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128
+; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128
+; BE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32
+; BE-NEXT: [[O2:%.*]] = or i128 [[TMP2]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]]
+; BE-NEXT: ret i128 [[O3]]
+;
; ALL-LABEL: @loadCombine_i128_BE(
; ALL-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2
@@ -799,6 +969,30 @@ define i128 @loadCombine_i128_BE(ptr %p) {
}
define i64 @loadCombine_i64(ptr %p) {
+; LE-LABEL: @loadCombine_i64(
+; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2
+; LE-NEXT: ret i64 [[L1]]
+;
+; BE-LABEL: @loadCombine_i64(
+; BE-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; BE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; BE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; BE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2
+; BE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64
+; BE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64
+; BE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
+; BE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64
+; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 16
+; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 32
+; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 48
+; BE-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]]
+; BE-NEXT: ret i64 [[O3]]
+;
; ALL-LABEL: @loadCombine_i64(
; ALL-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
@@ -843,6 +1037,30 @@ define i64 @loadCombine_i64(ptr %p) {
}
define i64 @loadCombine_i64_BE(ptr %p) {
+; LE-LABEL: @loadCombine_i64_BE(
+; LE-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; LE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3
+; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2
+; LE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2
+; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2
+; LE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2
+; LE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64
+; LE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64
+; LE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64
+; LE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64
+; LE-NEXT: [[S1:%.*]] = shl i64 [[E1]], 48
+; LE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 32
+; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
+; LE-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S2]]
+; LE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]]
+; LE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[E4]]
+; LE-NEXT: ret i64 [[O3]]
+;
+; BE-LABEL: @loadCombine_i64_BE(
+; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2
+; BE-NEXT: ret i64 [[L1]]
+;
; ALL-LABEL: @loadCombine_i64_BE(
; ALL-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2
@@ -975,6 +1193,52 @@ define i16 @loadCombine_2consecutive_separateptr(ptr %p, ptr %p2) {
}
define i64 @load64_farLoads(ptr %ptr) {
+; LE-LABEL: @load64_farLoads(
+; LE-NEXT: entry:
+; LE-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR:%.*]], align 1
+; LE-NEXT: ret i64 [[TMP0]]
+;
+; BE-LABEL: @load64_farLoads(
+; BE-NEXT: entry:
+; BE-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
+; BE-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64
+; BE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; BE-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; BE-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i64
+; BE-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 8
+; BE-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[SHL]]
+; BE-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
+; BE-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
+; BE-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i64
+; BE-NEXT: [[SHL5:%.*]] = shl i64 [[CONV4]], 16
+; BE-NEXT: [[OR6:%.*]] = or i64 [[OR]], [[SHL5]]
+; BE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
+; BE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1
+; BE-NEXT: [[CONV8:%.*]] = zext i8 [[TMP3]] to i64
+; BE-NEXT: [[SHL9:%.*]] = shl i64 [[CONV8]], 24
+; BE-NEXT: [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]]
+; BE-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
+; BE-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1
+; BE-NEXT: [[CONV12:%.*]] = zext i8 [[TMP4]] to i64
+; BE-NEXT: [[SHL13:%.*]] = shl i64 [[CONV12]], 32
+; BE-NEXT: [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]]
+; BE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
+; BE-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1
+; BE-NEXT: [[CONV16:%.*]] = zext i8 [[TMP5]] to i64
+; BE-NEXT: [[SHL17:%.*]] = shl i64 [[CONV16]], 40
+; BE-NEXT: [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]]
+; BE-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
+; BE-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1
+; BE-NEXT: [[CONV20:%.*]] = zext i8 [[TMP6]] to i64
+; BE-NEXT: [[SHL21:%.*]] = shl i64 [[CONV20]], 48
+; BE-NEXT: [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]]
+; BE-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7
+; BE-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1
+; BE-NEXT: [[CONV24:%.*]] = zext i8 [[TMP7]] to i64
+; BE-NEXT: [[SHL25:%.*]] = shl i64 [[CONV24]], 56
+; BE-NEXT: [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]]
+; BE-NEXT: ret i64 [[OR26]]
+;
; ALL-LABEL: @load64_farLoads(
; ALL-NEXT: entry:
; ALL-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
@@ -1058,6 +1322,32 @@ entry:
}
define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) {
+; LE-LABEL: @loadCombine_4consecutive_metadata(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0
+; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive_metadata(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0
+; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0
+; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0
+; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0
+; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT: ret i32 [[O3]]
+;
; ALL-LABEL: @loadCombine_4consecutive_metadata(
; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
@@ -1151,3 +1441,296 @@ define i16 @loadCombine_4consecutive_4bit(ptr %p) {
%o3 = or i16 %o2, %s4
ret i16 %o3
}
+
+define i32 @loadCombine_4consecutive_rev(ptr %p) {
+; ALL-LABEL: @loadCombine_4consecutive_rev(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; ALL-NEXT: [[O1:%.*]] = or i32 [[S4]], [[S3]]
+; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]]
+; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]]
+; ALL-NEXT: ret i32 [[O3]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %p2 = getelementptr i8, ptr %p, i32 2
+ %p3 = getelementptr i8, ptr %p, i32 3
+ %l1 = load i8, ptr %p
+ %l2 = load i8, ptr %p1
+ %l3 = load i8, ptr %p2
+ %l4 = load i8, ptr %p3
+
+ %e1 = zext i8 %l1 to i32
+ %e2 = zext i8 %l2 to i32
+ %e3 = zext i8 %l3 to i32
+ %e4 = zext i8 %l4 to i32
+
+ %s2 = shl i32 %e2, 8
+ %s3 = shl i32 %e3, 16
+ %s4 = shl i32 %e4, 24
+
+ %o1 = or i32 %s4, %s3
+ %o2 = or i32 %o1, %s2
+ %o3 = or i32 %o2, %e1
+ ret i32 %o3
+}
+
+define i64 @loadCombine_8consecutive_rev(ptr %p) {
+; ALL-LABEL: @loadCombine_8consecutive_rev(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; ALL-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; ALL-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5
+; ALL-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6
+; ALL-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; ALL-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1
+; ALL-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1
+; ALL-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1
+; ALL-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
+; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
+; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64
+; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64
+; ALL-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64
+; ALL-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64
+; ALL-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64
+; ALL-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64
+; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8
+; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
+; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 24
+; ALL-NEXT: [[S5:%.*]] = shl i64 [[E5]], 32
+; ALL-NEXT: [[S6:%.*]] = shl i64 [[E6]], 40
+; ALL-NEXT: [[S7:%.*]] = shl i64 [[E7]], 48
+; ALL-NEXT: [[S8:%.*]] = shl i64 [[E8]], 56
+; ALL-NEXT: [[O7:%.*]] = or i64 [[S8]], [[S7]]
+; ALL-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]]
+; ALL-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]]
+; ALL-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]]
+; ALL-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]]
+; ALL-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]]
+; ALL-NEXT: [[O1:%.*]] = or i64 [[O2]], [[E1]]
+; ALL-NEXT: ret i64 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %p2 = getelementptr i8, ptr %p, i32 2
+ %p3 = getelementptr i8, ptr %p, i32 3
+ %p4 = getelementptr i8, ptr %p, i32 4
+ %p5 = getelementptr i8, ptr %p, i32 5
+ %p6 = getelementptr i8, ptr %p, i32 6
+ %p7 = getelementptr i8, ptr %p, i32 7
+ %l1 = load i8, ptr %p
+ %l2 = load i8, ptr %p1
+ %l3 = load i8, ptr %p2
+ %l4 = load i8, ptr %p3
+ %l5 = load i8, ptr %p4
+ %l6 = load i8, ptr %p5
+ %l7 = load i8, ptr %p6
+ %l8 = load i8, ptr %p7
+
+ %e1 = zext i8 %l1 to i64
+ %e2 = zext i8 %l2 to i64
+ %e3 = zext i8 %l3 to i64
+ %e4 = zext i8 %l4 to i64
+ %e5 = zext i8 %l5 to i64
+ %e6 = zext i8 %l6 to i64
+ %e7 = zext i8 %l7 to i64
+ %e8 = zext i8 %l8 to i64
+
+ %s2 = shl i64 %e2, 8
+ %s3 = shl i64 %e3, 16
+ %s4 = shl i64 %e4, 24
+ %s5 = shl i64 %e5, 32
+ %s6 = shl i64 %e6, 40
+ %s7 = shl i64 %e7, 48
+ %s8 = shl i64 %e8, 56
+
+ %o7 = or i64 %s8, %s7
+ %o6 = or i64 %o7, %s6
+ %o5 = or i64 %o6, %s5
+ %o4 = or i64 %o5, %s4
+ %o3 = or i64 %o4, %s3
+ %o2 = or i64 %o3, %s2
+ %o1 = or i64 %o2, %e1
+ ret i64 %o1
+}
+
+define i64 @loadCombine_8consecutive_rev_BE(ptr %p) {
+; ALL-LABEL: @loadCombine_8consecutive_rev_BE(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; ALL-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; ALL-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5
+; ALL-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6
+; ALL-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; ALL-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1
+; ALL-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1
+; ALL-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1
+; ALL-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
+; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
+; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64
+; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64
+; ALL-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64
+; ALL-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64
+; ALL-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64
+; ALL-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64
+; ALL-NEXT: [[S1:%.*]] = shl i64 [[E1]], 56
+; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 48
+; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 40
+; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
+; ALL-NEXT: [[S5:%.*]] = shl i64 [[E5]], 24
+; ALL-NEXT: [[S6:%.*]] = shl i64 [[E6]], 16
+; ALL-NEXT: [[S7:%.*]] = shl i64 [[E7]], 8
+; ALL-NEXT: [[O7:%.*]] = or i64 [[E8]], [[S7]]
+; ALL-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]]
+; ALL-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]]
+; ALL-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]]
+; ALL-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]]
+; ALL-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]]
+; ALL-NEXT: [[O1:%.*]] = or i64 [[O2]], [[S1]]
+; ALL-NEXT: ret i64 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %p2 = getelementptr i8, ptr %p, i32 2
+ %p3 = getelementptr i8, ptr %p, i32 3
+ %p4 = getelementptr i8, ptr %p, i32 4
+ %p5 = getelementptr i8, ptr %p, i32 5
+ %p6 = getelementptr i8, ptr %p, i32 6
+ %p7 = getelementptr i8, ptr %p, i32 7
+ %l1 = load i8, ptr %p
+ %l2 = load i8, ptr %p1
+ %l3 = load i8, ptr %p2
+ %l4 = load i8, ptr %p3
+ %l5 = load i8, ptr %p4
+ %l6 = load i8, ptr %p5
+ %l7 = load i8, ptr %p6
+ %l8 = load i8, ptr %p7
+
+ %e1 = zext i8 %l1 to i64
+ %e2 = zext i8 %l2 to i64
+ %e3 = zext i8 %l3 to i64
+ %e4 = zext i8 %l4 to i64
+ %e5 = zext i8 %l5 to i64
+ %e6 = zext i8 %l6 to i64
+ %e7 = zext i8 %l7 to i64
+ %e8 = zext i8 %l8 to i64
+
+ %s1 = shl i64 %e1, 56
+ %s2 = shl i64 %e2, 48
+ %s3 = shl i64 %e3, 40
+ %s4 = shl i64 %e4, 32
+ %s5 = shl i64 %e5, 24
+ %s6 = shl i64 %e6, 16
+ %s7 = shl i64 %e7, 8
+
+ %o7 = or i64 %e8, %s7
+ %o6 = or i64 %o7, %s6
+ %o5 = or i64 %o6, %s5
+ %o4 = or i64 %o5, %s4
+ %o3 = or i64 %o4, %s3
+ %o2 = or i64 %o3, %s2
+ %o1 = or i64 %o2, %s1
+ ret i64 %o1
+}
+
+define i64 @eggs(ptr noundef readonly %arg) {
+; ALL-LABEL: @eggs(
+; ALL-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARG:%.*]], align 1
+; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
+; ALL-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
+; ALL-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
+; ALL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 3
+; ALL-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
+; ALL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 4
+; ALL-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
+; ALL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 5
+; ALL-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
+; ALL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 6
+; ALL-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1
+; ALL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 7
+; ALL-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
+; ALL-NEXT: [[TMP18:%.*]] = zext i8 [[TMP17]] to i64
+; ALL-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 56
+; ALL-NEXT: [[TMP20:%.*]] = zext i8 [[TMP15]] to i64
+; ALL-NEXT: [[TMP21:%.*]] = shl nuw nsw i64 [[TMP20]], 48
+; ALL-NEXT: [[TMP22:%.*]] = or i64 [[TMP19]], [[TMP21]]
+; ALL-NEXT: [[TMP23:%.*]] = zext i8 [[TMP13]] to i64
+; ALL-NEXT: [[TMP24:%.*]] = shl nuw nsw i64 [[TMP23]], 40
+; ALL-NEXT: [[TMP25:%.*]] = or i64 [[TMP22]], [[TMP24]]
+; ALL-NEXT: [[TMP26:%.*]] = zext i8 [[TMP11]] to i64
+; ALL-NEXT: [[TMP27:%.*]] = shl nuw nsw i64 [[TMP26]], 32
+; ALL-NEXT: [[TMP28:%.*]] = or i64 [[TMP25]], [[TMP27]]
+; ALL-NEXT: [[TMP29:%.*]] = zext i8 [[TMP9]] to i64
+; ALL-NEXT: [[TMP30:%.*]] = shl nuw nsw i64 [[TMP29]], 24
+; ALL-NEXT: [[TMP31:%.*]] = or i64 [[TMP28]], [[TMP30]]
+; ALL-NEXT: [[TMP32:%.*]] = zext i8 [[TMP7]] to i64
+; ALL-NEXT: [[TMP33:%.*]] = shl nuw nsw i64 [[TMP32]], 16
+; ALL-NEXT: [[TMP34:%.*]] = zext i8 [[TMP5]] to i64
+; ALL-NEXT: [[TMP35:%.*]] = shl nuw nsw i64 [[TMP34]], 8
+; ALL-NEXT: [[TMP36:%.*]] = or i64 [[TMP31]], [[TMP33]]
+; ALL-NEXT: [[TMP37:%.*]] = zext i8 [[TMP3]] to i64
+; ALL-NEXT: [[TMP38:%.*]] = or i64 [[TMP36]], [[TMP35]]
+; ALL-NEXT: [[TMP39:%.*]] = or i64 [[TMP38]], [[TMP37]]
+; ALL-NEXT: ret i64 [[TMP39]]
+;
+ %tmp3 = load i8, ptr %arg, align 1
+ %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
+ %tmp5 = load i8, ptr %tmp4, align 1
+ %tmp6 = getelementptr inbounds i8, ptr %arg, i64 2
+ %tmp7 = load i8, ptr %tmp6, align 1
+ %tmp8 = getelementptr inbounds i8, ptr %arg, i64 3
+ %tmp9 = load i8, ptr %tmp8, align 1
+ %tmp10 = getelementptr inbounds i8, ptr %arg, i64 4
+ %tmp11 = load i8, ptr %tmp10, align 1
+ %tmp12 = getelementptr inbounds i8, ptr %arg, i64 5
+ %tmp13 = load i8, ptr %tmp12, align 1
+ %tmp14 = getelementptr inbounds i8, ptr %arg, i64 6
+ %tmp15 = load i8, ptr %tmp14, align 1
+ %tmp16 = getelementptr inbounds i8, ptr %arg, i64 7
+ %tmp17 = load i8, ptr %tmp16, align 1
+ %tmp18 = zext i8 %tmp17 to i64
+ %tmp19 = shl nuw i64 %tmp18, 56
+ %tmp20 = zext i8 %tmp15 to i64
+ %tmp21 = shl nuw nsw i64 %tmp20, 48
+ %tmp22 = or i64 %tmp19, %tmp21
+ %tmp23 = zext i8 %tmp13 to i64
+ %tmp24 = shl nuw nsw i64 %tmp23, 40
+ %tmp25 = or i64 %tmp22, %tmp24
+ %tmp26 = zext i8 %tmp11 to i64
+ %tmp27 = shl nuw nsw i64 %tmp26, 32
+ %tmp28 = or i64 %tmp25, %tmp27
+ %tmp29 = zext i8 %tmp9 to i64
+ %tmp30 = shl nuw nsw i64 %tmp29, 24
+ %tmp31 = or i64 %tmp28, %tmp30
+ %tmp32 = zext i8 %tmp7 to i64
+ %tmp33 = shl nuw nsw i64 %tmp32, 16
+ %tmp34 = zext i8 %tmp5 to i64
+ %tmp35 = shl nuw nsw i64 %tmp34, 8
+ %tmp36 = or i64 %tmp31, %tmp33
+ %tmp37 = zext i8 %tmp3 to i64
+ %tmp38 = or i64 %tmp36, %tmp35
+ %tmp39 = or i64 %tmp38, %tmp37
+ ret i64 %tmp39
+}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 9258fa4f59e82..c2cb01279fc50 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1152,3 +1152,296 @@ define i16 @loadCombine_4consecutive_4bit(ptr %p) {
%o3 = or i16 %o2, %s4
ret i16 %o3
}
+
+define i32 @loadCombine_4consecutive_rev(ptr %p) {
+; ALL-LABEL: @loadCombine_4consecutive_rev(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
+; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
+; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
+; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
+; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
+; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
+; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
+; ALL-NEXT: [[O1:%.*]] = or i32 [[S4]], [[S3]]
+; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]]
+; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]]
+; ALL-NEXT: ret i32 [[O3]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %p2 = getelementptr i8, ptr %p, i32 2
+ %p3 = getelementptr i8, ptr %p, i32 3
+ %l1 = load i8, ptr %p
+ %l2 = load i8, ptr %p1
+ %l3 = load i8, ptr %p2
+ %l4 = load i8, ptr %p3
+
+ %e1 = zext i8 %l1 to i32
+ %e2 = zext i8 %l2 to i32
+ %e3 = zext i8 %l3 to i32
+ %e4 = zext i8 %l4 to i32
+
+ %s2 = shl i32 %e2, 8
+ %s3 = shl i32 %e3, 16
+ %s4 = shl i32 %e4, 24
+
+ %o1 = or i32 %s4, %s3
+ %o2 = or i32 %o1, %s2
+ %o3 = or i32 %o2, %e1
+ ret i32 %o3
+}
+
+define i64 @loadCombine_8consecutive_rev(ptr %p) {
+; ALL-LABEL: @loadCombine_8consecutive_rev(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; ALL-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; ALL-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5
+; ALL-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6
+; ALL-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; ALL-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1
+; ALL-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1
+; ALL-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1
+; ALL-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
+; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
+; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64
+; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64
+; ALL-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64
+; ALL-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64
+; ALL-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64
+; ALL-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64
+; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8
+; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16
+; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 24
+; ALL-NEXT: [[S5:%.*]] = shl i64 [[E5]], 32
+; ALL-NEXT: [[S6:%.*]] = shl i64 [[E6]], 40
+; ALL-NEXT: [[S7:%.*]] = shl i64 [[E7]], 48
+; ALL-NEXT: [[S8:%.*]] = shl i64 [[E8]], 56
+; ALL-NEXT: [[O7:%.*]] = or i64 [[S8]], [[S7]]
+; ALL-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]]
+; ALL-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]]
+; ALL-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]]
+; ALL-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]]
+; ALL-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]]
+; ALL-NEXT: [[O1:%.*]] = or i64 [[O2]], [[E1]]
+; ALL-NEXT: ret i64 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %p2 = getelementptr i8, ptr %p, i32 2
+ %p3 = getelementptr i8, ptr %p, i32 3
+ %p4 = getelementptr i8, ptr %p, i32 4
+ %p5 = getelementptr i8, ptr %p, i32 5
+ %p6 = getelementptr i8, ptr %p, i32 6
+ %p7 = getelementptr i8, ptr %p, i32 7
+ %l1 = load i8, ptr %p
+ %l2 = load i8, ptr %p1
+ %l3 = load i8, ptr %p2
+ %l4 = load i8, ptr %p3
+ %l5 = load i8, ptr %p4
+ %l6 = load i8, ptr %p5
+ %l7 = load i8, ptr %p6
+ %l8 = load i8, ptr %p7
+
+ %e1 = zext i8 %l1 to i64
+ %e2 = zext i8 %l2 to i64
+ %e3 = zext i8 %l3 to i64
+ %e4 = zext i8 %l4 to i64
+ %e5 = zext i8 %l5 to i64
+ %e6 = zext i8 %l6 to i64
+ %e7 = zext i8 %l7 to i64
+ %e8 = zext i8 %l8 to i64
+
+ %s2 = shl i64 %e2, 8
+ %s3 = shl i64 %e3, 16
+ %s4 = shl i64 %e4, 24
+ %s5 = shl i64 %e5, 32
+ %s6 = shl i64 %e6, 40
+ %s7 = shl i64 %e7, 48
+ %s8 = shl i64 %e8, 56
+
+ %o7 = or i64 %s8, %s7
+ %o6 = or i64 %o7, %s6
+ %o5 = or i64 %o6, %s5
+ %o4 = or i64 %o5, %s4
+ %o3 = or i64 %o4, %s3
+ %o2 = or i64 %o3, %s2
+ %o1 = or i64 %o2, %e1
+ ret i64 %o1
+}
+
+define i64 @loadCombine_8consecutive_rev_BE(ptr %p) {
+; ALL-LABEL: @loadCombine_8consecutive_rev_BE(
+; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; ALL-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; ALL-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5
+; ALL-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6
+; ALL-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7
+; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
+; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; ALL-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1
+; ALL-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1
+; ALL-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1
+; ALL-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1
+; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64
+; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64
+; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64
+; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64
+; ALL-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64
+; ALL-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64
+; ALL-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64
+; ALL-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64
+; ALL-NEXT: [[S1:%.*]] = shl i64 [[E1]], 56
+; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 48
+; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 40
+; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32
+; ALL-NEXT: [[S5:%.*]] = shl i64 [[E5]], 24
+; ALL-NEXT: [[S6:%.*]] = shl i64 [[E6]], 16
+; ALL-NEXT: [[S7:%.*]] = shl i64 [[E7]], 8
+; ALL-NEXT: [[O7:%.*]] = or i64 [[E8]], [[S7]]
+; ALL-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]]
+; ALL-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]]
+; ALL-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]]
+; ALL-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]]
+; ALL-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]]
+; ALL-NEXT: [[O1:%.*]] = or i64 [[O2]], [[S1]]
+; ALL-NEXT: ret i64 [[O1]]
+;
+ %p1 = getelementptr i8, ptr %p, i32 1
+ %p2 = getelementptr i8, ptr %p, i32 2
+ %p3 = getelementptr i8, ptr %p, i32 3
+ %p4 = getelementptr i8, ptr %p, i32 4
+ %p5 = getelementptr i8, ptr %p, i32 5
+ %p6 = getelementptr i8, ptr %p, i32 6
+ %p7 = getelementptr i8, ptr %p, i32 7
+ %l1 = load i8, ptr %p
+ %l2 = load i8, ptr %p1
+ %l3 = load i8, ptr %p2
+ %l4 = load i8, ptr %p3
+ %l5 = load i8, ptr %p4
+ %l6 = load i8, ptr %p5
+ %l7 = load i8, ptr %p6
+ %l8 = load i8, ptr %p7
+
+ %e1 = zext i8 %l1 to i64
+ %e2 = zext i8 %l2 to i64
+ %e3 = zext i8 %l3 to i64
+ %e4 = zext i8 %l4 to i64
+ %e5 = zext i8 %l5 to i64
+ %e6 = zext i8 %l6 to i64
+ %e7 = zext i8 %l7 to i64
+ %e8 = zext i8 %l8 to i64
+
+ %s1 = shl i64 %e1, 56
+ %s2 = shl i64 %e2, 48
+ %s3 = shl i64 %e3, 40
+ %s4 = shl i64 %e4, 32
+ %s5 = shl i64 %e5, 24
+ %s6 = shl i64 %e6, 16
+ %s7 = shl i64 %e7, 8
+
+ %o7 = or i64 %e8, %s7
+ %o6 = or i64 %o7, %s6
+ %o5 = or i64 %o6, %s5
+ %o4 = or i64 %o5, %s4
+ %o3 = or i64 %o4, %s3
+ %o2 = or i64 %o3, %s2
+ %o1 = or i64 %o2, %s1
+ ret i64 %o1
+}
+
+define i64 @eggs(ptr noundef readonly %arg) {
+; ALL-LABEL: @eggs(
+; ALL-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARG:%.*]], align 1
+; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
+; ALL-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
+; ALL-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
+; ALL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 3
+; ALL-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
+; ALL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 4
+; ALL-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
+; ALL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 5
+; ALL-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
+; ALL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 6
+; ALL-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1
+; ALL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 7
+; ALL-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
+; ALL-NEXT: [[TMP18:%.*]] = zext i8 [[TMP17]] to i64
+; ALL-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 56
+; ALL-NEXT: [[TMP20:%.*]] = zext i8 [[TMP15]] to i64
+; ALL-NEXT: [[TMP21:%.*]] = shl nuw nsw i64 [[TMP20]], 48
+; ALL-NEXT: [[TMP22:%.*]] = or i64 [[TMP19]], [[TMP21]]
+; ALL-NEXT: [[TMP23:%.*]] = zext i8 [[TMP13]] to i64
+; ALL-NEXT: [[TMP24:%.*]] = shl nuw nsw i64 [[TMP23]], 40
+; ALL-NEXT: [[TMP25:%.*]] = or i64 [[TMP22]], [[TMP24]]
+; ALL-NEXT: [[TMP26:%.*]] = zext i8 [[TMP11]] to i64
+; ALL-NEXT: [[TMP27:%.*]] = shl nuw nsw i64 [[TMP26]], 32
+; ALL-NEXT: [[TMP28:%.*]] = or i64 [[TMP25]], [[TMP27]]
+; ALL-NEXT: [[TMP29:%.*]] = zext i8 [[TMP9]] to i64
+; ALL-NEXT: [[TMP30:%.*]] = shl nuw nsw i64 [[TMP29]], 24
+; ALL-NEXT: [[TMP31:%.*]] = or i64 [[TMP28]], [[TMP30]]
+; ALL-NEXT: [[TMP32:%.*]] = zext i8 [[TMP7]] to i64
+; ALL-NEXT: [[TMP33:%.*]] = shl nuw nsw i64 [[TMP32]], 16
+; ALL-NEXT: [[TMP34:%.*]] = zext i8 [[TMP5]] to i64
+; ALL-NEXT: [[TMP35:%.*]] = shl nuw nsw i64 [[TMP34]], 8
+; ALL-NEXT: [[TMP36:%.*]] = or i64 [[TMP31]], [[TMP33]]
+; ALL-NEXT: [[TMP37:%.*]] = zext i8 [[TMP3]] to i64
+; ALL-NEXT: [[TMP38:%.*]] = or i64 [[TMP36]], [[TMP35]]
+; ALL-NEXT: [[TMP39:%.*]] = or i64 [[TMP38]], [[TMP37]]
+; ALL-NEXT: ret i64 [[TMP39]]
+;
+ %tmp3 = load i8, ptr %arg, align 1
+ %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
+ %tmp5 = load i8, ptr %tmp4, align 1
+ %tmp6 = getelementptr inbounds i8, ptr %arg, i64 2
+ %tmp7 = load i8, ptr %tmp6, align 1
+ %tmp8 = getelementptr inbounds i8, ptr %arg, i64 3
+ %tmp9 = load i8, ptr %tmp8, align 1
+ %tmp10 = getelementptr inbounds i8, ptr %arg, i64 4
+ %tmp11 = load i8, ptr %tmp10, align 1
+ %tmp12 = getelementptr inbounds i8, ptr %arg, i64 5
+ %tmp13 = load i8, ptr %tmp12, align 1
+ %tmp14 = getelementptr inbounds i8, ptr %arg, i64 6
+ %tmp15 = load i8, ptr %tmp14, align 1
+ %tmp16 = getelementptr inbounds i8, ptr %arg, i64 7
+ %tmp17 = load i8, ptr %tmp16, align 1
+ %tmp18 = zext i8 %tmp17 to i64
+ %tmp19 = shl nuw i64 %tmp18, 56
+ %tmp20 = zext i8 %tmp15 to i64
+ %tmp21 = shl nuw nsw i64 %tmp20, 48
+ %tmp22 = or i64 %tmp19, %tmp21
+ %tmp23 = zext i8 %tmp13 to i64
+ %tmp24 = shl nuw nsw i64 %tmp23, 40
+ %tmp25 = or i64 %tmp22, %tmp24
+ %tmp26 = zext i8 %tmp11 to i64
+ %tmp27 = shl nuw nsw i64 %tmp26, 32
+ %tmp28 = or i64 %tmp25, %tmp27
+ %tmp29 = zext i8 %tmp9 to i64
+ %tmp30 = shl nuw nsw i64 %tmp29, 24
+ %tmp31 = or i64 %tmp28, %tmp30
+ %tmp32 = zext i8 %tmp7 to i64
+ %tmp33 = shl nuw nsw i64 %tmp32, 16
+ %tmp34 = zext i8 %tmp5 to i64
+ %tmp35 = shl nuw nsw i64 %tmp34, 8
+ %tmp36 = or i64 %tmp31, %tmp33
+ %tmp37 = zext i8 %tmp3 to i64
+ %tmp38 = or i64 %tmp36, %tmp35
+ %tmp39 = or i64 %tmp38, %tmp37
+ ret i64 %tmp39
+}
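The new *_rev tests appended to each file build the same value but or the shifted bytes together starting from the most-significant byte (%o1 = or %s4, %s3, working down to %e1), and @eggs repeats that shape with nuw/nsw flags on the shifts. The CHECK lines above record that this reversed or-chain is currently left unfolded on every RUN line; the load combine patch these tests target is meant to recognize it, in which case the little-endian output would presumably match the forward-order tests. A sketch of the anticipated fold, not the current output:

define i32 @loadCombine_4consecutive_rev(ptr %p) {
  %l1 = load i32, ptr %p, align 1
  ret i32 %l1
}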