[llvm] [AggressiveInstCombine] Support store merge with non-consecutive parts (PR #149807)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 21 05:43:07 PDT 2025
https://github.com/nikic created https://github.com/llvm/llvm-project/pull/149807
This is a minor extension of #147540, resolving one of the FIXMEs. If the collected parts contain some non-consecutive elements, we can still handle smaller ranges that *are* consecutive.
This is not common in practice and mostly shows up when the same value is stored at two different offsets.
llvm-opt-benchmark results: https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2593
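For illustration, a minimal before/after sketch of the new behavior, mirroring the test_multiple_parts_with_gap1 test added below (the %lo name is hypothetical, chosen here just for the merged value): parts of %x are stored at byte offsets 0, 1 and 3, so only the consecutive run at offsets 0-1 is merged into an i16 store, while the store at offset 3 is left untouched.

  ; Before: three i8 stores of parts of %x at offsets 0, 1 and 3.
  %x.0 = trunc i32 %x to i8
  store i8 %x.0, ptr %p
  %shr.1 = lshr i32 %x, 8
  %x.1 = trunc i32 %shr.1 to i8
  %gep.1 = getelementptr i8, ptr %p, i64 1
  store i8 %x.1, ptr %gep.1
  %shr.3 = lshr i32 %x, 24
  %x.3 = trunc i32 %shr.3 to i8
  %gep.3 = getelementptr i8, ptr %p, i64 3
  store i8 %x.3, ptr %gep.3

  ; After: the consecutive run at offsets 0-1 becomes one i16 store;
  ; the store at offset 3 is unchanged.
  %lo = trunc i32 %x to i16
  store i16 %lo, ptr %p, align 1
  %shr.3 = lshr i32 %x, 24
  %x.3 = trunc i32 %shr.3 to i8
  %gep.3 = getelementptr i8, ptr %p, i64 3
  store i8 %x.3, ptr %gep.3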
From d3f21ab3bfb87b89b913dce44226731623818cc8 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Mon, 21 Jul 2025 12:56:23 +0200
Subject: [PATCH] [AggressiveInstCombine] Support store merge with
non-consecutive parts
This is a minor extension of #147540, resolving one of the FIXMEs.
If the collected parts contain some non-consecutive elements, we
can still handle smaller ranges that *are* consecutive.
This is not common in practice and mostly shows up when the same
value is stored at two different offsets.
llvm-opt-benchmark results: https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2593
---
.../AggressiveInstCombine.cpp | 60 +++++++----
.../AggressiveInstCombine/X86/store-merge.ll | 99 +++++++++++++++++++
2 files changed, 138 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index a3a0e31f887ab..7fa6e6c5161cf 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -886,35 +886,20 @@ static std::optional<PartStore> matchPartStore(Instruction &I,
return {{PtrBase, PtrOffset, Val, ValOffset, ValWidth, Store}};
}
-static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
- const DataLayout &DL, TargetTransformInfo &TTI) {
+static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
+ unsigned Width, const DataLayout &DL,
+ TargetTransformInfo &TTI) {
if (Parts.size() < 2)
return false;
- // We now have multiple parts of the same value stored to the same pointer.
- // Sort the parts by pointer offset, and make sure they are consistent with
- // the value offsets. Also check that the value is fully covered without
- // overlaps.
- // FIXME: We could support merging stores for only part of the value here.
- llvm::sort(Parts);
- int64_t LastEndOffsetFromFirst = 0;
- const PartStore &First = Parts[0];
- for (const PartStore &Part : Parts) {
- APInt PtrOffsetFromFirst = Part.PtrOffset - First.PtrOffset;
- int64_t ValOffsetFromFirst = Part.ValOffset - First.ValOffset;
- if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst ||
- LastEndOffsetFromFirst != ValOffsetFromFirst)
- return false;
- LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth;
- }
-
// Check whether combining the stores is profitable.
// FIXME: We could generate smaller stores if we can't produce a large one.
+ const PartStore &First = Parts.front();
LLVMContext &Ctx = First.Store->getContext();
- Type *NewTy = Type::getIntNTy(Ctx, LastEndOffsetFromFirst);
+ Type *NewTy = Type::getIntNTy(Ctx, Width);
unsigned Fast = 0;
if (!TTI.isTypeLegal(NewTy) ||
- !TTI.allowsMisalignedMemoryAccesses(Ctx, LastEndOffsetFromFirst,
+ !TTI.allowsMisalignedMemoryAccesses(Ctx, Width,
First.Store->getPointerAddressSpace(),
First.Store->getAlign(), &Fast) ||
!Fast)
@@ -941,6 +926,39 @@ static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
return true;
}
+static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
+ const DataLayout &DL, TargetTransformInfo &TTI) {
+ if (Parts.size() < 2)
+ return false;
+
+ // We now have multiple parts of the same value stored to the same pointer.
+ // Sort the parts by pointer offset, and make sure they are consistent with
+ // the value offsets. Also check that the value is fully covered without
+ // overlaps.
+ bool Changed = false;
+ llvm::sort(Parts);
+ int64_t LastEndOffsetFromFirst = 0;
+ const PartStore *First = &Parts[0];
+ for (const PartStore &Part : Parts) {
+ APInt PtrOffsetFromFirst = Part.PtrOffset - First->PtrOffset;
+ int64_t ValOffsetFromFirst = Part.ValOffset - First->ValOffset;
+ if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst ||
+ LastEndOffsetFromFirst != ValOffsetFromFirst) {
+ Changed |= mergeConsecutivePartStores(ArrayRef(First, &Part),
+ LastEndOffsetFromFirst, DL, TTI);
+ First = &Part;
+ LastEndOffsetFromFirst = Part.ValWidth;
+ continue;
+ }
+
+ LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth;
+ }
+
+ Changed |= mergeConsecutivePartStores(ArrayRef(First, Parts.end()),
+ LastEndOffsetFromFirst, DL, TTI);
+ return Changed;
+}
+
static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
TargetTransformInfo &TTI, AliasAnalysis &AA) {
// FIXME: Add big endian support.
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll
index 38a55e1566a77..4ab8d18eb69b5 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll
@@ -792,6 +792,105 @@ define void @test_i32_tbaa(i32 %x, ptr %p) {
ret void
}
+define void @test_multiple_parts_with_gap1(i32 %x, ptr %p) {
+; CHECK-LABEL: define void @test_multiple_parts_with_gap1(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X]] to i16
+; CHECK-NEXT: store i16 [[TMP1]], ptr [[P]], align 1
+; CHECK-NEXT: [[SHR_3:%.*]] = lshr i32 [[X]], 24
+; CHECK-NEXT: [[X_3:%.*]] = trunc i32 [[SHR_3]] to i8
+; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[P]], i64 3
+; CHECK-NEXT: store i8 [[X_3]], ptr [[GEP_3]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i32 %x to i8
+ store i8 %x.0, ptr %p
+ %shr.1 = lshr i32 %x, 8
+ %x.1 = trunc i32 %shr.1 to i8
+ %gep.1 = getelementptr i8, ptr %p, i64 1
+ store i8 %x.1, ptr %gep.1
+ %shr.3 = lshr i32 %x, 24
+ %x.3 = trunc i32 %shr.3 to i8
+ %gep.3 = getelementptr i8, ptr %p, i64 3
+ store i8 %x.3, ptr %gep.3
+ ret void
+}
+
+define void @test_multiple_parts_with_gap2(i32 %x, ptr %p) {
+; CHECK-LABEL: define void @test_multiple_parts_with_gap2(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i8
+; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1
+; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 16
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT: store i16 [[TMP2]], ptr [[GEP_2]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i32 %x to i8
+ store i8 %x.0, ptr %p
+ %shr.2 = lshr i32 %x, 16
+ %x.2 = trunc i32 %shr.2 to i8
+ %gep.2 = getelementptr i8, ptr %p, i64 1
+ store i8 %x.2, ptr %gep.2
+ %shr.3 = lshr i32 %x, 24
+ %x.3 = trunc i32 %shr.3 to i8
+ %gep.3 = getelementptr i8, ptr %p, i64 2
+ store i8 %x.3, ptr %gep.3
+ ret void
+}
+
+define void @test_multiple_parts_with_gap3(i64 %x, ptr %p) {
+; CHECK-LABEL: define void @test_multiple_parts_with_gap3(
+; CHECK-SAME: i64 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X]] to i16
+; CHECK-NEXT: store i16 [[TMP1]], ptr [[P]], align 1
+; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[P]], i64 3
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[X]], 24
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i16
+; CHECK-NEXT: store i16 [[TMP3]], ptr [[GEP_3]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i64 %x to i8
+ store i8 %x.0, ptr %p
+ %shr.1 = lshr i64 %x, 8
+ %x.1 = trunc i64 %shr.1 to i8
+ %gep.1 = getelementptr i8, ptr %p, i64 1
+ store i8 %x.1, ptr %gep.1
+ %shr.3 = lshr i64 %x, 24
+ %x.3 = trunc i64 %shr.3 to i8
+ %gep.3 = getelementptr i8, ptr %p, i64 3
+ store i8 %x.3, ptr %gep.3
+ %shr.4 = lshr i64 %x, 32
+ %x.4 = trunc i64 %shr.4 to i8
+ %gep.4 = getelementptr i8, ptr %p, i64 4
+ store i8 %x.4, ptr %gep.4
+ ret void
+}
+
+define void @test_store_same_parts_twice(i32 %x, ptr %p) {
+; CHECK-LABEL: define void @test_store_same_parts_twice(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X]] to i16
+; CHECK-NEXT: store i16 [[TMP1]], ptr [[P]], align 1
+; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[P]], i64 2
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[X]] to i16
+; CHECK-NEXT: store i16 [[TMP2]], ptr [[GEP_2]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i32 %x to i8
+ store i8 %x.0, ptr %p
+ %shr.1 = lshr i32 %x, 8
+ %x.1 = trunc i32 %shr.1 to i8
+ %gep.1 = getelementptr i8, ptr %p, i64 1
+ store i8 %x.1, ptr %gep.1
+ %gep.2 = getelementptr i8, ptr %p, i64 2
+ store i8 %x.0, ptr %gep.2
+ %gep.3 = getelementptr i8, ptr %p, i64 3
+ store i8 %x.1, ptr %gep.3
+ ret void
+}
+
!0 = !{!1}
!1 = !{!1, !2}
!2 = !{!2}