[llvm] [SROA] Add Stored Value Size Check for Tree-Structured Merge (PR #162921)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 10 14:12:03 PDT 2025
https://github.com/Chengjunp created https://github.com/llvm/llvm-project/pull/162921
This change fixes a bug in SROA where the tree-structured merge optimization was incorrectly applied when the size of the stored value was not a multiple of the element type size of the new alloca. A simple repro:
```
define <1 x i32> @foo(<1 x i16> %a, <1 x i16> %b) {
entry:
%alloca = alloca [1 x i32]
%ptr0 = getelementptr inbounds [2 x i16], ptr %alloca, i32 0, i32 0
store <1 x i16> %a, ptr %ptr0
%ptr1 = getelementptr inbounds [2 x i16], ptr %alloca, i32 0, i32 1
store <1 x i16> %b, ptr %ptr1
%result = load <1 x i32>, ptr %alloca
ret <1 x i32> %result
}
```
Currently, this input crashes the compiler.
With this change, we skip the tree-structured merge in this case and fall back to normal SROA.
>From db3efa1452151fc3a1c42bb966f7dc7a877b9c29 Mon Sep 17 00:00:00 2001
From: chengjunp <chengjunp at nvidia.com>
Date: Fri, 10 Oct 2025 21:11:08 +0000
Subject: [PATCH] Fix SROA issue
---
llvm/lib/Transforms/Scalar/SROA.cpp | 8 ++++++++
...vector-promotion-cannot-tree-structure-merge.ll | 14 ++++++++++++++
2 files changed, 22 insertions(+)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 45d3d493a9e68..dc24adab42a3b 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2961,6 +2961,7 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
isa<FixedVectorType>(NewAI.getAllocatedType())
? cast<FixedVectorType>(NewAI.getAllocatedType())->getElementType()
: Type::getInt8Ty(NewAI.getContext());
+ unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);
// Helper to check if a type is
// 1. A fixed vector type
@@ -2991,9 +2992,16 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
// Do not handle the case if
// 1. The store does not meet the conditions in the helper function
// 2. The store is volatile
+ // 3. The store value type size is not a multiple of the allocated
+ // element type size
if (!IsTypeValidForTreeStructuredMerge(
SI->getValueOperand()->getType()) ||
SI->isVolatile())
+ return std::nullopt;
+ auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
+ if (NumElts * EltSize % AllocatedEltTySize != 0)
return std::nullopt;
StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
SI->getValueOperand());
diff --git a/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll b/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll
index c858d071451e8..ead6e027ed37c 100644
--- a/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll
@@ -219,4 +219,18 @@ entry:
}
+define <1 x i32> @test_store_value_size_not_multiple_of_allocated_element_type_size(<1 x i16> %a, <1 x i16> %b) {
+entry:
+ %alloca = alloca [2 x i16]
+
+ %ptr0 = getelementptr inbounds [2 x i16], ptr %alloca, i32 0, i32 0
+ store <1 x i16> %a, ptr %ptr0
+
+ %ptr1 = getelementptr inbounds [2 x i16], ptr %alloca, i32 0, i32 1
+ store <1 x i16> %b, ptr %ptr1
+
+ %result = load <1 x i32>, ptr %alloca
+ ret <1 x i32> %result
+}
+
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
More information about the llvm-commits
mailing list