[PATCH] D102146: [VectorCombine] Restrict single-element-store index to inbounds constant
Qiu Chaofan via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun May 9 19:50:43 PDT 2021
qiucf created this revision.
qiucf added reviewers: nlopes, fhahn, spatel.
Herald added subscribers: arphaman, hiraditya.
qiucf requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
As @nlopes illustrated in rG2db4979 <https://reviews.llvm.org/rG2db4979c0fe05eac82ca770516057b7b9c4433e3>, we need to restrict the index to a constant that is known to be in bounds; otherwise, the transformation is incorrect.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D102146
Files:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/load-insert-store.ll
Index: llvm/test/Transforms/VectorCombine/load-insert-store.ll
===================================================================
--- llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -30,6 +30,22 @@
ret void
}
+; To verify case when index is out of bounds
+define void @insert_store_outofbounds(<8 x i16>* %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store_outofbounds(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
+; CHECK-NEXT: store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <8 x i16>, <8 x i16>* %q
+ %vecins = insertelement <8 x i16> %0, i16 %s, i32 9
+ store <8 x i16> %vecins, <8 x i16>* %q
+ ret void
+}
+
define void @insert_store_v9i4(<9 x i4>* %q, i4 zeroext %s) {
; CHECK-LABEL: @insert_store_v9i4(
; CHECK-NEXT: entry:
@@ -82,8 +98,9 @@
define void @insert_store_nonconst(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX:%.*]]
-; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
+; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT: ret void
;
entry:
@@ -93,17 +110,17 @@
ret void
}
-define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store_ptr_strip(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 [[IDX:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 3
; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT: ret void
;
entry:
%0 = load <16 x i8>, <16 x i8>* %q
- %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
+ %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
%addr0 = bitcast <16 x i8>* %q to <2 x i64>*
%addr1 = getelementptr <2 x i64>, <2 x i64>* %addr0, i64 0
%addr2 = bitcast <2 x i64>* %addr1 to <16 x i8>*
Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -781,24 +781,28 @@
// store i32 %b, i32* %1
bool VectorCombine::foldSingleElementStore(Instruction &I) {
StoreInst *SI = dyn_cast<StoreInst>(&I);
- if (!SI || !SI->isSimple() || !SI->getValueOperand()->getType()->isVectorTy())
+ if (!SI || !SI->isSimple() ||
+ !isa<FixedVectorType>(SI->getValueOperand()->getType()))
return false;
// TODO: Combine more complicated patterns (multiple insert) by referencing
// TargetTransformInfo.
Instruction *Source;
- Value *NewElement, *Idx;
+ Value *NewElement;
+ ConstantInt *Idx;
if (!match(SI->getValueOperand(),
m_InsertElt(m_Instruction(Source), m_Value(NewElement),
- m_Value(Idx))))
+ m_ConstantInt(Idx))))
return false;
if (auto *Load = dyn_cast<LoadInst>(Source)) {
+ auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
const DataLayout &DL = I.getModule()->getDataLayout();
Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
// Don't optimize for atomic/volatile load or stores.
if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
!DL.typeSizeEqualsStoreSize(Load->getType()) ||
+ Idx->uge(VecTy->getNumElements()) ||
SrcAddr != SI->getPointerOperand()->stripPointerCasts() ||
isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
MemoryLocation::get(SI), AA))
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D102146.343958.patch
Type: text/x-patch
Size: 4276 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210510/6632c1fc/attachment.bin>
More information about the llvm-commits
mailing list