[PATCH] D102146: [VectorComine] Restrict single-element-store index to inbounds constant

Sun May 9 19:50:43 PDT 2021

qiucf created this revision.
qiucf added reviewers: nlopes, fhahn, spatel.
Herald added subscribers: arphaman, hiraditya.
qiucf requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

As @nlopes illustrated in rG2db4979 <https://reviews.llvm.org/rG2db4979c0fe05eac82ca770516057b7b9c4433e3>, we need to restrict the index to known inbounds constant otherwise the transformation is incorrect.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D102146

Files:
  llvm/lib/Transforms/Vectorize/VectorCombine.cpp
  llvm/test/Transforms/VectorCombine/load-insert-store.ll


Index: llvm/test/Transforms/VectorCombine/load-insert-store.ll
===================================================================

--- llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -30,6 +30,22 @@
   ret void
 }
 
+; To verify case when index is out of bounds
+define void @insert_store_outofbounds(<8 x i16>* %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store_outofbounds(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
+; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load <8 x i16>, <8 x i16>* %q
+  %vecins = insertelement <8 x i16> %0, i16 %s, i32 9
+  store <8 x i16> %vecins, <8 x i16>* %q
+  ret void
+}
+
 define void @insert_store_v9i4(<9 x i4>* %q, i4 zeroext %s) {
 ; CHECK-LABEL: @insert_store_v9i4(
 ; CHECK-NEXT:  entry:
@@ -82,8 +98,9 @@
 define void @insert_store_nonconst(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
 ; CHECK-LABEL: @insert_store_nonconst(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX:%.*]]
-; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -93,17 +110,17 @@
   ret void
 }
 
-define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
+define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s) {
 ; CHECK-LABEL: @insert_store_ptr_strip(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 [[IDX:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 3
 ; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %0 = load <16 x i8>, <16 x i8>* %q
-  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
+  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
   %addr0 = bitcast <16 x i8>* %q to <2 x i64>*
   %addr1 = getelementptr <2 x i64>, <2 x i64>* %addr0, i64 0
   %addr2 = bitcast <2 x i64>* %addr1 to <16 x i8>*
Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -781,24 +781,28 @@
 //   store i32 %b, i32* %1
 bool VectorCombine::foldSingleElementStore(Instruction &I) {
   StoreInst *SI = dyn_cast<StoreInst>(&I);
-  if (!SI || !SI->isSimple() || !SI->getValueOperand()->getType()->isVectorTy())
+  if (!SI || !SI->isSimple() ||
+      !isa<FixedVectorType>(SI->getValueOperand()->getType()))
     return false;
 
   // TODO: Combine more complicated patterns (multiple insert) by referencing
   // TargetTransformInfo.
   Instruction *Source;
-  Value *NewElement, *Idx;
+  Value *NewElement;
+  ConstantInt *Idx;
   if (!match(SI->getValueOperand(),
              m_InsertElt(m_Instruction(Source), m_Value(NewElement),
-                         m_Value(Idx))))
+                         m_ConstantInt(Idx))))
     return false;
 
   if (auto *Load = dyn_cast<LoadInst>(Source)) {
+    auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
     const DataLayout &DL = I.getModule()->getDataLayout();
     Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
     // Don't optimize for atomic/volatile load or stores.
     if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
         !DL.typeSizeEqualsStoreSize(Load->getType()) ||
+        Idx->uge(VecTy->getNumElements()) ||
         SrcAddr != SI->getPointerOperand()->stripPointerCasts() ||
         isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
                              MemoryLocation::get(SI), AA))


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D102146.343958.patch
Type: text/x-patch
Size: 4276 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210510/6632c1fc/attachment.bin>