<div dir="ltr"><div>It looks like this change crashes on some targets. Here is the (slightly simplified) traceback:</div><div><br></div> #0  SignalHandler(int)<br> #1  __restore_rt<br> #2  (anonymous namespace)::do_free_pages(void*, unsigned long)<div> #3  llvm::detail::DenseMapPair<std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*>* llvm::DenseMapBase<llvm::DenseMap<std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*, llvm::DenseMapInfo<std::__u::pair<llvm::Type*, llvm::ElementCount> >, llvm::detail::DenseMapPair<std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*> >, std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*, llvm::DenseMapInfo<std::__u::pair<llvm::Type*, llvm::ElementCount> >, llvm::detail::DenseMapPair<std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*> >::InsertIntoBucketImpl<std::__u::pair<llvm::Type*, llvm::ElementCount> >(std::__u::pair<llvm::Type*, llvm::ElementCount> const&, std::__u::pair<llvm::Type*, llvm::ElementCount> const&, llvm::detail::DenseMapPair<std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*>*)<div> #4  llvm::VectorType::get(llvm::Type*, llvm::ElementCount)<div> #5  llvm::ShuffleVectorInst::ShuffleVectorInst(llvm::Value*, llvm::Value*, llvm::Value*, llvm::Twine const&, llvm::Instruction*)<br> #6  llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>::CreateShuffleVector(llvm::Value*, llvm::Value*, llvm::Value*, llvm::Twine const&) <br> #7  llvm::slpvectorizer::BoUpSLP::vectorizeTree(llvm::slpvectorizer::BoUpSLP::TreeEntry*) <br> #8  llvm::slpvectorizer::BoUpSLP::vectorizeTree(llvm::MapVector<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>, llvm::DenseMap<llvm::Value*, unsigned int, llvm::DenseMapInfo<llvm::Value*>, llvm::detail::DenseMapPair<llvm::Value*, unsigned int> >, std::__u::vector<std::__u::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u> >, std::__u::allocator<std::__u::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u> > > > >&) </div><div> #9  llvm::slpvectorizer::BoUpSLP::vectorizeTree()<br>#10  llvm::SLPVectorizerPass::vectorizeStoreChain(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP&, unsigned int) <br></div></div></div><div><br></div><div><br></div><div>I will try to reduce a small example of the input code.</div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Wed, Oct 30, 2019 at 10:37 AM Alexey Bataev via llvm-commits <<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><br>
Author: Alexey Bataev
Date: 2019-10-30T13:33:52-04:00
New Revision: 21d498c9c0f32dcab5bc89ac593aa813b533b43a

URL: https://github.com/llvm/llvm-project/commit/21d498c9c0f32dcab5bc89ac593aa813b533b43a
DIFF: https://github.com/llvm/llvm-project/commit/21d498c9c0f32dcab5bc89ac593aa813b533b43a.diff

LOG: [SLP] Vectorize jumbled stores.

Summary:
Patch adds support for vectorization of the jumbled stores. The value
operands are vectorized and then shuffled in the right order before
store.
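
For illustration, the output now has this shape (a minimal hand-written sketch
mirroring the reorder_shuffle produced in the updated store-jumbled.ll test
below; the function and value names here are made up): the value operands are
vectorized in their original order, a single shuffle permutes them into the
stores' pointer order, and one wide store replaces the scalar stores.

define void @jumbled_store_sketch(<4 x i32> %vals, i32* %out) {
entry:
  ; %vals holds the four scalar value operands in their original (jumbled)
  ; program order; one shuffle puts them into the pointer order of the stores.
  %reorder_shuffle = shufflevector <4 x i32> %vals, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
  ; A single wide store then replaces the four scalar stores.
  %vecptr = bitcast i32* %out to <4 x i32>*
  store <4 x i32> %reorder_shuffle, <4 x i32>* %vecptr, align 4
  ret void
}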

Reviewers: RKSimon, spatel, hfinkel, mkuper

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D43339

Added:


Modified:
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll
    llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll

Removed:



################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bdcd5bfb9815..ad860ccb4ef1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2666,24 +2666,74 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     }
     case Instruction::Store: {
       // Check if the stores are consecutive or if we need to swizzle them.
-      for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
-        if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
+      llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType();
+      // Make sure all stores in the bundle are simple - we can't vectorize
+      // atomic or volatile stores.
+      SmallVector<Value *, 4> PointerOps(VL.size());
+      ValueList Operands(VL.size());
+      auto POIter = PointerOps.begin();
+      auto OIter = Operands.begin();
+      for (Value *V : VL) {
+        auto *SI = cast<StoreInst>(V);
+        if (!SI->isSimple()) {
           BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
                        ReuseShuffleIndicies);
-          LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
+          LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple stores.\n");
           return;
         }
+        *POIter = SI->getPointerOperand();
+        *OIter = SI->getValueOperand();
+        ++POIter;
+        ++OIter;
+      }

-      TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
-                                   ReuseShuffleIndicies);
-      LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
+      OrdersType CurrentOrder;
+      // Check the order of pointer operands.
+      if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
+        Value *Ptr0;
+        Value *PtrN;
+        if (CurrentOrder.empty()) {
+          Ptr0 = PointerOps.front();
+          PtrN = PointerOps.back();
+        } else {
+          Ptr0 = PointerOps[CurrentOrder.front()];
+          PtrN = PointerOps[CurrentOrder.back()];
+        }
+        const SCEV *Scev0 = SE->getSCEV(Ptr0);
+        const SCEV *ScevN = SE->getSCEV(PtrN);
+        const auto *Diff =
+            dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
+        uint64_t Size = DL->getTypeAllocSize(ScalarTy);
+        // Check that the sorted pointer operands are consecutive.
+        if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {
+          if (CurrentOrder.empty()) {
+            // Original stores are consecutive and does not require reordering.
+            ++NumOpsWantToKeepOriginalOrder;
+            TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
+                                         UserTreeIdx, ReuseShuffleIndicies);
+            TE->setOperandsInOrder();
+            buildTree_rec(Operands, Depth + 1, {TE, 0});
+            LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
+          } else {
+            // Need to reorder.
+            auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
+            ++(I->getSecond());
+            TreeEntry *TE =
+                newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+                             ReuseShuffleIndicies, I->getFirst());
+            TE->setOperandsInOrder();
+            buildTree_rec(Operands, Depth + 1, {TE, 0});
+            LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
+          }
+          return;
+        }
+      }

-      ValueList Operands;
-      for (Value *V : VL)
-        Operands.push_back(cast<Instruction>(V)->getOperand(0));
-      TE->setOperandsInOrder();
-      buildTree_rec(Operands, Depth + 1, {TE, 0});
+      BS.cancelScheduling(VL, VL0);
+      newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+                   ReuseShuffleIndicies);
+      LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
       return;
     }
     case Instruction::Call: {
@@ -3181,15 +3231,23 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     }
     case Instruction::Store: {
       // We know that we can merge the stores. Calculate the cost.
-      MaybeAlign alignment(cast<StoreInst>(VL0)->getAlignment());
+      bool IsReorder = !E->ReorderIndices.empty();
+      auto *SI =
+          cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
+      MaybeAlign Alignment(SI->getAlignment());
       int ScalarEltCost =
-          TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
+          TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0, VL0);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
       int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
-      int VecStCost =
-          TTI->getMemoryOpCost(Instruction::Store, VecTy, alignment, 0, VL0);
+      int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
+                                           VecTy, Alignment, 0, VL0);
+      if (IsReorder) {
+        // TODO: Merge this shuffle with the ReuseShuffleCost.
+        VecStCost += TTI->getShuffleCost(
+            TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+      }
       return ReuseShuffleCost + VecStCost - ScalarStCost;
     }
     case Instruction::Call: {
@@ -4051,13 +4109,22 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       return V;
     }
     case Instruction::Store: {
-      StoreInst *SI = cast<StoreInst>(VL0);
+      bool IsReorder = !E->ReorderIndices.empty();
+      auto *SI = cast<StoreInst>(
+          IsReorder ? E->Scalars[E->ReorderIndices.front()] : VL0);
       unsigned Alignment = SI->getAlignment();
       unsigned AS = SI->getPointerAddressSpace();

       setInsertPointAfterBundle(E);

       Value *VecValue = vectorizeTree(E->getOperand(0));
+      if (IsReorder) {
+        OrdersType Mask;
+        inversePermutation(E->ReorderIndices, Mask);
+        VecValue = Builder.CreateShuffleVector(
+            VecValue, UndefValue::get(VecValue->getType()), E->ReorderIndices,
+            "reorder_shuffle");
+      }
       Value *ScalarPtr = SI->getPointerOperand();
       Value *VecPtr = Builder.CreateBitCast(ScalarPtr, VecTy->getPointerTo(AS));
       StoreInst *ST = Builder.CreateStore(VecValue, VecPtr);
@@ -5347,6 +5414,14 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
                     << "\n");

   R.buildTree(Chain);
+  Optional<ArrayRef<unsigned>> Order = R.bestOrder();
+  if (Order) {
+    // TODO: reorder tree nodes without tree rebuilding.
+    SmallVector<Value *, 4> ReorderedOps(Chain.rbegin(), Chain.rend());
+    llvm::transform(*Order, ReorderedOps.begin(),
+                    [Chain](const unsigned Idx) { return Chain[Idx]; });
+    R.buildTree(ReorderedOps);
+  }
   if (R.isTreeTinyAndNotFullyVectorizable())
     return false;


diff --git a/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll b/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll
index 2255a12342f4..e99864205bfc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll
@@ -11,21 +11,20 @@ define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn
 ; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
 ; CHECK-NEXT:    [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0
 ; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1
 ; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2
 ; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[REORDER_SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
-; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[REORDER_SHUFFLE]], [[REORDER_SHUFFLE1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
 ; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1
 ; CHECK-NEXT:    [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2
 ; CHECK-NEXT:    [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3
+; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[REORDER_SHUFFLE]], <4 x i32>* [[TMP6]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
   %in.addr = getelementptr inbounds i32, i32* %in, i64 0

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll
index 425f3e634167..fd697f1a7ca5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll
@@ -92,15 +92,14 @@ define void @store_reverse(i64* %p3) {
 ; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 8
-; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 11
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[ARRAYIDX1]] to <4 x i64>*
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
-; CHECK-NEXT:    [[REORDER_SHUFFLE1:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP4:%.*]] = shl <4 x i64> [[REORDER_SHUFFLE]], [[REORDER_SHUFFLE1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shl <4 x i64> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 4
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[ARRAYIDX14]] to <4 x i64>*
-; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[ARRAYIDX14]] to <4 x i64>*
+; CHECK-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* [[TMP6]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:

_______________________________________________
llvm-commits mailing list
llvm-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits