<div dir="ltr"><div>It looks like this change crashes on some targets. Here is the (slightly simplified) traceback:</div><div><br></div> #0 SignalHandler(int)<br> #1 __restore_rt<br> #2 (anonymous namespace)::do_free_pages(void*, unsigned long)<div> #3 llvm::detail::DenseMapPair<std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*>* llvm::DenseMapBase<llvm::DenseMap<std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*, llvm::DenseMapInfo<std::__u::pair<llvm::Type*, llvm::ElementCount> >, llvm::detail::DenseMapPair<std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*> >, std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*, llvm::DenseMapInfo<std::__u::pair<llvm::Type*, llvm::ElementCount> >, llvm::detail::DenseMapPair<std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*> >::InsertIntoBucketImpl<std::__u::pair<llvm::Type*, llvm::ElementCount> >(std::__u::pair<llvm::Type*, llvm::ElementCount> const&, std::__u::pair<llvm::Type*, llvm::ElementCount> const&, llvm::detail::DenseMapPair<std::__u::pair<llvm::Type*, llvm::ElementCount>, llvm::VectorType*>*)<div> #4 llvm::VectorType::get(llvm::Type*, llvm::ElementCount)<div> #5 llvm::ShuffleVectorInst::ShuffleVectorInst(llvm::Value*, llvm::Value*, llvm::Value*, llvm::Twine const&, llvm::Instruction*)<br> #6 llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>::CreateShuffleVector(llvm::Value*, llvm::Value*, llvm::Value*, llvm::Twine const&) <br> #7 llvm::slpvectorizer::BoUpSLP::vectorizeTree(llvm::slpvectorizer::BoUpSLP::TreeEntry*) <br> #8 llvm::slpvectorizer::BoUpSLP::vectorizeTree(llvm::MapVector<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>, llvm::DenseMap<llvm::Value*, unsigned int, llvm::DenseMapInfo<llvm::Value*>, llvm::detail::DenseMapPair<llvm::Value*, unsigned int> >, std::__u::vector<std::__u::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u> >, std::__u::allocator<std::__u::pair<llvm::Value*, 
llvm::SmallVector<llvm::Instruction*, 2u> > > > >&) </div><div> #9 llvm::slpvectorizer::BoUpSLP::vectorizeTree()<br>#10 llvm::SLPVectorizerPass::vectorizeStoreChain(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP&, unsigned int) <br></div></div></div><div><br></div><div><br></div><div>I will try to reduce a small example of the input code.</div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Wed, Oct 30, 2019 at 10:37 AM Alexey Bataev via llvm-commits <<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><br>
Author: Alexey Bataev<br>
Date: 2019-10-30T13:33:52-04:00<br>
New Revision: 21d498c9c0f32dcab5bc89ac593aa813b533b43a<br>
<br>
URL: <a href="https://github.com/llvm/llvm-project/commit/21d498c9c0f32dcab5bc89ac593aa813b533b43a" rel="noreferrer" target="_blank">https://github.com/llvm/llvm-project/commit/21d498c9c0f32dcab5bc89ac593aa813b533b43a</a><br>
DIFF: <a href="https://github.com/llvm/llvm-project/commit/21d498c9c0f32dcab5bc89ac593aa813b533b43a.diff" rel="noreferrer" target="_blank">https://github.com/llvm/llvm-project/commit/21d498c9c0f32dcab5bc89ac593aa813b533b43a.diff</a><br>
<br>
LOG: [SLP] Vectorize jumbled stores.<br>
<br>
Summary:<br>
This patch adds support for vectorization of jumbled stores. The value<br>
operands are vectorized and then shuffled in the right order before<br>
store.<br>
<br>
Reviewers: RKSimon, spatel, hfinkel, mkuper<br>
<br>
Subscribers: llvm-commits<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D43339" rel="noreferrer" target="_blank">https://reviews.llvm.org/D43339</a><br>
<br>
Added: <br>
<br>
<br>
Modified: <br>
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll<br>
llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll<br>
<br>
Removed: <br>
<br>
<br>
<br>
################################################################################<br>
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
index bdcd5bfb9815..ad860ccb4ef1 100644<br>
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
@@ -2666,24 +2666,74 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,<br>
}<br>
case Instruction::Store: {<br>
// Check if the stores are consecutive or if we need to swizzle them.<br>
- for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)<br>
- if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {<br>
+ llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType();<br>
+ // Make sure all stores in the bundle are simple - we can't vectorize<br>
+ // atomic or volatile stores.<br>
+ SmallVector<Value *, 4> PointerOps(VL.size());<br>
+ ValueList Operands(VL.size());<br>
+ auto POIter = PointerOps.begin();<br>
+ auto OIter = Operands.begin();<br>
+ for (Value *V : VL) {<br>
+ auto *SI = cast<StoreInst>(V);<br>
+ if (!SI->isSimple()) {<br>
BS.cancelScheduling(VL, VL0);<br>
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,<br>
ReuseShuffleIndicies);<br>
- LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");<br>
+ LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple stores.\n");<br>
return;<br>
}<br>
+ *POIter = SI->getPointerOperand();<br>
+ *OIter = SI->getValueOperand();<br>
+ ++POIter;<br>
+ ++OIter;<br>
+ }<br>
<br>
- TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,<br>
- ReuseShuffleIndicies);<br>
- LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");<br>
+ OrdersType CurrentOrder;<br>
+ // Check the order of pointer operands.<br>
+ if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {<br>
+ Value *Ptr0;<br>
+ Value *PtrN;<br>
+ if (CurrentOrder.empty()) {<br>
+ Ptr0 = PointerOps.front();<br>
+ PtrN = PointerOps.back();<br>
+ } else {<br>
+ Ptr0 = PointerOps[CurrentOrder.front()];<br>
+ PtrN = PointerOps[CurrentOrder.back()];<br>
+ }<br>
+ const SCEV *Scev0 = SE->getSCEV(Ptr0);<br>
+ const SCEV *ScevN = SE->getSCEV(PtrN);<br>
+ const auto *Diff =<br>
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));<br>
+ uint64_t Size = DL->getTypeAllocSize(ScalarTy);<br>
+ // Check that the sorted pointer operands are consecutive.<br>
+ if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {<br>
+ if (CurrentOrder.empty()) {<br>
+ // Original stores are consecutive and does not require reordering.<br>
+ ++NumOpsWantToKeepOriginalOrder;<br>
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,<br>
+ UserTreeIdx, ReuseShuffleIndicies);<br>
+ TE->setOperandsInOrder();<br>
+ buildTree_rec(Operands, Depth + 1, {TE, 0});<br>
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");<br>
+ } else {<br>
+ // Need to reorder.<br>
+ auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;<br>
+ ++(I->getSecond());<br>
+ TreeEntry *TE =<br>
+ newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,<br>
+ ReuseShuffleIndicies, I->getFirst());<br>
+ TE->setOperandsInOrder();<br>
+ buildTree_rec(Operands, Depth + 1, {TE, 0});<br>
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");<br>
+ }<br>
+ return;<br>
+ }<br>
+ }<br>
<br>
- ValueList Operands;<br>
- for (Value *V : VL)<br>
- Operands.push_back(cast<Instruction>(V)->getOperand(0));<br>
- TE->setOperandsInOrder();<br>
- buildTree_rec(Operands, Depth + 1, {TE, 0});<br>
+ BS.cancelScheduling(VL, VL0);<br>
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,<br>
+ ReuseShuffleIndicies);<br>
+ LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");<br>
return;<br>
}<br>
case Instruction::Call: {<br>
@@ -3181,15 +3231,23 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {<br>
}<br>
case Instruction::Store: {<br>
// We know that we can merge the stores. Calculate the cost.<br>
- MaybeAlign alignment(cast<StoreInst>(VL0)->getAlignment());<br>
+ bool IsReorder = !E->ReorderIndices.empty();<br>
+ auto *SI =<br>
+ cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);<br>
+ MaybeAlign Alignment(SI->getAlignment());<br>
int ScalarEltCost =<br>
- TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);<br>
+ TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0, VL0);<br>
if (NeedToShuffleReuses) {<br>
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;<br>
}<br>
int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;<br>
- int VecStCost =<br>
- TTI->getMemoryOpCost(Instruction::Store, VecTy, alignment, 0, VL0);<br>
+ int VecStCost = TTI->getMemoryOpCost(Instruction::Store,<br>
+ VecTy, Alignment, 0, VL0);<br>
+ if (IsReorder) {<br>
+ // TODO: Merge this shuffle with the ReuseShuffleCost.<br>
+ VecStCost += TTI->getShuffleCost(<br>
+ TargetTransformInfo::SK_PermuteSingleSrc, VecTy);<br>
+ }<br>
return ReuseShuffleCost + VecStCost - ScalarStCost;<br>
}<br>
case Instruction::Call: {<br>
@@ -4051,13 +4109,22 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {<br>
return V;<br>
}<br>
case Instruction::Store: {<br>
- StoreInst *SI = cast<StoreInst>(VL0);<br>
+ bool IsReorder = !E->ReorderIndices.empty();<br>
+ auto *SI = cast<StoreInst>(<br>
+ IsReorder ? E->Scalars[E->ReorderIndices.front()] : VL0);<br>
unsigned Alignment = SI->getAlignment();<br>
unsigned AS = SI->getPointerAddressSpace();<br>
<br>
setInsertPointAfterBundle(E);<br>
<br>
Value *VecValue = vectorizeTree(E->getOperand(0));<br>
+ if (IsReorder) {<br>
+ OrdersType Mask;<br>
+ inversePermutation(E->ReorderIndices, Mask);<br>
+ VecValue = Builder.CreateShuffleVector(<br>
+ VecValue, UndefValue::get(VecValue->getType()), E->ReorderIndices,<br>
+ "reorder_shuffle");<br>
+ }<br>
Value *ScalarPtr = SI->getPointerOperand();<br>
Value *VecPtr = Builder.CreateBitCast(ScalarPtr, VecTy->getPointerTo(AS));<br>
StoreInst *ST = Builder.CreateStore(VecValue, VecPtr);<br>
@@ -5347,6 +5414,14 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,<br>
<< "\n");<br>
<br>
R.buildTree(Chain);<br>
+ Optional<ArrayRef<unsigned>> Order = R.bestOrder();<br>
+ if (Order) {<br>
+ // TODO: reorder tree nodes without tree rebuilding.<br>
+ SmallVector<Value *, 4> ReorderedOps(Chain.rbegin(), Chain.rend());<br>
+ llvm::transform(*Order, ReorderedOps.begin(),<br>
+ [Chain](const unsigned Idx) { return Chain[Idx]; });<br>
+ R.buildTree(ReorderedOps);<br>
+ }<br>
if (R.isTreeTinyAndNotFullyVectorizable())<br>
return false;<br>
<br>
<br>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll b/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll<br>
index 2255a12342f4..e99864205bfc 100644<br>
--- a/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll<br>
+++ b/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll<br>
@@ -11,21 +11,20 @@ define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn<br>
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3<br>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*<br>
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4<br>
-; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2><br>
; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0<br>
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1<br>
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2<br>
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3<br>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>*<br>
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4<br>
-; CHECK-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2><br>
-; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[REORDER_SHUFFLE]], [[REORDER_SHUFFLE1]]<br>
+; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP2]], [[TMP4]]<br>
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0<br>
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1<br>
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2<br>
; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3<br>
+; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2><br>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*<br>
-; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4<br>
+; CHECK-NEXT: store <4 x i32> [[REORDER_SHUFFLE]], <4 x i32>* [[TMP6]], align 4<br>
; CHECK-NEXT: ret i32 undef<br>
;<br>
%in.addr = getelementptr inbounds i32, i32* %in, i64 0<br>
<br>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll<br>
index 425f3e634167..fd697f1a7ca5 100644<br>
--- a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll<br>
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll<br>
@@ -92,15 +92,14 @@ define void @store_reverse(i64* %p3) {<br>
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3<br>
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[P3]] to <4 x i64>*<br>
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 8<br>
-; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0><br>
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 11<br>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[ARRAYIDX1]] to <4 x i64>*<br>
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8<br>
-; CHECK-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0><br>
-; CHECK-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[REORDER_SHUFFLE]], [[REORDER_SHUFFLE1]]<br>
+; CHECK-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP1]], [[TMP3]]<br>
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 4<br>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[ARRAYIDX14]] to <4 x i64>*<br>
-; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8<br>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0><br>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[ARRAYIDX14]] to <4 x i64>*<br>
+; CHECK-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* [[TMP6]], align 8<br>
; CHECK-NEXT: ret void<br>
;<br>
entry:<br>
<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>
<a href="https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>
</blockquote></div>