<html><head><meta http-equiv="Content-Type" content="text/html charset=us-ascii"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;"><div><div><div></div><div>On Jan 27, 2015, at 12:50 PM, Sanjay Patel <<a href="mailto:spatel@rotateright.com">spatel@rotateright.com</a>> wrote:</div><br class="Apple-interchange-newline"><blockquote type="cite">Author: spatel<br>Date: Tue Jan 27 14:50:27 2015<br>New Revision: 227242<br><br>URL: <a href="http://llvm.org/viewvc/llvm-project?rev=227242&view=rev">http://llvm.org/viewvc/llvm-project?rev=227242&view=rev</a><br>Log:<br>Merge vector stores into wider vector stores (PR21711)<br><br>This patch resolves part of PR21711 ( <a href="http://llvm.org/bugs/show_bug.cgi?id=21711">http://llvm.org/bugs/show_bug.cgi?id=21711</a> ).<br><br>The 'f3' test case in that report presents a situation where we have two 128-bit<br>stores extracted from a 256-bit source vector. <br><br>Instead of producing this:<br><br>vmovaps %xmm0, (%rdi)<br>vextractf128 $1, %ymm0, 16(%rdi)<br><br>This patch merges the 128-bit stores into a single 256-bit store:<br><br>vmovups %ymm0, (%rdi)<br><br>Differential Revision: <a href="http://reviews.llvm.org/D7208">http://reviews.llvm.org/D7208</a><br><br><br>Modified:<br> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp<br> llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll<br><br>Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp<br>URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=227242&r1=227241&r2=227242&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=227242&r1=227241&r2=227242&view=diff</a><br>==============================================================================<br>--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)<br>+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jan 27 14:50:27 2015<br>@@ -382,7 +382,7 @@ namespace {<br> /// vector elements, try to merge them into one larger store.<br> /// \return True if a merged store was created.<br> bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,<br>- EVT MemVT, unsigned NumElem,<br>+ EVT MemVT, unsigned StoresToMerge,<br> bool IsConstantSrc, bool UseVector);<br><br> /// Merge consecutive store operations into a wide store.<br>@@ -9730,16 +9730,16 @@ struct BaseIndexOffset {<br><br> bool DAGCombiner::MergeStoresOfConstantsOrVecElts(<br> SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,<br>- unsigned NumElem, bool IsConstantSrc, bool UseVector) {<br>+ unsigned StoresToMerge, bool IsConstantSrc, bool UseVector) {<br> // Make sure we have something to merge.<br>- if (NumElem < 2)<br>+ if (StoresToMerge < 2)<br> return false;<br><br> int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;<br> LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;<br> unsigned EarliestNodeUsed = 0;<br><br>- for (unsigned i=0; i < NumElem; ++i) {<br>+ for (unsigned i=0; i < StoresToMerge; ++i) {<br> // Find a chain for the new wide-store operand. Notice that some<br> // of the store nodes that we found may not be selected for inclusion<br> // in the wide store. The chain we use needs to be the chain of the<br>@@ -9754,9 +9754,16 @@ bool DAGCombiner::MergeStoresOfConstants<br><br> SDValue StoredVal;<br> if (UseVector) {<br>- // Find a legal type for the vector store.<br>- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);<br>+ bool IsVec = MemVT.isVector();<br>+ unsigned Elts = StoresToMerge;<br>+ if (IsVec) {<br>+ // When merging vector stores, get the total number of elements.<br>+ Elts *= MemVT.getVectorNumElements();<br>+ }<br>+ // Get the type for the merged vector store.<br>+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);<br> assert(TLI.isTypeLegal(Ty) && "Illegal vector store");<br>+<br> if (IsConstantSrc) {<br> // A vector store with a constant source implies that the constant is<br> // zero; we only handle merging stores of constant zeros because the zero<br>@@ -9766,31 +9773,31 @@ bool DAGCombiner::MergeStoresOfConstants<br> StoredVal = DAG.getConstant(0, Ty);<br> } else {<br> SmallVector<SDValue, 8> Ops;<br>- for (unsigned i = 0; i < NumElem ; ++i) {<br>+ for (unsigned i = 0; i < StoresToMerge ; ++i) {<br> StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);<br> SDValue Val = St->getValue();<br>- // All of the operands of a BUILD_VECTOR must have the same type.<br>+ // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.<br> if (Val.getValueType() != MemVT)<br> return false;<br> Ops.push_back(Val);<br> }<br>- <br> // Build the extracted vector elements back into a vector.<br>- StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);<br>+ StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,<br>+ DL, Ty, Ops);<br> }<br> } else {<br> // We should always use a vector store when merging extracted vector<br> // elements, so this path implies a store of constants.<br> assert(IsConstantSrc && "Merged vector elements should use vector store");<br><br>- unsigned StoreBW = NumElem * ElementSizeBytes * 8;<br>+ unsigned StoreBW = StoresToMerge * ElementSizeBytes * 8;<br> APInt StoreInt(StoreBW, 0);<br><br> // Construct a single integer constant which is made of the smaller<br> // constant inputs.<br> bool IsLE = TLI.isLittleEndian();<br>- for (unsigned i = 0; i < NumElem ; ++i) {<br>- unsigned Idx = IsLE ? (NumElem - 1 - i) : i;<br>+ for (unsigned i = 0; i < StoresToMerge ; ++i) {<br>+ unsigned Idx = IsLE ? (StoresToMerge - 1 - i) : i;<br> StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);<br> SDValue Val = St->getValue();<br> StoreInt <<= ElementSizeBytes*8;<br>@@ -9817,7 +9824,7 @@ bool DAGCombiner::MergeStoresOfConstants<br> // Replace the first store with the new store<br> CombineTo(EarliestOp, NewStore);<br> // Erase all other stores.<br>- for (unsigned i = 0; i < NumElem ; ++i) {<br>+ for (unsigned i = 0; i < StoresToMerge ; ++i) {<br> if (StoreNodes[i].MemNode == EarliestOp)<br> continue;<br> StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);<br>@@ -9840,25 +9847,35 @@ bool DAGCombiner::MergeStoresOfConstants<br> }<br><br> bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {<br>- EVT MemVT = St->getMemoryVT();<br>- int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;<br> bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().<br> hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);<br><br>- // Don't merge vectors into wider inputs.<br>- if (MemVT.isVector() || !MemVT.isSimple())<br>- return false;<br>-<br> // Perform an early exit check. Do not bother looking at stored values that<br> // are not constants, loads, or extracted vector elements.<br> SDValue StoredVal = St->getValue();<br> bool IsLoadSrc = isa<LoadSDNode>(StoredVal);<br> bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||<br> isa<ConstantFPSDNode>(StoredVal);<br>- bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);<br>+ bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||<br>+ StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);<br><br>- if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)<br>+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)<br>+ return false;<br>+<br>+ EVT MemVT = St->getMemoryVT();<br>+ int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;<br>+<br>+ // Don't merge vectors into wider vectors if the source data comes from loads.<br>+ // TODO: This restriction can be lifted by using logic similar to the<br>+ // ExtractVecSrc case.<br>+ // There's no such thing as a vector constant node; that merge case should be<br>+ // handled by looking through a BUILD_VECTOR source with all constant inputs.<br>+ if (MemVT.isVector() && IsLoadSrc)<br> return false;<br>+ <br>+ if (!MemVT.isSimple())<br>+ return false;<br>+ <br><br> // Only look at ends of store sequences.<br> SDValue Chain = SDValue(St, 0);<br>@@ -10054,26 +10071,33 @@ bool DAGCombiner::MergeConsecutiveStores<br><br> // When extracting multiple vector elements, try to store them<br> // in one vector store rather than a sequence of scalar stores.<br>- if (IsExtractVecEltSrc) {<br>- unsigned NumElem = 0;<br>+ if (IsExtractVecSrc) {<br>+ unsigned StoresToMerge = 0;<br>+ bool IsVec = MemVT.isVector();<br> for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {<br> StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);<br>- SDValue StoredVal = St->getValue();<br>+ unsigned StoreValOpcode = St->getValue().getOpcode();<br> // This restriction could be loosened.<br> // Bail out if any stored values are not elements extracted from a vector.<br> // It should be possible to handle mixed sources, but load sources need<br> // more careful handling (see the block of code below that handles<br> // consecutive loads).<br>- if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)<br>+ if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&<br>+ StoreValOpcode != ISD::EXTRACT_SUBVECTOR)<br> return false;<br><br> // Find a legal type for the vector store.<br>- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);<br>- if (TLI.isTypeLegal(Ty))<br>- NumElem = i + 1;<br>+ unsigned Elts = i + 1;<br>+ if (IsVec) {<br>+ // When merging vector stores, get the total number of elements.<br>+ Elts *= MemVT.getVectorNumElements();<br>+ }<br>+ if (TLI.isTypeLegal(EVT::getVectorVT(*DAG.getContext(),<br>+ MemVT.getScalarType(), Elts)))<br>+ StoresToMerge = i + 1;<br> }<br><br>- return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,<br>+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, StoresToMerge,<br> false, true);<br> }<br><br><br>Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll<br>URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=227242&r1=227241&r2=227242&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=227242&r1=227241&r2=227242&view=diff</a><br>==============================================================================<br>--- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)<br>+++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Tue Jan 27 14:50:27 2015<br>@@ -468,6 +468,64 @@ define void @merge_vec_element_store(<8<br> ; CHECK-NEXT: retq<br> }<br><br>+; PR21711 - Merge vector stores into wider vector stores.<br>+define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x float>* %ptr) {<br>+ %idx0 = getelementptr inbounds <4 x float>* %ptr, i64 3<br>+ %idx1 = getelementptr inbounds <4 x float>* %ptr, i64 4<br>+ %idx2 = getelementptr inbounds <4 x float>* %ptr, i64 5<br>+ %idx3 = getelementptr inbounds <4 x float>* %ptr, i64 6<br>+ %shuffle0 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3><br>+ %shuffle1 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7><br>+ %shuffle2 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3><br>+ %shuffle3 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7><br>+ store <4 x float> %shuffle0, <4 x float>* %idx0, align 16<br>+ store <4 x float> %shuffle1, <4 x float>* %idx1, align 16<br>+ store <4 x float> %shuffle2, <4 x float>* %idx2, align 16<br>+ store <4 x float> %shuffle3, <4 x float>* %idx3, align 16<br>+ ret void<br>+<br>+; CHECK-LABEL: merge_vec_extract_stores<br>+; CHECK: vmovups %ymm0, 48(%rdi)<br>+; CHECK-NEXT: vmovups %ymm1, 80(%rdi)<br>+; CHECK-NEXT: vzeroupper<br>+; CHECK-NEXT: retq<br>+}<br>+<br>+; Merging vector stores when sourced from vector loads is not currently handled.<br>+define void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) {<br>+ %load_idx0 = getelementptr inbounds <4 x float>* %v, i64 0<br>+ %load_idx1 = getelementptr inbounds <4 x float>* %v, i64 1<br>+ %v0 = load <4 x float>* %load_idx0<br>+ %v1 = load <4 x float>* %load_idx1<br>+ %store_idx0 = getelementptr inbounds <4 x float>* %ptr, i64 0<br>+ %store_idx1 = getelementptr inbounds <4 x float>* %ptr, i64 1<br>+ store <4 x float> %v0, <4 x float>* %store_idx0, align 16<br>+ store <4 x float> %v1, <4 x float>* %store_idx1, align 16<br>+ ret void<br>+<br>+; CHECK-LABEL: merge_vec_stores_from_loads<br>+; CHECK: vmovaps<br>+; CHECK-NEXT: vmovaps<br>+; CHECK-NEXT: vmovaps<br>+; CHECK-NEXT: vmovaps<br>+; CHECK-NEXT: retq<br>+}<br>+<br>+; Merging vector stores when sourced from a constant vector is not currently handled. <br>+define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {<br>+ %idx0 = getelementptr inbounds <4 x i32>* %ptr, i64 3<br>+ %idx1 = getelementptr inbounds <4 x i32>* %ptr, i64 4<br>+ store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx0, align 16<br>+ store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx1, align 16<br>+ ret void<br>+<br>+; CHECK-LABEL: merge_vec_stores_of_constants<br>+; CHECK: vxorps<br>+; CHECK-NEXT: vmovaps<br>+; CHECK-NEXT: vmovaps<br>+; CHECK-NEXT: retq<br>+}<br>+<br> ; This is a minimized test based on real code that was failing.<br> ; We could merge stores (and loads) like this...<br><br><br><br>_______________________________________________<br>llvm-commits mailing list<br><a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits<br></blockquote></div><br></div></body></html>