[llvm] r227242 - Merge vector stores into wider vector stores (PR21711)
Quentin Colombet
qcolombet at apple.com
Tue Jan 27 16:02:11 PST 2015
Hi Sanjay,
I had to revert this in r227272.
This commit causes infinite loop in the LLVM test suite for aarch64 with mcpu=cyclone.
I have attached a bitcode file from the dhrystone test of the LLVM test suite for you to reproduce.
To reproduce:
llc fldry.llvm.bc -mcpu=cyclone
And watch llc consuming all your RAM :).
Cheers,
-Quentin
On Jan 27, 2015, at 12:50 PM, Sanjay Patel <spatel at rotateright.com> wrote:
> Author: spatel
> Date: Tue Jan 27 14:50:27 2015
> New Revision: 227242
>
> URL: http://llvm.org/viewvc/llvm-project?rev=227242&view=rev
> Log:
> Merge vector stores into wider vector stores (PR21711)
>
> This patch resolves part of PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 ).
>
> The 'f3' test case in that report presents a situation where we have two 128-bit
> stores extracted from a 256-bit source vector.
>
> Instead of producing this:
>
> vmovaps %xmm0, (%rdi)
> vextractf128 $1, %ymm0, 16(%rdi)
>
> This patch merges the 128-bit stores into a single 256-bit store:
>
> vmovups %ymm0, (%rdi)
>
> Differential Revision: http://reviews.llvm.org/D7208
>
>
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=227242&r1=227241&r2=227242&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jan 27 14:50:27 2015
> @@ -382,7 +382,7 @@ namespace {
> /// vector elements, try to merge them into one larger store.
> /// \return True if a merged store was created.
> bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
> - EVT MemVT, unsigned NumElem,
> + EVT MemVT, unsigned StoresToMerge,
> bool IsConstantSrc, bool UseVector);
>
> /// Merge consecutive store operations into a wide store.
> @@ -9730,16 +9730,16 @@ struct BaseIndexOffset {
>
> bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
> SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
> - unsigned NumElem, bool IsConstantSrc, bool UseVector) {
> + unsigned StoresToMerge, bool IsConstantSrc, bool UseVector) {
> // Make sure we have something to merge.
> - if (NumElem < 2)
> + if (StoresToMerge < 2)
> return false;
>
> int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
> LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
> unsigned EarliestNodeUsed = 0;
>
> - for (unsigned i=0; i < NumElem; ++i) {
> + for (unsigned i=0; i < StoresToMerge; ++i) {
> // Find a chain for the new wide-store operand. Notice that some
> // of the store nodes that we found may not be selected for inclusion
> // in the wide store. The chain we use needs to be the chain of the
> @@ -9754,9 +9754,16 @@ bool DAGCombiner::MergeStoresOfConstants
>
> SDValue StoredVal;
> if (UseVector) {
> - // Find a legal type for the vector store.
> - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
> + bool IsVec = MemVT.isVector();
> + unsigned Elts = StoresToMerge;
> + if (IsVec) {
> + // When merging vector stores, get the total number of elements.
> + Elts *= MemVT.getVectorNumElements();
> + }
> + // Get the type for the merged vector store.
> + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
> assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
> +
> if (IsConstantSrc) {
> // A vector store with a constant source implies that the constant is
> // zero; we only handle merging stores of constant zeros because the zero
> @@ -9766,31 +9773,31 @@ bool DAGCombiner::MergeStoresOfConstants
> StoredVal = DAG.getConstant(0, Ty);
> } else {
> SmallVector<SDValue, 8> Ops;
> - for (unsigned i = 0; i < NumElem ; ++i) {
> + for (unsigned i = 0; i < StoresToMerge ; ++i) {
> StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
> SDValue Val = St->getValue();
> - // All of the operands of a BUILD_VECTOR must have the same type.
> + // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
> if (Val.getValueType() != MemVT)
> return false;
> Ops.push_back(Val);
> }
> -
> // Build the extracted vector elements back into a vector.
> - StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
> + StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
> + DL, Ty, Ops);
> }
> } else {
> // We should always use a vector store when merging extracted vector
> // elements, so this path implies a store of constants.
> assert(IsConstantSrc && "Merged vector elements should use vector store");
>
> - unsigned StoreBW = NumElem * ElementSizeBytes * 8;
> + unsigned StoreBW = StoresToMerge * ElementSizeBytes * 8;
> APInt StoreInt(StoreBW, 0);
>
> // Construct a single integer constant which is made of the smaller
> // constant inputs.
> bool IsLE = TLI.isLittleEndian();
> - for (unsigned i = 0; i < NumElem ; ++i) {
> - unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
> + for (unsigned i = 0; i < StoresToMerge ; ++i) {
> + unsigned Idx = IsLE ? (StoresToMerge - 1 - i) : i;
> StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
> SDValue Val = St->getValue();
> StoreInt <<= ElementSizeBytes*8;
> @@ -9817,7 +9824,7 @@ bool DAGCombiner::MergeStoresOfConstants
> // Replace the first store with the new store
> CombineTo(EarliestOp, NewStore);
> // Erase all other stores.
> - for (unsigned i = 0; i < NumElem ; ++i) {
> + for (unsigned i = 0; i < StoresToMerge ; ++i) {
> if (StoreNodes[i].MemNode == EarliestOp)
> continue;
> StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
> @@ -9840,25 +9847,35 @@ bool DAGCombiner::MergeStoresOfConstants
> }
>
> bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
> - EVT MemVT = St->getMemoryVT();
> - int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
> bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
> hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
>
> - // Don't merge vectors into wider inputs.
> - if (MemVT.isVector() || !MemVT.isSimple())
> - return false;
> -
> // Perform an early exit check. Do not bother looking at stored values that
> // are not constants, loads, or extracted vector elements.
> SDValue StoredVal = St->getValue();
> bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
> bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
> isa<ConstantFPSDNode>(StoredVal);
> - bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
> + bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
> + StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
>
> - if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
> + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
> + return false;
> +
> + EVT MemVT = St->getMemoryVT();
> + int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
> +
> + // Don't merge vectors into wider vectors if the source data comes from loads.
> + // TODO: This restriction can be lifted by using logic similar to the
> + // ExtractVecSrc case.
> + // There's no such thing as a vector constant node; that merge case should be
> + // handled by looking through a BUILD_VECTOR source with all constant inputs.
> + if (MemVT.isVector() && IsLoadSrc)
> return false;
> +
> + if (!MemVT.isSimple())
> + return false;
> +
>
> // Only look at ends of store sequences.
> SDValue Chain = SDValue(St, 0);
> @@ -10054,26 +10071,33 @@ bool DAGCombiner::MergeConsecutiveStores
>
> // When extracting multiple vector elements, try to store them
> // in one vector store rather than a sequence of scalar stores.
> - if (IsExtractVecEltSrc) {
> - unsigned NumElem = 0;
> + if (IsExtractVecSrc) {
> + unsigned StoresToMerge = 0;
> + bool IsVec = MemVT.isVector();
> for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
> StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
> - SDValue StoredVal = St->getValue();
> + unsigned StoreValOpcode = St->getValue().getOpcode();
> // This restriction could be loosened.
> // Bail out if any stored values are not elements extracted from a vector.
> // It should be possible to handle mixed sources, but load sources need
> // more careful handling (see the block of code below that handles
> // consecutive loads).
> - if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
> + if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
> + StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
> return false;
>
> // Find a legal type for the vector store.
> - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
> - if (TLI.isTypeLegal(Ty))
> - NumElem = i + 1;
> + unsigned Elts = i + 1;
> + if (IsVec) {
> + // When merging vector stores, get the total number of elements.
> + Elts *= MemVT.getVectorNumElements();
> + }
> + if (TLI.isTypeLegal(EVT::getVectorVT(*DAG.getContext(),
> + MemVT.getScalarType(), Elts)))
> + StoresToMerge = i + 1;
> }
>
> - return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
> + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, StoresToMerge,
> false, true);
> }
>
>
> Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=227242&r1=227241&r2=227242&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Tue Jan 27 14:50:27 2015
> @@ -468,6 +468,64 @@ define void @merge_vec_element_store(<8
> ; CHECK-NEXT: retq
> }
>
> +; PR21711 - Merge vector stores into wider vector stores.
> +define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x float>* %ptr) {
> + %idx0 = getelementptr inbounds <4 x float>* %ptr, i64 3
> + %idx1 = getelementptr inbounds <4 x float>* %ptr, i64 4
> + %idx2 = getelementptr inbounds <4 x float>* %ptr, i64 5
> + %idx3 = getelementptr inbounds <4 x float>* %ptr, i64 6
> + %shuffle0 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
> + %shuffle1 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
> + %shuffle2 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
> + %shuffle3 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
> + store <4 x float> %shuffle0, <4 x float>* %idx0, align 16
> + store <4 x float> %shuffle1, <4 x float>* %idx1, align 16
> + store <4 x float> %shuffle2, <4 x float>* %idx2, align 16
> + store <4 x float> %shuffle3, <4 x float>* %idx3, align 16
> + ret void
> +
> +; CHECK-LABEL: merge_vec_extract_stores
> +; CHECK: vmovups %ymm0, 48(%rdi)
> +; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
> +; CHECK-NEXT: vzeroupper
> +; CHECK-NEXT: retq
> +}
> +
> +; Merging vector stores when sourced from vector loads is not currently handled.
> +define void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) {
> + %load_idx0 = getelementptr inbounds <4 x float>* %v, i64 0
> + %load_idx1 = getelementptr inbounds <4 x float>* %v, i64 1
> + %v0 = load <4 x float>* %load_idx0
> + %v1 = load <4 x float>* %load_idx1
> + %store_idx0 = getelementptr inbounds <4 x float>* %ptr, i64 0
> + %store_idx1 = getelementptr inbounds <4 x float>* %ptr, i64 1
> + store <4 x float> %v0, <4 x float>* %store_idx0, align 16
> + store <4 x float> %v1, <4 x float>* %store_idx1, align 16
> + ret void
> +
> +; CHECK-LABEL: merge_vec_stores_from_loads
> +; CHECK: vmovaps
> +; CHECK-NEXT: vmovaps
> +; CHECK-NEXT: vmovaps
> +; CHECK-NEXT: vmovaps
> +; CHECK-NEXT: retq
> +}
> +
> +; Merging vector stores when sourced from a constant vector is not currently handled.
> +define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
> + %idx0 = getelementptr inbounds <4 x i32>* %ptr, i64 3
> + %idx1 = getelementptr inbounds <4 x i32>* %ptr, i64 4
> + store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx0, align 16
> + store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx1, align 16
> + ret void
> +
> +; CHECK-LABEL: merge_vec_stores_of_constants
> +; CHECK: vxorps
> +; CHECK-NEXT: vmovaps
> +; CHECK-NEXT: vmovaps
> +; CHECK-NEXT: retq
> +}
> +
> ; This is a minimized test based on real code that was failing.
> ; We could merge stores (and loads) like this...
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150127/c75b922d/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: fldry.llvm.bc
Type: application/octet-stream
Size: 7008 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150127/c75b922d/attachment.obj>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150127/c75b922d/attachment-0001.html>
More information about the llvm-commits
mailing list