[llvm] r227242 - Merge vector stores into wider vector stores (PR21711)

Tue Jan 27 16:02:11 PST 2015

Hi Sanjay,

I had to revert this in r227272.

This commit causes infinite loop in the LLVM test suite for aarch64 with mcpu=cyclone.

I have attached a bitcode file from the dhrystone test of the LLVM test suite for you to reproduce.

To reproduce:

llc fldry.llvm.bc -mcpu=cyclone

And watch llc consuming all your RAM :).

Cheers,
-Quentin

On Jan 27, 2015, at 12:50 PM, Sanjay Patel <spatel at rotateright.com> wrote:

> Author: spatel
> Date: Tue Jan 27 14:50:27 2015
> New Revision: 227242
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=227242&view=rev
> Log:
> Merge vector stores into wider vector stores (PR21711)
> 
> This patch resolves part of PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 ).
> 
> The 'f3' test case in that report presents a situation where we have two 128-bit
> stores extracted from a 256-bit source vector. 
> 
> Instead of producing this:
> 
> vmovaps %xmm0, (%rdi)
> vextractf128    $1, %ymm0, 16(%rdi)
> 
> This patch merges the 128-bit stores into a single 256-bit store:
> 
> vmovups %ymm0, (%rdi)
> 
> Differential Revision: http://reviews.llvm.org/D7208
> 
> 
> Modified:
>    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>    llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
> 
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=227242&r1=227241&r2=227242&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jan 27 14:50:27 2015
> @@ -382,7 +382,7 @@ namespace {
>     /// vector elements, try to merge them into one larger store.
>     /// \return True if a merged store was created.
>     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
> -                                         EVT MemVT, unsigned NumElem,
> +                                         EVT MemVT, unsigned StoresToMerge,
>                                          bool IsConstantSrc, bool UseVector);
> 
>     /// Merge consecutive store operations into a wide store.
> @@ -9730,16 +9730,16 @@ struct BaseIndexOffset {
> 
> bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
>                   SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
> -                  unsigned NumElem, bool IsConstantSrc, bool UseVector) {
> +                  unsigned StoresToMerge, bool IsConstantSrc, bool UseVector) {
>   // Make sure we have something to merge.
> -  if (NumElem < 2)
> +  if (StoresToMerge < 2)
>     return false;
> 
>   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
>   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
>   unsigned EarliestNodeUsed = 0;
> 
> -  for (unsigned i=0; i < NumElem; ++i) {
> +  for (unsigned i=0; i < StoresToMerge; ++i) {
>     // Find a chain for the new wide-store operand. Notice that some
>     // of the store nodes that we found may not be selected for inclusion
>     // in the wide store. The chain we use needs to be the chain of the
> @@ -9754,9 +9754,16 @@ bool DAGCombiner::MergeStoresOfConstants
> 
>   SDValue StoredVal;
>   if (UseVector) {
> -    // Find a legal type for the vector store.
> -    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
> +    bool IsVec = MemVT.isVector();
> +    unsigned Elts = StoresToMerge;
> +    if (IsVec) {
> +      // When merging vector stores, get the total number of elements.
> +      Elts *= MemVT.getVectorNumElements();
> +    }
> +    // Get the type for the merged vector store.
> +    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
>     assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
> +
>     if (IsConstantSrc) {
>       // A vector store with a constant source implies that the constant is
>       // zero; we only handle merging stores of constant zeros because the zero
> @@ -9766,31 +9773,31 @@ bool DAGCombiner::MergeStoresOfConstants
>       StoredVal = DAG.getConstant(0, Ty);
>     } else {
>       SmallVector<SDValue, 8> Ops;
> -      for (unsigned i = 0; i < NumElem ; ++i) {
> +      for (unsigned i = 0; i < StoresToMerge ; ++i) {
>         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
>         SDValue Val = St->getValue();
> -        // All of the operands of a BUILD_VECTOR must have the same type.
> +        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
>         if (Val.getValueType() != MemVT)
>           return false;
>         Ops.push_back(Val);
>       }
> -      
>       // Build the extracted vector elements back into a vector.
> -      StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
> +      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
> +                              DL, Ty, Ops);
>     }
>   } else {
>     // We should always use a vector store when merging extracted vector
>     // elements, so this path implies a store of constants.
>     assert(IsConstantSrc && "Merged vector elements should use vector store");
> 
> -    unsigned StoreBW = NumElem * ElementSizeBytes * 8;
> +    unsigned StoreBW = StoresToMerge * ElementSizeBytes * 8;
>     APInt StoreInt(StoreBW, 0);
> 
>     // Construct a single integer constant which is made of the smaller
>     // constant inputs.
>     bool IsLE = TLI.isLittleEndian();
> -    for (unsigned i = 0; i < NumElem ; ++i) {
> -      unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
> +    for (unsigned i = 0; i < StoresToMerge ; ++i) {
> +      unsigned Idx = IsLE ? (StoresToMerge - 1 - i) : i;
>       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
>       SDValue Val = St->getValue();
>       StoreInt <<= ElementSizeBytes*8;
> @@ -9817,7 +9824,7 @@ bool DAGCombiner::MergeStoresOfConstants
>   // Replace the first store with the new store
>   CombineTo(EarliestOp, NewStore);
>   // Erase all other stores.
> -  for (unsigned i = 0; i < NumElem ; ++i) {
> +  for (unsigned i = 0; i < StoresToMerge ; ++i) {
>     if (StoreNodes[i].MemNode == EarliestOp)
>       continue;
>     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
> @@ -9840,25 +9847,35 @@ bool DAGCombiner::MergeStoresOfConstants
> }
> 
> bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
> -  EVT MemVT = St->getMemoryVT();
> -  int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
>   bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
>     hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
> 
> -  // Don't merge vectors into wider inputs.
> -  if (MemVT.isVector() || !MemVT.isSimple())
> -    return false;
> -
>   // Perform an early exit check. Do not bother looking at stored values that
>   // are not constants, loads, or extracted vector elements.
>   SDValue StoredVal = St->getValue();
>   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
>   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
>                        isa<ConstantFPSDNode>(StoredVal);
> -  bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
> +  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
> +                          StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
> 
> -  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
> +  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
> +    return false;
> +
> +  EVT MemVT = St->getMemoryVT();
> +  int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
> +
> +  // Don't merge vectors into wider vectors if the source data comes from loads.
> +  // TODO: This restriction can be lifted by using logic similar to the
> +  // ExtractVecSrc case.
> +  // There's no such thing as a vector constant node; that merge case should be
> +  // handled by looking through a BUILD_VECTOR source with all constant inputs.
> +  if (MemVT.isVector() && IsLoadSrc)
>     return false;
> +  
> +  if (!MemVT.isSimple())
> +    return false;
> +  
> 
>   // Only look at ends of store sequences.
>   SDValue Chain = SDValue(St, 0);
> @@ -10054,26 +10071,33 @@ bool DAGCombiner::MergeConsecutiveStores
> 
>   // When extracting multiple vector elements, try to store them
>   // in one vector store rather than a sequence of scalar stores.
> -  if (IsExtractVecEltSrc) {
> -    unsigned NumElem = 0;
> +  if (IsExtractVecSrc) {
> +    unsigned StoresToMerge = 0;
> +    bool IsVec = MemVT.isVector();
>     for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
>       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
> -      SDValue StoredVal = St->getValue();
> +      unsigned StoreValOpcode = St->getValue().getOpcode();
>       // This restriction could be loosened.
>       // Bail out if any stored values are not elements extracted from a vector.
>       // It should be possible to handle mixed sources, but load sources need
>       // more careful handling (see the block of code below that handles
>       // consecutive loads).
> -      if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
> +      if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
> +          StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
>         return false;
> 
>       // Find a legal type for the vector store.
> -      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
> -      if (TLI.isTypeLegal(Ty))
> -        NumElem = i + 1;
> +      unsigned Elts = i + 1;
> +      if (IsVec) {
> +        // When merging vector stores, get the total number of elements.
> +        Elts *= MemVT.getVectorNumElements();
> +      }
> +      if (TLI.isTypeLegal(EVT::getVectorVT(*DAG.getContext(),
> +                                           MemVT.getScalarType(), Elts)))
> +        StoresToMerge = i + 1;
>     }
> 
> -    return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
> +    return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, StoresToMerge,
>                                            false, true);
>   }
> 
> 
> Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=227242&r1=227241&r2=227242&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Tue Jan 27 14:50:27 2015
> @@ -468,6 +468,64 @@ define void @merge_vec_element_store(<8
> ; CHECK-NEXT: retq
> }
> 
> +; PR21711 - Merge vector stores into wider vector stores.
> +define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x float>* %ptr) {
> +  %idx0 = getelementptr inbounds <4 x float>* %ptr, i64 3
> +  %idx1 = getelementptr inbounds <4 x float>* %ptr, i64 4
> +  %idx2 = getelementptr inbounds <4 x float>* %ptr, i64 5
> +  %idx3 = getelementptr inbounds <4 x float>* %ptr, i64 6
> +  %shuffle0 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
> +  %shuffle1 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
> +  %shuffle2 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
> +  %shuffle3 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
> +  store <4 x float> %shuffle0, <4 x float>* %idx0, align 16
> +  store <4 x float> %shuffle1, <4 x float>* %idx1, align 16
> +  store <4 x float> %shuffle2, <4 x float>* %idx2, align 16
> +  store <4 x float> %shuffle3, <4 x float>* %idx3, align 16
> +  ret void
> +
> +; CHECK-LABEL: merge_vec_extract_stores
> +; CHECK:      vmovups %ymm0, 48(%rdi)
> +; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
> +; CHECK-NEXT: vzeroupper
> +; CHECK-NEXT: retq
> +}
> +
> +; Merging vector stores when sourced from vector loads is not currently handled.
> +define void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) {
> +  %load_idx0 = getelementptr inbounds <4 x float>* %v, i64 0
> +  %load_idx1 = getelementptr inbounds <4 x float>* %v, i64 1
> +  %v0 = load <4 x float>* %load_idx0
> +  %v1 = load <4 x float>* %load_idx1
> +  %store_idx0 = getelementptr inbounds <4 x float>* %ptr, i64 0
> +  %store_idx1 = getelementptr inbounds <4 x float>* %ptr, i64 1
> +  store <4 x float> %v0, <4 x float>* %store_idx0, align 16
> +  store <4 x float> %v1, <4 x float>* %store_idx1, align 16
> +  ret void
> +
> +; CHECK-LABEL: merge_vec_stores_from_loads
> +; CHECK:      vmovaps
> +; CHECK-NEXT: vmovaps
> +; CHECK-NEXT: vmovaps
> +; CHECK-NEXT: vmovaps
> +; CHECK-NEXT: retq
> +}
> +
> +; Merging vector stores when sourced from a constant vector is not currently handled. 
> +define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
> +  %idx0 = getelementptr inbounds <4 x i32>* %ptr, i64 3
> +  %idx1 = getelementptr inbounds <4 x i32>* %ptr, i64 4
> +  store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx0, align 16
> +  store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx1, align 16
> +  ret void
> +
> +; CHECK-LABEL: merge_vec_stores_of_constants
> +; CHECK:      vxorps
> +; CHECK-NEXT: vmovaps
> +; CHECK-NEXT: vmovaps
> +; CHECK-NEXT: retq
> +}
> +
> ; This is a minimized test based on real code that was failing.
> ; We could merge stores (and loads) like this...
> 
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150127/c75b922d/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: fldry.llvm.bc
Type: application/octet-stream
Size: 7008 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150127/c75b922d/attachment.obj>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150127/c75b922d/attachment-0001.html>