[llvm] r224611 - merge consecutive stores of extracted vector elements
Sanjay Patel
spatel at rotateright.com
Fri Dec 19 12:23:41 PST 2014
Author: spatel
Date: Fri Dec 19 14:23:41 2014
New Revision: 224611
URL: http://llvm.org/viewvc/llvm-project?rev=224611&view=rev
Log:
merge consecutive stores of extracted vector elements
Add a path to DAGCombiner::MergeConsecutiveStores()
to combine multiple scalar stores when the store operands
are extracted vector elements. This is a partial fix for
PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 ).
For the new test case, codegen improves from:
vmovss %xmm0, (%rdi)
vextractps $1, %xmm0, 4(%rdi)
vextractps $2, %xmm0, 8(%rdi)
vextractps $3, %xmm0, 12(%rdi)
vextractf128 $1, %ymm0, %xmm0
vmovss %xmm0, 16(%rdi)
vextractps $1, %xmm0, 20(%rdi)
vextractps $2, %xmm0, 24(%rdi)
vextractps $3, %xmm0, 28(%rdi)
vzeroupper
retq
To:
vmovups %ymm0, (%rdi)
vzeroupper
retq
Patch reviewed by Nadav Rotem.
Differential Revision: http://reviews.llvm.org/D6698
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=224611&r1=224610&r2=224611&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Dec 19 14:23:41 2014
@@ -9498,11 +9498,14 @@ bool DAGCombiner::MergeConsecutiveStores
return false;
// Perform an early exit check. Do not bother looking at stored values that
- // are not constants or loads.
+ // are not constants, loads, or extracted vector elements.
SDValue StoredVal = St->getValue();
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
- if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
- !IsLoadSrc)
+ bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+ isa<ConstantFPSDNode>(StoredVal);
+ bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
return false;
// Only look at ends of store sequences.
@@ -9644,7 +9647,7 @@ bool DAGCombiner::MergeConsecutiveStores
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
// Store the constants into memory as one consecutive store.
- if (!IsLoadSrc) {
+ if (IsConstantSrc) {
unsigned LastLegalType = 0;
unsigned LastLegalVectorType = 0;
bool NonZero = false;
@@ -9769,6 +9772,74 @@ bool DAGCombiner::MergeConsecutiveStores
while (!St->use_empty())
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
deleteAndRecombine(St);
+ }
+
+ return true;
+ }
+
+ // When extracting multiple vector elements, try to store them
+ // in one vector store rather than a sequence of scalar stores.
+ if (IsExtractVecEltSrc) {
+ unsigned NumElem = 0;
+ for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(Ty))
+ NumElem = i + 1;
+ }
+
+ // Make sure we have a legal type and something to merge.
+ if (NumElem < 2)
+ return false;
+
+ unsigned EarliestNodeUsed = 0;
+ for (unsigned i=0; i < NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+ // The earliest Node in the DAG.
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ SDLoc DL(StoreNodes[0].MemNode);
+
+ SDValue StoredVal;
+
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = St->getValue();
+ // All of the operands of a BUILD_VECTOR must have the same type.
+ if (Val.getValueType() != MemVT)
+ return false;
+ Ops.push_back(Val);
+ }
+
+ // Build the extracted vector elements back into a vector.
+ StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ false, false,
+ FirstInChain->getAlignment());
+
+ // Replace the first store with the new store
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ deleteAndRecombine(St);
}
return true;
Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=224611&r1=224610&r2=224611&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)
+++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Fri Dec 19 14:23:41 2014
@@ -434,3 +434,36 @@ define void @loadStoreBaseIndexOffsetSex
; <label>:14
ret void
}
+
+define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
+ %vecext0 = extractelement <8 x float> %v, i32 0
+ %vecext1 = extractelement <8 x float> %v, i32 1
+ %vecext2 = extractelement <8 x float> %v, i32 2
+ %vecext3 = extractelement <8 x float> %v, i32 3
+ %vecext4 = extractelement <8 x float> %v, i32 4
+ %vecext5 = extractelement <8 x float> %v, i32 5
+ %vecext6 = extractelement <8 x float> %v, i32 6
+ %vecext7 = extractelement <8 x float> %v, i32 7
+ %arrayidx1 = getelementptr inbounds float* %ptr, i64 1
+ %arrayidx2 = getelementptr inbounds float* %ptr, i64 2
+ %arrayidx3 = getelementptr inbounds float* %ptr, i64 3
+ %arrayidx4 = getelementptr inbounds float* %ptr, i64 4
+ %arrayidx5 = getelementptr inbounds float* %ptr, i64 5
+ %arrayidx6 = getelementptr inbounds float* %ptr, i64 6
+ %arrayidx7 = getelementptr inbounds float* %ptr, i64 7
+ store float %vecext0, float* %ptr, align 4
+ store float %vecext1, float* %arrayidx1, align 4
+ store float %vecext2, float* %arrayidx2, align 4
+ store float %vecext3, float* %arrayidx3, align 4
+ store float %vecext4, float* %arrayidx4, align 4
+ store float %vecext5, float* %arrayidx5, align 4
+ store float %vecext6, float* %arrayidx6, align 4
+ store float %vecext7, float* %arrayidx7, align 4
+ ret void
+
+; CHECK-LABEL: merge_vec_element_store
+; CHECK: vmovups
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
+
More information about the llvm-commits
mailing list