[llvm] r227242 - Merge vector stores into wider vector stores (PR21711)
Sanjay Patel
spatel at rotateright.com
Tue Jan 27 12:50:27 PST 2015
Author: spatel
Date: Tue Jan 27 14:50:27 2015
New Revision: 227242
URL: http://llvm.org/viewvc/llvm-project?rev=227242&view=rev
Log:
Merge vector stores into wider vector stores (PR21711)
This patch resolves part of PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 ).
The 'f3' test case in that report presents a situation where we have two 128-bit
stores extracted from a 256-bit source vector.
Instead of producing this:
vmovaps %xmm0, (%rdi)
vextractf128 $1, %ymm0, 16(%rdi)
This patch merges the 128-bit stores into a single 256-bit store:
vmovups %ymm0, (%rdi)
Differential Revision: http://reviews.llvm.org/D7208
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=227242&r1=227241&r2=227242&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jan 27 14:50:27 2015
@@ -382,7 +382,7 @@ namespace {
/// vector elements, try to merge them into one larger store.
/// \return True if a merged store was created.
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
- EVT MemVT, unsigned NumElem,
+ EVT MemVT, unsigned StoresToMerge,
bool IsConstantSrc, bool UseVector);
/// Merge consecutive store operations into a wide store.
@@ -9730,16 +9730,16 @@ struct BaseIndexOffset {
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
- unsigned NumElem, bool IsConstantSrc, bool UseVector) {
+ unsigned StoresToMerge, bool IsConstantSrc, bool UseVector) {
// Make sure we have something to merge.
- if (NumElem < 2)
+ if (StoresToMerge < 2)
return false;
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned EarliestNodeUsed = 0;
- for (unsigned i=0; i < NumElem; ++i) {
+ for (unsigned i=0; i < StoresToMerge; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
@@ -9754,9 +9754,16 @@ bool DAGCombiner::MergeStoresOfConstants
SDValue StoredVal;
if (UseVector) {
- // Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ bool IsVec = MemVT.isVector();
+ unsigned Elts = StoresToMerge;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ // Get the type for the merged vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+
if (IsConstantSrc) {
// A vector store with a constant source implies that the constant is
// zero; we only handle merging stores of constant zeros because the zero
@@ -9766,31 +9773,31 @@ bool DAGCombiner::MergeStoresOfConstants
StoredVal = DAG.getConstant(0, Ty);
} else {
SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < NumElem ; ++i) {
+ for (unsigned i = 0; i < StoresToMerge ; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue Val = St->getValue();
- // All of the operands of a BUILD_VECTOR must have the same type.
+ // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
if (Val.getValueType() != MemVT)
return false;
Ops.push_back(Val);
}
-
// Build the extracted vector elements back into a vector.
- StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
+ StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
+ DL, Ty, Ops);
}
} else {
// We should always use a vector store when merging extracted vector
// elements, so this path implies a store of constants.
assert(IsConstantSrc && "Merged vector elements should use vector store");
- unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ unsigned StoreBW = StoresToMerge * ElementSizeBytes * 8;
APInt StoreInt(StoreBW, 0);
// Construct a single integer constant which is made of the smaller
// constant inputs.
bool IsLE = TLI.isLittleEndian();
- for (unsigned i = 0; i < NumElem ; ++i) {
- unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+ for (unsigned i = 0; i < StoresToMerge ; ++i) {
+ unsigned Idx = IsLE ? (StoresToMerge - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
StoreInt <<= ElementSizeBytes*8;
@@ -9817,7 +9824,7 @@ bool DAGCombiner::MergeStoresOfConstants
// Replace the first store with the new store
CombineTo(EarliestOp, NewStore);
// Erase all other stores.
- for (unsigned i = 0; i < NumElem ; ++i) {
+ for (unsigned i = 0; i < StoresToMerge ; ++i) {
if (StoreNodes[i].MemNode == EarliestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
@@ -9840,25 +9847,35 @@ bool DAGCombiner::MergeStoresOfConstants
}
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
- EVT MemVT = St->getMemoryVT();
- int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
- // Don't merge vectors into wider inputs.
- if (MemVT.isVector() || !MemVT.isSimple())
- return false;
-
// Perform an early exit check. Do not bother looking at stored values that
// are not constants, loads, or extracted vector elements.
SDValue StoredVal = St->getValue();
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
- bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+ bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
- if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
+ return false;
+
+ EVT MemVT = St->getMemoryVT();
+ int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
+
+ // Don't merge vectors into wider vectors if the source data comes from loads.
+ // TODO: This restriction can be lifted by using logic similar to the
+ // ExtractVecSrc case.
+ // There's no such thing as a vector constant node; that merge case should be
+ // handled by looking through a BUILD_VECTOR source with all constant inputs.
+ if (MemVT.isVector() && IsLoadSrc)
return false;
+
+ if (!MemVT.isSimple())
+ return false;
+
// Only look at ends of store sequences.
SDValue Chain = SDValue(St, 0);
@@ -10054,26 +10071,33 @@ bool DAGCombiner::MergeConsecutiveStores
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
- if (IsExtractVecEltSrc) {
- unsigned NumElem = 0;
+ if (IsExtractVecSrc) {
+ unsigned StoresToMerge = 0;
+ bool IsVec = MemVT.isVector();
for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = St->getValue();
+ unsigned StoreValOpcode = St->getValue().getOpcode();
// This restriction could be loosened.
// Bail out if any stored values are not elements extracted from a vector.
// It should be possible to handle mixed sources, but load sources need
// more careful handling (see the block of code below that handles
// consecutive loads).
- if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
+ StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
return false;
// Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
- if (TLI.isTypeLegal(Ty))
- NumElem = i + 1;
+ unsigned Elts = i + 1;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ if (TLI.isTypeLegal(EVT::getVectorVT(*DAG.getContext(),
+ MemVT.getScalarType(), Elts)))
+ StoresToMerge = i + 1;
}
- return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, StoresToMerge,
false, true);
}
Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=227242&r1=227241&r2=227242&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)
+++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Tue Jan 27 14:50:27 2015
@@ -468,6 +468,64 @@ define void @merge_vec_element_store(<8
; CHECK-NEXT: retq
}
+; PR21711 - Merge vector stores into wider vector stores.
+define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x float>* %ptr) {
+ %idx0 = getelementptr inbounds <4 x float>* %ptr, i64 3
+ %idx1 = getelementptr inbounds <4 x float>* %ptr, i64 4
+ %idx2 = getelementptr inbounds <4 x float>* %ptr, i64 5
+ %idx3 = getelementptr inbounds <4 x float>* %ptr, i64 6
+ %shuffle0 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %shuffle1 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle2 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %shuffle3 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ store <4 x float> %shuffle0, <4 x float>* %idx0, align 16
+ store <4 x float> %shuffle1, <4 x float>* %idx1, align 16
+ store <4 x float> %shuffle2, <4 x float>* %idx2, align 16
+ store <4 x float> %shuffle3, <4 x float>* %idx3, align 16
+ ret void
+
+; CHECK-LABEL: merge_vec_extract_stores
+; CHECK: vmovups %ymm0, 48(%rdi)
+; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
+
+; Merging vector stores when sourced from vector loads is not currently handled.
+define void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) {
+ %load_idx0 = getelementptr inbounds <4 x float>* %v, i64 0
+ %load_idx1 = getelementptr inbounds <4 x float>* %v, i64 1
+ %v0 = load <4 x float>* %load_idx0
+ %v1 = load <4 x float>* %load_idx1
+ %store_idx0 = getelementptr inbounds <4 x float>* %ptr, i64 0
+ %store_idx1 = getelementptr inbounds <4 x float>* %ptr, i64 1
+ store <4 x float> %v0, <4 x float>* %store_idx0, align 16
+ store <4 x float> %v1, <4 x float>* %store_idx1, align 16
+ ret void
+
+; CHECK-LABEL: merge_vec_stores_from_loads
+; CHECK: vmovaps
+; CHECK-NEXT: vmovaps
+; CHECK-NEXT: vmovaps
+; CHECK-NEXT: vmovaps
+; CHECK-NEXT: retq
+}
+
+; Merging vector stores when sourced from a constant vector is not currently handled.
+define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
+ %idx0 = getelementptr inbounds <4 x i32>* %ptr, i64 3
+ %idx1 = getelementptr inbounds <4 x i32>* %ptr, i64 4
+ store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx0, align 16
+ store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx1, align 16
+ ret void
+
+; CHECK-LABEL: merge_vec_stores_of_constants
+; CHECK: vxorps
+; CHECK-NEXT: vmovaps
+; CHECK-NEXT: vmovaps
+; CHECK-NEXT: retq
+}
+
; This is a minimized test based on real code that was failing.
; We could merge stores (and loads) like this...
More information about the llvm-commits
mailing list