[llvm] r213339 - Revert "[x86] Fold extract_vector_elt of a load into the Load's address computation."
Michael J. Spencer
bigcheesegs at gmail.com
Thu Jul 17 17:15:50 PDT 2014
Author: mspencer
Date: Thu Jul 17 19:15:50 2014
New Revision: 213339
URL: http://llvm.org/viewvc/llvm-project?rev=213339&view=rev
Log:
Revert "[x86] Fold extract_vector_elt of a load into the Load's address computation."
There's a bug where this can create cycles in the DAG. It will take a bit
to fix, so I'm backing it out for now.
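
For context, the combine being reverted rewrote a variable-index extractelement of a vector load into a single scalar load from a computed address; that is what let the load_extract_splat test below select vbroadcastss without spilling the vector to the stack. A minimal before/after sketch at the LLVM IR level (function and value names here are illustrative only; the combine itself operates on the SelectionDAG, on an ISD::EXTRACT_VECTOR_ELT whose operand is a LoadSDNode, not on IR):

; Before: the whole vector is loaded and then indexed with a variable index,
; which on x86 generally goes through a stack temporary.
define float @extract_elt(<4 x float>* %p, i64 %j) nounwind {
  %v = load <4 x float>* %p, align 16
  %e = extractelement <4 x float> %v, i64 %j
  ret float %e
}

; After (conceptually): the extract is folded into the load's address
; computation, so only the requested element is loaded.
define float @extract_elt_narrowed(<4 x float>* %p, i64 %j) nounwind {
  %addr = getelementptr inbounds <4 x float>* %p, i64 0, i64 %j
  %e = load float* %addr, align 4
  ret float %e
}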
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/vec_splat.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=213339&r1=213338&r2=213339&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Jul 17 19:15:50 2014
@@ -169,16 +169,6 @@ namespace {
bool CombineToPostIndexedLoadStore(SDNode *N);
bool SliceUpLoad(SDNode *N);
- /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
- /// load.
- ///
- /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
- /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
- /// \param EltNo index of the vector element to load.
- /// \param OriginalLoad load that EVE came from to be replaced.
- /// \returns EVE on success SDValue() on failure.
- SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
- SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -9792,86 +9782,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
-SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
- SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
- EVT ResultVT = EVE->getValueType(0);
- EVT VecEltVT = InVecVT.getVectorElementType();
- unsigned Align = OriginalLoad->getAlignment();
- unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
- VecEltVT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
- return SDValue();
-
- Align = NewAlign;
-
- SDValue NewPtr = OriginalLoad->getBasePtr();
- SDValue Offset;
- EVT PtrType = NewPtr.getValueType();
- MachinePointerInfo MPI;
- if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
- int Elt = ConstEltNo->getZExtValue();
- unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
- if (TLI.isBigEndian())
- PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
- Offset = DAG.getConstant(PtrOff, PtrType);
- MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
- } else {
- Offset = DAG.getNode(
- ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
- DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
- if (TLI.isBigEndian())
- Offset = DAG.getNode(
- ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
- DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
- MPI = OriginalLoad->getPointerInfo();
- }
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
-
- // The replacement we need to do here is a little tricky: we need to
- // replace an extractelement of a load with a load.
- // Use ReplaceAllUsesOfValuesWith to do the replacement.
- // Note that this replacement assumes that the extractvalue is the only
- // use of the load; that's okay because we don't want to perform this
- // transformation in other cases anyway.
- SDValue Load;
- SDValue Chain;
- if (ResultVT.bitsGT(VecEltVT)) {
- // If the result type of vextract is wider than the load, then issue an
- // extending load instead.
- ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
- ? ISD::ZEXTLOAD
- : ISD::EXTLOAD;
- Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
- NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
- OriginalLoad->isNonTemporal(), Align,
- OriginalLoad->getTBAAInfo());
- Chain = Load.getValue(1);
- } else {
- Load = DAG.getLoad(
- VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
- OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
- OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
- Chain = Load.getValue(1);
- if (ResultVT.bitsLT(VecEltVT))
- Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
- else
- Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
- }
- WorkListRemover DeadNodes(*this);
- SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
- SDValue To[] = { Load, Chain };
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
- // Since we're explicitly calling ReplaceAllUses, add the new node to the
- // worklist explicitly as well.
- AddToWorkList(Load.getNode());
- AddUsersToWorkList(Load.getNode()); // Add users too
- // Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorkList(EVE);
- ++OpsNarrowed;
- return SDValue(EVE, 0);
-}
-
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
@@ -9940,38 +9850,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
}
}
- bool BCNumEltsChanged = false;
- EVT ExtVT = VT.getVectorElementType();
- EVT LVT = ExtVT;
-
- // If the result of load has to be truncated, then it's not necessarily
- // profitable.
- if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
- return SDValue();
-
- if (InVec.getOpcode() == ISD::BITCAST) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- EVT BCVT = InVec.getOperand(0).getValueType();
- if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
- return SDValue();
- if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
- BCNumEltsChanged = true;
- InVec = InVec.getOperand(0);
- ExtVT = BCVT.getVectorElementType();
- }
-
- // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
- if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
- ISD::isNormalLoad(InVec.getNode())) {
- SDValue Index = N->getOperand(1);
- if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
- return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
- OrigLoad);
- }
-
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations) return SDValue();
@@ -9982,6 +9860,30 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
if (ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ bool NewLoad = false;
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
+
+ // If the result of load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ NewLoad = true;
+ }
LoadSDNode *LN0 = nullptr;
const ShuffleVectorSDNode *SVN = nullptr;
@@ -10024,7 +9926,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
- EltNo = DAG.getConstant(Elt, EltNo.getValueType());
}
}
@@ -10037,7 +9938,72 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
if (Elt == -1)
return DAG.getUNDEF(LVT);
- return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
+ unsigned Align = LN0->getAlignment();
+ if (NewLoad) {
+ // Check the resultant load doesn't need a higher alignment than the
+ // original load.
+ unsigned NewAlign =
+ TLI.getDataLayout()
+ ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+ return SDValue();
+
+ Align = NewAlign;
+ }
+
+ SDValue NewPtr = LN0->getBasePtr();
+ unsigned PtrOff = 0;
+
+ if (Elt) {
+ PtrOff = LVT.getSizeInBits() * Elt / 8;
+ EVT PtrType = NewPtr.getValueType();
+ if (TLI.isBigEndian())
+ PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+ NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
+ DAG.getConstant(PtrOff, PtrType));
+ }
+
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractvalue is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (NVT.bitsGT(LVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
+ ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
+ NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+ LVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ Align, LN0->getTBAAInfo());
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), Align, LN0->getTBAAInfo());
+ Chain = Load.getValue(1);
+ if (NVT.bitsLT(LVT))
+ Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
+ }
+ WorkListRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Since we're explicitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(N);
+ return SDValue(N, 0);
}
return SDValue();
Modified: llvm/trunk/test/CodeGen/X86/vec_splat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_splat.ll?rev=213339&r1=213338&r2=213339&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_splat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_splat.ll Thu Jul 17 19:15:50 2014
@@ -1,6 +1,5 @@
; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s -check-prefix=SSE2
; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s -check-prefix=SSE3
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX
define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
@@ -38,23 +37,6 @@ define void @test_v2sd(<2 x double>* %P,
define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {
%1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
%2 = load <4 x float>* %1, align 16
- %3 = trunc i64 %j to i32
- %4 = extractelement <4 x float> %2, i32 %3
- %5 = insertelement <4 x float> undef, float %4, i32 0
- %6 = insertelement <4 x float> %5, float %4, i32 1
- %7 = insertelement <4 x float> %6, float %4, i32 2
- %8 = insertelement <4 x float> %7, float %4, i32 3
- ret <4 x float> %8
-
-; AVX-LABEL: load_extract_splat
-; AVX-NOT: rsp
-; AVX: vbroadcastss
-}
-
-; Fold extract of a load into the load's address computation. This avoids spilling to the stack.
-define <4 x float> @load_extract_splat1(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {
- %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
- %2 = load <4 x float>* %1, align 16
%3 = extractelement <4 x float> %2, i64 %j
%4 = insertelement <4 x float> undef, float %3, i32 0
%5 = insertelement <4 x float> %4, float %3, i32 1
@@ -62,7 +44,7 @@ define <4 x float> @load_extract_splat1(
%7 = insertelement <4 x float> %6, float %3, i32 3
ret <4 x float> %7
-; AVX-LABEL: load_extract_splat1
+; AVX-LABEL: load_extract_splat
; AVX-NOT: movs
; AVX: vbroadcastss
}