[llvm] r209788 - [x86] Fold extract_vector_elt of a load into the Load's address computation.
Dmitry Babokin
babokin at gmail.com
Fri Jul 11 05:36:16 PDT 2014
Michael,
Your commit caused a regression described in bug 20115
<http://llvm.org/bugs/show_bug.cgi?id=20115>, could you have a look?
Dmitry
On Thu, May 29, 2014 at 5:42 AM, Michael J. Spencer <bigcheesegs at gmail.com>
wrote:
> Author: mspencer
> Date: Wed May 28 20:42:45 2014
> New Revision: 209788
>
> URL: http://llvm.org/viewvc/llvm-project?rev=209788&view=rev
> Log:
> [x86] Fold extract_vector_elt of a load into the Load's address
> computation.
>
> An address only use of an extract element of a load can be simplified to a
> load. Without this the result of the extract element is spilled to the
> stack so that an address is available.
>
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> llvm/trunk/test/CodeGen/X86/vec_splat.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=209788&r1=209787&r2=209788&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed May 28
> 20:42:45 2014
> @@ -169,6 +169,16 @@ namespace {
> bool CombineToPostIndexedLoadStore(SDNode *N);
> bool SliceUpLoad(SDNode *N);
>
> + /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a
> narrowed
> + /// load.
> + ///
> + /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
> + /// \param InVecVT type of the input vector to EVE with bitcasts
> resolved.
> + /// \param EltNo index of the vector element to load.
> + /// \param OriginalLoad load that EVE came from to be replaced.
> + /// \returns EVE on success SDValue() on failure.
> + SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
> + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode
> *OriginalLoad);
> void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
> SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
> SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
> @@ -9675,6 +9685,86 @@ SDValue DAGCombiner::visitINSERT_VECTOR_
> return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
> }
>
> +SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
> + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
> + EVT ResultVT = EVE->getValueType(0);
> + EVT VecEltVT = InVecVT.getVectorElementType();
> + unsigned Align = OriginalLoad->getAlignment();
> + unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
> + VecEltVT.getTypeForEVT(*DAG.getContext()));
> +
> + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD,
> VecEltVT))
> + return SDValue();
> +
> + Align = NewAlign;
> +
> + SDValue NewPtr = OriginalLoad->getBasePtr();
> + SDValue Offset;
> + EVT PtrType = NewPtr.getValueType();
> + MachinePointerInfo MPI;
> + if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
> + int Elt = ConstEltNo->getZExtValue();
> + unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
> + if (TLI.isBigEndian())
> + PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
> + Offset = DAG.getConstant(PtrOff, PtrType);
> + MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
> + } else {
> + Offset = DAG.getNode(
> + ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
> + DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
> + if (TLI.isBigEndian())
> + Offset = DAG.getNode(
> + ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
> + DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()),
> Offset);
> + MPI = OriginalLoad->getPointerInfo();
> + }
> + NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
> +
> + // The replacement we need to do here is a little tricky: we need to
> + // replace an extractelement of a load with a load.
> + // Use ReplaceAllUsesOfValuesWith to do the replacement.
> + // Note that this replacement assumes that the extractvalue is the only
> + // use of the load; that's okay because we don't want to perform this
> + // transformation in other cases anyway.
> + SDValue Load;
> + SDValue Chain;
> + if (ResultVT.bitsGT(VecEltVT)) {
> + // If the result type of vextract is wider than the load, then issue
> an
> + // extending load instead.
> + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
> + ? ISD::ZEXTLOAD
> + : ISD::EXTLOAD;
> + Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
> OriginalLoad->getChain(),
> + NewPtr, MPI, VecEltVT,
> OriginalLoad->isVolatile(),
> + OriginalLoad->isNonTemporal(), Align,
> + OriginalLoad->getTBAAInfo());
> + Chain = Load.getValue(1);
> + } else {
> + Load = DAG.getLoad(
> + VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
> + OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
> + OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
> + Chain = Load.getValue(1);
> + if (ResultVT.bitsLT(VecEltVT))
> + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
> + else
> + Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
> + }
> + WorkListRemover DeadNodes(*this);
> + SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
> + SDValue To[] = { Load, Chain };
> + DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
> + // Since we're explicitly calling ReplaceAllUses, add the new node to
> the
> + // worklist explicitly as well.
> + AddToWorkList(Load.getNode());
> + AddUsersToWorkList(Load.getNode()); // Add users too
> + // Make sure to revisit this node to clean it up; it will usually be
> dead.
> + AddToWorkList(EVE);
> + ++OpsNarrowed;
> + return SDValue(EVE, 0);
> +}
> +
> SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
> // (vextract (scalar_to_vector val, 0) -> val
> SDValue InVec = N->getOperand(0);
> @@ -9743,6 +9833,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
> }
> }
>
> + bool BCNumEltsChanged = false;
> + EVT ExtVT = VT.getVectorElementType();
> + EVT LVT = ExtVT;
> +
> + // If the result of load has to be truncated, then it's not necessarily
> + // profitable.
> + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
> + return SDValue();
> +
> + if (InVec.getOpcode() == ISD::BITCAST) {
> + // Don't duplicate a load with other uses.
> + if (!InVec.hasOneUse())
> + return SDValue();
> +
> + EVT BCVT = InVec.getOperand(0).getValueType();
> + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
> + return SDValue();
> + if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
> + BCNumEltsChanged = true;
> + InVec = InVec.getOperand(0);
> + ExtVT = BCVT.getVectorElementType();
> + }
> +
> + // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
> + if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
> + ISD::isNormalLoad(InVec.getNode())) {
> + SDValue Index = N->getOperand(1);
> + if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
> + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
> + OrigLoad);
> + }
> +
> // Perform only after legalization to ensure build_vector /
> vector_shuffle
> // optimizations have already been done.
> if (!LegalOperations) return SDValue();
> @@ -9753,30 +9875,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
>
> if (ConstEltNo) {
> int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
> - bool NewLoad = false;
> - bool BCNumEltsChanged = false;
> - EVT ExtVT = VT.getVectorElementType();
> - EVT LVT = ExtVT;
> -
> - // If the result of load has to be truncated, then it's not
> necessarily
> - // profitable.
> - if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
> - return SDValue();
> -
> - if (InVec.getOpcode() == ISD::BITCAST) {
> - // Don't duplicate a load with other uses.
> - if (!InVec.hasOneUse())
> - return SDValue();
> -
> - EVT BCVT = InVec.getOperand(0).getValueType();
> - if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
> - return SDValue();
> - if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
> - BCNumEltsChanged = true;
> - InVec = InVec.getOperand(0);
> - ExtVT = BCVT.getVectorElementType();
> - NewLoad = true;
> - }
>
> LoadSDNode *LN0 = nullptr;
> const ShuffleVectorSDNode *SVN = nullptr;
> @@ -9819,6 +9917,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
> if (ISD::isNormalLoad(InVec.getNode())) {
> LN0 = cast<LoadSDNode>(InVec);
> Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
> + EltNo = DAG.getConstant(Elt, EltNo.getValueType());
> }
> }
>
> @@ -9831,72 +9930,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
> if (Elt == -1)
> return DAG.getUNDEF(LVT);
>
> - unsigned Align = LN0->getAlignment();
> - if (NewLoad) {
> - // Check the resultant load doesn't need a higher alignment than the
> - // original load.
> - unsigned NewAlign =
> - TLI.getDataLayout()
> - ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
> -
> - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD,
> LVT))
> - return SDValue();
> -
> - Align = NewAlign;
> - }
> -
> - SDValue NewPtr = LN0->getBasePtr();
> - unsigned PtrOff = 0;
> -
> - if (Elt) {
> - PtrOff = LVT.getSizeInBits() * Elt / 8;
> - EVT PtrType = NewPtr.getValueType();
> - if (TLI.isBigEndian())
> - PtrOff = VT.getSizeInBits() / 8 - PtrOff;
> - NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
> - DAG.getConstant(PtrOff, PtrType));
> - }
> -
> - // The replacement we need to do here is a little tricky: we need to
> - // replace an extractelement of a load with a load.
> - // Use ReplaceAllUsesOfValuesWith to do the replacement.
> - // Note that this replacement assumes that the extractvalue is the
> only
> - // use of the load; that's okay because we don't want to perform this
> - // transformation in other cases anyway.
> - SDValue Load;
> - SDValue Chain;
> - if (NVT.bitsGT(LVT)) {
> - // If the result type of vextract is wider than the load, then
> issue an
> - // extending load instead.
> - ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
> - ? ISD::ZEXTLOAD : ISD::EXTLOAD;
> - Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
> - NewPtr,
> LN0->getPointerInfo().getWithOffset(PtrOff),
> - LVT, LN0->isVolatile(), LN0->isNonTemporal(),
> - Align, LN0->getTBAAInfo());
> - Chain = Load.getValue(1);
> - } else {
> - Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
> - LN0->getPointerInfo().getWithOffset(PtrOff),
> - LN0->isVolatile(), LN0->isNonTemporal(),
> - LN0->isInvariant(), Align, LN0->getTBAAInfo());
> - Chain = Load.getValue(1);
> - if (NVT.bitsLT(LVT))
> - Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
> - else
> - Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
> - }
> - WorkListRemover DeadNodes(*this);
> - SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
> - SDValue To[] = { Load, Chain };
> - DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
> - // Since we're explcitly calling ReplaceAllUses, add the new node to
> the
> - // worklist explicitly as well.
> - AddToWorkList(Load.getNode());
> - AddUsersToWorkList(Load.getNode()); // Add users too
> - // Make sure to revisit this node to clean it up; it will usually be
> dead.
> - AddToWorkList(N);
> - return SDValue(N, 0);
> + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo,
> LN0);
> }
>
> return SDValue();
>
> Modified: llvm/trunk/test/CodeGen/X86/vec_splat.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_splat.ll?rev=209788&r1=209787&r2=209788&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_splat.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vec_splat.ll Wed May 28 20:42:45 2014
> @@ -1,5 +1,6 @@
> ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s
> -check-prefix=SSE2
> ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s
> -check-prefix=SSE3
> +; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX
>
> define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X)
> nounwind {
> %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0
> ; <<4 x float>> [#uses=1]
> @@ -37,6 +38,23 @@ define void @test_v2sd(<2 x double>* %P,
> define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly
> %ptr, i64 %i, i64 %j) nounwind {
> %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
> %2 = load <4 x float>* %1, align 16
> + %3 = trunc i64 %j to i32
> + %4 = extractelement <4 x float> %2, i32 %3
> + %5 = insertelement <4 x float> undef, float %4, i32 0
> + %6 = insertelement <4 x float> %5, float %4, i32 1
> + %7 = insertelement <4 x float> %6, float %4, i32 2
> + %8 = insertelement <4 x float> %7, float %4, i32 3
> + ret <4 x float> %8
> +
> +; AVX-LABEL: load_extract_splat
> +; AVX-NOT: rsp
> +; AVX: vbroadcastss
> +}
> +
> +; Fold extract of a load into the load's address computation. This avoids
> spilling to the stack.
> +define <4 x float> @load_extract_splat1(<4 x float>* nocapture readonly
> %ptr, i64 %i, i64 %j) nounwind {
> + %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
> + %2 = load <4 x float>* %1, align 16
> %3 = extractelement <4 x float> %2, i64 %j
> %4 = insertelement <4 x float> undef, float %3, i32 0
> %5 = insertelement <4 x float> %4, float %3, i32 1
> @@ -44,7 +62,7 @@ define <4 x float> @load_extract_splat(<
> %7 = insertelement <4 x float> %6, float %3, i32 3
> ret <4 x float> %7
>
> -; AVX-LABEL: load_extract_splat
> +; AVX-LABEL: load_extract_splat1
> ; AVX-NOT: movs
> ; AVX: vbroadcastss
> }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140711/19fe0136/attachment.html>
More information about the llvm-commits
mailing list