[llvm] r209788 - [x86] Fold extract_vector_elt of a load into the Load's address computation.

Dmitry Babokin babokin at gmail.com
Fri Jul 11 05:36:16 PDT 2014


Michael,

Your commit caused a regression described in bug 20115
<http://llvm.org/bugs/show_bug.cgi?id=20115>, could you have a look?

Dmitry


On Thu, May 29, 2014 at 5:42 AM, Michael J. Spencer <bigcheesegs at gmail.com>
wrote:

> Author: mspencer
> Date: Wed May 28 20:42:45 2014
> New Revision: 209788
>
> URL: http://llvm.org/viewvc/llvm-project?rev=209788&view=rev
> Log:
> [x86] Fold extract_vector_elt of a load into the Load's address
> computation.
>
> An address only use of an extract element of a load can be simplified to a
> load. Without this the result of the extract element is spilled to the
> stack so that an address is available.
>
> Modified:
>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>     llvm/trunk/test/CodeGen/X86/vec_splat.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=209788&r1=209787&r2=209788&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed May 28
> 20:42:45 2014
> @@ -169,6 +169,16 @@ namespace {
>      bool CombineToPostIndexedLoadStore(SDNode *N);
>      bool SliceUpLoad(SDNode *N);
>
> +    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a
> narrowed
> +    ///   load.
> +    ///
> +    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
> +    /// \param InVecVT type of the input vector to EVE with bitcasts
> resolved.
> +    /// \param EltNo index of the vector element to load.
> +    /// \param OriginalLoad load that EVE came from to be replaced.
> +    /// \returns EVE on success SDValue() on failure.
> +    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
> +        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode
> *OriginalLoad);
>      void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
>      SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
>      SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
> @@ -9675,6 +9685,86 @@ SDValue DAGCombiner::visitINSERT_VECTOR_
>    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
>  }
>
> +SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
> +    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
> +  EVT ResultVT = EVE->getValueType(0);
> +  EVT VecEltVT = InVecVT.getVectorElementType();
> +  unsigned Align = OriginalLoad->getAlignment();
> +  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
> +      VecEltVT.getTypeForEVT(*DAG.getContext()));
> +
> +  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD,
> VecEltVT))
> +    return SDValue();
> +
> +  Align = NewAlign;
> +
> +  SDValue NewPtr = OriginalLoad->getBasePtr();
> +  SDValue Offset;
> +  EVT PtrType = NewPtr.getValueType();
> +  MachinePointerInfo MPI;
> +  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
> +    int Elt = ConstEltNo->getZExtValue();
> +    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
> +    if (TLI.isBigEndian())
> +      PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
> +    Offset = DAG.getConstant(PtrOff, PtrType);
> +    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
> +  } else {
> +    Offset = DAG.getNode(
> +        ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
> +        DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
> +    if (TLI.isBigEndian())
> +      Offset = DAG.getNode(
> +          ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
> +          DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()),
> Offset);
> +    MPI = OriginalLoad->getPointerInfo();
> +  }
> +  NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
> +
> +  // The replacement we need to do here is a little tricky: we need to
> +  // replace an extractelement of a load with a load.
> +  // Use ReplaceAllUsesOfValuesWith to do the replacement.
> +  // Note that this replacement assumes that the extractvalue is the only
> +  // use of the load; that's okay because we don't want to perform this
> +  // transformation in other cases anyway.
> +  SDValue Load;
> +  SDValue Chain;
> +  if (ResultVT.bitsGT(VecEltVT)) {
> +    // If the result type of vextract is wider than the load, then issue
> an
> +    // extending load instead.
> +    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
> +                                   ? ISD::ZEXTLOAD
> +                                   : ISD::EXTLOAD;
> +    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
> OriginalLoad->getChain(),
> +                          NewPtr, MPI, VecEltVT,
> OriginalLoad->isVolatile(),
> +                          OriginalLoad->isNonTemporal(), Align,
> +                          OriginalLoad->getTBAAInfo());
> +    Chain = Load.getValue(1);
> +  } else {
> +    Load = DAG.getLoad(
> +        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
> +        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
> +        OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
> +    Chain = Load.getValue(1);
> +    if (ResultVT.bitsLT(VecEltVT))
> +      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
> +    else
> +      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
> +  }
> +  WorkListRemover DeadNodes(*this);
> +  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
> +  SDValue To[] = { Load, Chain };
> +  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
> +  // Since we're explicitly calling ReplaceAllUses, add the new node to
> the
> +  // worklist explicitly as well.
> +  AddToWorkList(Load.getNode());
> +  AddUsersToWorkList(Load.getNode()); // Add users too
> +  // Make sure to revisit this node to clean it up; it will usually be
> dead.
> +  AddToWorkList(EVE);
> +  ++OpsNarrowed;
> +  return SDValue(EVE, 0);
> +}
> +
>  SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
>    // (vextract (scalar_to_vector val, 0) -> val
>    SDValue InVec = N->getOperand(0);
> @@ -9743,6 +9833,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
>      }
>    }
>
> +  bool BCNumEltsChanged = false;
> +  EVT ExtVT = VT.getVectorElementType();
> +  EVT LVT = ExtVT;
> +
> +  // If the result of load has to be truncated, then it's not necessarily
> +  // profitable.
> +  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
> +    return SDValue();
> +
> +  if (InVec.getOpcode() == ISD::BITCAST) {
> +    // Don't duplicate a load with other uses.
> +    if (!InVec.hasOneUse())
> +      return SDValue();
> +
> +    EVT BCVT = InVec.getOperand(0).getValueType();
> +    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
> +      return SDValue();
> +    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
> +      BCNumEltsChanged = true;
> +    InVec = InVec.getOperand(0);
> +    ExtVT = BCVT.getVectorElementType();
> +  }
> +
> +  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
> +  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
> +      ISD::isNormalLoad(InVec.getNode())) {
> +    SDValue Index = N->getOperand(1);
> +    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
> +      return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
> +                                                           OrigLoad);
> +  }
> +
>    // Perform only after legalization to ensure build_vector /
> vector_shuffle
>    // optimizations have already been done.
>    if (!LegalOperations) return SDValue();
> @@ -9753,30 +9875,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
>
>    if (ConstEltNo) {
>      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
> -    bool NewLoad = false;
> -    bool BCNumEltsChanged = false;
> -    EVT ExtVT = VT.getVectorElementType();
> -    EVT LVT = ExtVT;
> -
> -    // If the result of load has to be truncated, then it's not
> necessarily
> -    // profitable.
> -    if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
> -      return SDValue();
> -
> -    if (InVec.getOpcode() == ISD::BITCAST) {
> -      // Don't duplicate a load with other uses.
> -      if (!InVec.hasOneUse())
> -        return SDValue();
> -
> -      EVT BCVT = InVec.getOperand(0).getValueType();
> -      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
> -        return SDValue();
> -      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
> -        BCNumEltsChanged = true;
> -      InVec = InVec.getOperand(0);
> -      ExtVT = BCVT.getVectorElementType();
> -      NewLoad = true;
> -    }
>
>      LoadSDNode *LN0 = nullptr;
>      const ShuffleVectorSDNode *SVN = nullptr;
> @@ -9819,6 +9917,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
>        if (ISD::isNormalLoad(InVec.getNode())) {
>          LN0 = cast<LoadSDNode>(InVec);
>          Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
> +        EltNo = DAG.getConstant(Elt, EltNo.getValueType());
>        }
>      }
>
> @@ -9831,72 +9930,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
>      if (Elt == -1)
>        return DAG.getUNDEF(LVT);
>
> -    unsigned Align = LN0->getAlignment();
> -    if (NewLoad) {
> -      // Check the resultant load doesn't need a higher alignment than the
> -      // original load.
> -      unsigned NewAlign =
> -        TLI.getDataLayout()
> -            ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
> -
> -      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD,
> LVT))
> -        return SDValue();
> -
> -      Align = NewAlign;
> -    }
> -
> -    SDValue NewPtr = LN0->getBasePtr();
> -    unsigned PtrOff = 0;
> -
> -    if (Elt) {
> -      PtrOff = LVT.getSizeInBits() * Elt / 8;
> -      EVT PtrType = NewPtr.getValueType();
> -      if (TLI.isBigEndian())
> -        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
> -      NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
> -                           DAG.getConstant(PtrOff, PtrType));
> -    }
> -
> -    // The replacement we need to do here is a little tricky: we need to
> -    // replace an extractelement of a load with a load.
> -    // Use ReplaceAllUsesOfValuesWith to do the replacement.
> -    // Note that this replacement assumes that the extractvalue is the
> only
> -    // use of the load; that's okay because we don't want to perform this
> -    // transformation in other cases anyway.
> -    SDValue Load;
> -    SDValue Chain;
> -    if (NVT.bitsGT(LVT)) {
> -      // If the result type of vextract is wider than the load, then
> issue an
> -      // extending load instead.
> -      ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
> -        ? ISD::ZEXTLOAD : ISD::EXTLOAD;
> -      Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
> -                            NewPtr,
> LN0->getPointerInfo().getWithOffset(PtrOff),
> -                            LVT, LN0->isVolatile(), LN0->isNonTemporal(),
> -                            Align, LN0->getTBAAInfo());
> -      Chain = Load.getValue(1);
> -    } else {
> -      Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
> -                         LN0->getPointerInfo().getWithOffset(PtrOff),
> -                         LN0->isVolatile(), LN0->isNonTemporal(),
> -                         LN0->isInvariant(), Align, LN0->getTBAAInfo());
> -      Chain = Load.getValue(1);
> -      if (NVT.bitsLT(LVT))
> -        Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
> -      else
> -        Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
> -    }
> -    WorkListRemover DeadNodes(*this);
> -    SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
> -    SDValue To[] = { Load, Chain };
> -    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
> -    // Since we're explcitly calling ReplaceAllUses, add the new node to
> the
> -    // worklist explicitly as well.
> -    AddToWorkList(Load.getNode());
> -    AddUsersToWorkList(Load.getNode()); // Add users too
> -    // Make sure to revisit this node to clean it up; it will usually be
> dead.
> -    AddToWorkList(N);
> -    return SDValue(N, 0);
> +    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo,
> LN0);
>    }
>
>    return SDValue();
>
> Modified: llvm/trunk/test/CodeGen/X86/vec_splat.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_splat.ll?rev=209788&r1=209787&r2=209788&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_splat.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vec_splat.ll Wed May 28 20:42:45 2014
> @@ -1,5 +1,6 @@
>  ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s
> -check-prefix=SSE2
>  ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s
> -check-prefix=SSE3
> +; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX
>
>  define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X)
> nounwind {
>         %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0
>               ; <<4 x float>> [#uses=1]
> @@ -37,6 +38,23 @@ define void @test_v2sd(<2 x double>* %P,
>  define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly
> %ptr, i64 %i, i64 %j) nounwind {
>    %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
>    %2 = load <4 x float>* %1, align 16
> +  %3 = trunc i64 %j to i32
> +  %4 = extractelement <4 x float> %2, i32 %3
> +  %5 = insertelement <4 x float> undef, float %4, i32 0
> +  %6 = insertelement <4 x float> %5, float %4, i32 1
> +  %7 = insertelement <4 x float> %6, float %4, i32 2
> +  %8 = insertelement <4 x float> %7, float %4, i32 3
> +  ret <4 x float> %8
> +
> +; AVX-LABEL: load_extract_splat
> +; AVX-NOT: rsp
> +; AVX: vbroadcastss
> +}
> +
> +; Fold extract of a load into the load's address computation. This avoids
> spilling to the stack.
> +define <4 x float> @load_extract_splat1(<4 x float>* nocapture readonly
> %ptr, i64 %i, i64 %j) nounwind {
> +  %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
> +  %2 = load <4 x float>* %1, align 16
>    %3 = extractelement <4 x float> %2, i64 %j
>    %4 = insertelement <4 x float> undef, float %3, i32 0
>    %5 = insertelement <4 x float> %4, float %3, i32 1
> @@ -44,7 +62,7 @@ define <4 x float> @load_extract_splat(<
>    %7 = insertelement <4 x float> %6, float %3, i32 3
>    ret <4 x float> %7
>
> -; AVX-LABEL: load_extract_splat
> +; AVX-LABEL: load_extract_splat1
>  ; AVX-NOT: movs
>  ; AVX: vbroadcastss
>  }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140711/19fe0136/attachment.html>


More information about the llvm-commits mailing list