[llvm] r209788 - [x86] Fold extract_vector_elt of a load into the Load's address computation.
Michael Spencer
bigcheesegs at gmail.com
Fri Jul 11 12:47:25 PDT 2014
On Fri, Jul 11, 2014 at 5:36 AM, Dmitry Babokin <babokin at gmail.com> wrote:
> Michael,
>
> Your commit caused a regression described in bug 20115, could you have a
> look?
>
> Dmitry
Investigating now. Thanks for letting me know.
- Michael Spencer
>
>
> On Thu, May 29, 2014 at 5:42 AM, Michael J. Spencer <bigcheesegs at gmail.com>
> wrote:
>>
>> Author: mspencer
>> Date: Wed May 28 20:42:45 2014
>> New Revision: 209788
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=209788&view=rev
>> Log:
>> [x86] Fold extract_vector_elt of a load into the Load's address
>> computation.
>>
>> An address only use of an extract element of a load can be simplified to a
>> load. Without this the result of the extract element is spilled to the
>> stack so that an address is available.
>>
>> Modified:
>> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> llvm/trunk/test/CodeGen/X86/vec_splat.ll
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=209788&r1=209787&r2=209788&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed May 28
>> 20:42:45 2014
>> @@ -169,6 +169,16 @@ namespace {
>> bool CombineToPostIndexedLoadStore(SDNode *N);
>> bool SliceUpLoad(SDNode *N);
>>
>> + /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a
>> narrowed
>> + /// load.
>> + ///
>> + /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
>> + /// \param InVecVT type of the input vector to EVE with bitcasts
>> resolved.
>> + /// \param EltNo index of the vector element to load.
>> + /// \param OriginalLoad load that EVE came from to be replaced.
>> + /// \returns EVE on success SDValue() on failure.
>> + SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
>> + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode
>> *OriginalLoad);
>> void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
>> SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
>> SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
>> @@ -9675,6 +9685,86 @@ SDValue DAGCombiner::visitINSERT_VECTOR_
>> return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
>> }
>>
>> +SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
>> + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
>> + EVT ResultVT = EVE->getValueType(0);
>> + EVT VecEltVT = InVecVT.getVectorElementType();
>> + unsigned Align = OriginalLoad->getAlignment();
>> + unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
>> + VecEltVT.getTypeForEVT(*DAG.getContext()));
>> +
>> + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD,
>> VecEltVT))
>> + return SDValue();
>> +
>> + Align = NewAlign;
>> +
>> + SDValue NewPtr = OriginalLoad->getBasePtr();
>> + SDValue Offset;
>> + EVT PtrType = NewPtr.getValueType();
>> + MachinePointerInfo MPI;
>> + if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
>> + int Elt = ConstEltNo->getZExtValue();
>> + unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
>> + if (TLI.isBigEndian())
>> + PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
>> + Offset = DAG.getConstant(PtrOff, PtrType);
>> + MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
>> + } else {
>> + Offset = DAG.getNode(
>> + ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
>> + DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
>> + if (TLI.isBigEndian())
>> + Offset = DAG.getNode(
>> + ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
>> + DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()),
>> Offset);
>> + MPI = OriginalLoad->getPointerInfo();
>> + }
>> + NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
>> +
>> + // The replacement we need to do here is a little tricky: we need to
>> + // replace an extractelement of a load with a load.
>> + // Use ReplaceAllUsesOfValuesWith to do the replacement.
>> + // Note that this replacement assumes that the extractvalue is the only
>> + // use of the load; that's okay because we don't want to perform this
>> + // transformation in other cases anyway.
>> + SDValue Load;
>> + SDValue Chain;
>> + if (ResultVT.bitsGT(VecEltVT)) {
>> + // If the result type of vextract is wider than the load, then issue
>> an
>> + // extending load instead.
>> + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
>> VecEltVT)
>> + ? ISD::ZEXTLOAD
>> + : ISD::EXTLOAD;
>> + Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
>> OriginalLoad->getChain(),
>> + NewPtr, MPI, VecEltVT,
>> OriginalLoad->isVolatile(),
>> + OriginalLoad->isNonTemporal(), Align,
>> + OriginalLoad->getTBAAInfo());
>> + Chain = Load.getValue(1);
>> + } else {
>> + Load = DAG.getLoad(
>> + VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
>> + OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
>> + OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
>> + Chain = Load.getValue(1);
>> + if (ResultVT.bitsLT(VecEltVT))
>> + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
>> + else
>> + Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
>> + }
>> + WorkListRemover DeadNodes(*this);
>> + SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
>> + SDValue To[] = { Load, Chain };
>> + DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
>> + // Since we're explicitly calling ReplaceAllUses, add the new node to
>> the
>> + // worklist explicitly as well.
>> + AddToWorkList(Load.getNode());
>> + AddUsersToWorkList(Load.getNode()); // Add users too
>> + // Make sure to revisit this node to clean it up; it will usually be
>> dead.
>> + AddToWorkList(EVE);
>> + ++OpsNarrowed;
>> + return SDValue(EVE, 0);
>> +}
>> +
>> SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
>> // (vextract (scalar_to_vector val, 0) -> val
>> SDValue InVec = N->getOperand(0);
>> @@ -9743,6 +9833,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
>> }
>> }
>>
>> + bool BCNumEltsChanged = false;
>> + EVT ExtVT = VT.getVectorElementType();
>> + EVT LVT = ExtVT;
>> +
>> + // If the result of load has to be truncated, then it's not necessarily
>> + // profitable.
>> + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
>> + return SDValue();
>> +
>> + if (InVec.getOpcode() == ISD::BITCAST) {
>> + // Don't duplicate a load with other uses.
>> + if (!InVec.hasOneUse())
>> + return SDValue();
>> +
>> + EVT BCVT = InVec.getOperand(0).getValueType();
>> + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
>> + return SDValue();
>> + if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
>> + BCNumEltsChanged = true;
>> + InVec = InVec.getOperand(0);
>> + ExtVT = BCVT.getVectorElementType();
>> + }
>> +
>> + // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
>> + if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
>> + ISD::isNormalLoad(InVec.getNode())) {
>> + SDValue Index = N->getOperand(1);
>> + if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
>> + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
>> + OrigLoad);
>> + }
>> +
>> // Perform only after legalization to ensure build_vector /
>> vector_shuffle
>> // optimizations have already been done.
>> if (!LegalOperations) return SDValue();
>> @@ -9753,30 +9875,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
>>
>> if (ConstEltNo) {
>> int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
>> - bool NewLoad = false;
>> - bool BCNumEltsChanged = false;
>> - EVT ExtVT = VT.getVectorElementType();
>> - EVT LVT = ExtVT;
>> -
>> - // If the result of load has to be truncated, then it's not
>> necessarily
>> - // profitable.
>> - if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
>> - return SDValue();
>> -
>> - if (InVec.getOpcode() == ISD::BITCAST) {
>> - // Don't duplicate a load with other uses.
>> - if (!InVec.hasOneUse())
>> - return SDValue();
>> -
>> - EVT BCVT = InVec.getOperand(0).getValueType();
>> - if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
>> - return SDValue();
>> - if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
>> - BCNumEltsChanged = true;
>> - InVec = InVec.getOperand(0);
>> - ExtVT = BCVT.getVectorElementType();
>> - NewLoad = true;
>> - }
>>
>> LoadSDNode *LN0 = nullptr;
>> const ShuffleVectorSDNode *SVN = nullptr;
>> @@ -9819,6 +9917,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
>> if (ISD::isNormalLoad(InVec.getNode())) {
>> LN0 = cast<LoadSDNode>(InVec);
>> Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
>> + EltNo = DAG.getConstant(Elt, EltNo.getValueType());
>> }
>> }
>>
>> @@ -9831,72 +9930,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
>> if (Elt == -1)
>> return DAG.getUNDEF(LVT);
>>
>> - unsigned Align = LN0->getAlignment();
>> - if (NewLoad) {
>> - // Check the resultant load doesn't need a higher alignment than
>> the
>> - // original load.
>> - unsigned NewAlign =
>> - TLI.getDataLayout()
>> - ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
>> -
>> - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD,
>> LVT))
>> - return SDValue();
>> -
>> - Align = NewAlign;
>> - }
>> -
>> - SDValue NewPtr = LN0->getBasePtr();
>> - unsigned PtrOff = 0;
>> -
>> - if (Elt) {
>> - PtrOff = LVT.getSizeInBits() * Elt / 8;
>> - EVT PtrType = NewPtr.getValueType();
>> - if (TLI.isBigEndian())
>> - PtrOff = VT.getSizeInBits() / 8 - PtrOff;
>> - NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
>> - DAG.getConstant(PtrOff, PtrType));
>> - }
>> -
>> - // The replacement we need to do here is a little tricky: we need to
>> - // replace an extractelement of a load with a load.
>> - // Use ReplaceAllUsesOfValuesWith to do the replacement.
>> - // Note that this replacement assumes that the extractvalue is the
>> only
>> - // use of the load; that's okay because we don't want to perform this
>> - // transformation in other cases anyway.
>> - SDValue Load;
>> - SDValue Chain;
>> - if (NVT.bitsGT(LVT)) {
>> - // If the result type of vextract is wider than the load, then
>> issue an
>> - // extending load instead.
>> - ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
>> - ? ISD::ZEXTLOAD : ISD::EXTLOAD;
>> - Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
>> - NewPtr,
>> LN0->getPointerInfo().getWithOffset(PtrOff),
>> - LVT, LN0->isVolatile(), LN0->isNonTemporal(),
>> - Align, LN0->getTBAAInfo());
>> - Chain = Load.getValue(1);
>> - } else {
>> - Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
>> - LN0->getPointerInfo().getWithOffset(PtrOff),
>> - LN0->isVolatile(), LN0->isNonTemporal(),
>> - LN0->isInvariant(), Align, LN0->getTBAAInfo());
>> - Chain = Load.getValue(1);
>> - if (NVT.bitsLT(LVT))
>> - Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
>> - else
>> - Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
>> - }
>> - WorkListRemover DeadNodes(*this);
>> - SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
>> - SDValue To[] = { Load, Chain };
>> - DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
>> - // Since we're explcitly calling ReplaceAllUses, add the new node to
>> the
>> - // worklist explicitly as well.
>> - AddToWorkList(Load.getNode());
>> - AddUsersToWorkList(Load.getNode()); // Add users too
>> - // Make sure to revisit this node to clean it up; it will usually be
>> dead.
>> - AddToWorkList(N);
>> - return SDValue(N, 0);
>> + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo,
>> LN0);
>> }
>>
>> return SDValue();
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vec_splat.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_splat.ll?rev=209788&r1=209787&r2=209788&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vec_splat.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vec_splat.ll Wed May 28 20:42:45 2014
>> @@ -1,5 +1,6 @@
>> ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s
>> -check-prefix=SSE2
>> ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s
>> -check-prefix=SSE3
>> +; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
>> -check-prefix=AVX
>>
>> define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X)
>> nounwind {
>> %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0
>> ; <<4 x float>> [#uses=1]
>> @@ -37,6 +38,23 @@ define void @test_v2sd(<2 x double>* %P,
>> define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly
>> %ptr, i64 %i, i64 %j) nounwind {
>> %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
>> %2 = load <4 x float>* %1, align 16
>> + %3 = trunc i64 %j to i32
>> + %4 = extractelement <4 x float> %2, i32 %3
>> + %5 = insertelement <4 x float> undef, float %4, i32 0
>> + %6 = insertelement <4 x float> %5, float %4, i32 1
>> + %7 = insertelement <4 x float> %6, float %4, i32 2
>> + %8 = insertelement <4 x float> %7, float %4, i32 3
>> + ret <4 x float> %8
>> +
>> +; AVX-LABEL: load_extract_splat
>> +; AVX-NOT: rsp
>> +; AVX: vbroadcastss
>> +}
>> +
>> +; Fold extract of a load into the load's address computation. This avoids
>> spilling to the stack.
>> +define <4 x float> @load_extract_splat1(<4 x float>* nocapture readonly
>> %ptr, i64 %i, i64 %j) nounwind {
>> + %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
>> + %2 = load <4 x float>* %1, align 16
>> %3 = extractelement <4 x float> %2, i64 %j
>> %4 = insertelement <4 x float> undef, float %3, i32 0
>> %5 = insertelement <4 x float> %4, float %3, i32 1
>> @@ -44,7 +62,7 @@ define <4 x float> @load_extract_splat(<
>> %7 = insertelement <4 x float> %6, float %3, i32 3
>> ret <4 x float> %7
>>
>> -; AVX-LABEL: load_extract_splat
>> +; AVX-LABEL: load_extract_splat1
>> ; AVX-NOT: movs
>> ; AVX: vbroadcastss
>> }
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>
More information about the llvm-commits
mailing list