<div dir="ltr">Michael,<div><br></div><div>Your commit caused a regression described in bug <a href="http://llvm.org/bugs/show_bug.cgi?id=20115">20115</a>. Could you have a look?</div><div><br></div><div>Dmitry</div></div>
<div class="gmail_extra"><br><br><div class="gmail_quote">On Thu, May 29, 2014 at 5:42 AM, Michael J. Spencer <span dir="ltr">&lt;<a href="mailto:bigcheesegs@gmail.com" target="_blank">bigcheesegs@gmail.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: mspencer<br>
Date: Wed May 28 20:42:45 2014<br>
New Revision: 209788<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=209788&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=209788&view=rev</a><br>
Log:<br>
[x86] Fold extract_vector_elt of a load into the Load's address computation.<br>
<br>
An address only use of an extract element of a load can be simplified to a<br>
load. Without this the result of the extract element is spilled to the<br>
stack so that an address is available.<br>
<br>
Modified:<br>
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp<br>
llvm/trunk/test/CodeGen/X86/vec_splat.ll<br>
<br>
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=209788&r1=209787&r2=209788&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=209788&r1=209787&r2=209788&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)<br>
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed May 28 20:42:45 2014<br>
@@ -169,6 +169,16 @@ namespace {<br>
bool CombineToPostIndexedLoadStore(SDNode *N);<br>
bool SliceUpLoad(SDNode *N);<br>
<br>
+ /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed<br>
+ /// load.<br>
+ ///<br>
+ /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.<br>
+ /// \param InVecVT type of the input vector to EVE with bitcasts resolved.<br>
+ /// \param EltNo index of the vector element to load.<br>
+ /// \param OriginalLoad load that EVE came from to be replaced.<br>
+ /// \returns EVE on success SDValue() on failure.<br>
+ SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(<br>
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);<br>
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);<br>
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);<br>
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);<br>
@@ -9675,6 +9685,86 @@ SDValue DAGCombiner::visitINSERT_VECTOR_<br>
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);<br>
}<br>
<br>
+SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(<br>
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {<br>
+ EVT ResultVT = EVE->getValueType(0);<br>
+ EVT VecEltVT = InVecVT.getVectorElementType();<br>
+ unsigned Align = OriginalLoad->getAlignment();<br>
+ unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(<br>
+ VecEltVT.getTypeForEVT(*DAG.getContext()));<br>
+<br>
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))<br>
+ return SDValue();<br>
+<br>
+ Align = NewAlign;<br>
+<br>
+ SDValue NewPtr = OriginalLoad->getBasePtr();<br>
+ SDValue Offset;<br>
+ EVT PtrType = NewPtr.getValueType();<br>
+ MachinePointerInfo MPI;<br>
+ if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {<br>
+ int Elt = ConstEltNo->getZExtValue();<br>
+ unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;<br>
+ if (TLI.isBigEndian())<br>
+ PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;<br>
+ Offset = DAG.getConstant(PtrOff, PtrType);<br>
+ MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);<br>
+ } else {<br>
+ Offset = DAG.getNode(<br>
+ ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,<br>
+ DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));<br>
+ if (TLI.isBigEndian())<br>
+ Offset = DAG.getNode(<br>
+ ISD::SUB, SDLoc(EVE), EltNo.getValueType(),<br>
+ DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);<br>
+ MPI = OriginalLoad->getPointerInfo();<br>
+ }<br>
+ NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);<br>
+<br>
+ // The replacement we need to do here is a little tricky: we need to<br>
+ // replace an extractelement of a load with a load.<br>
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.<br>
+ // Note that this replacement assumes that the extractvalue is the only<br>
+ // use of the load; that's okay because we don't want to perform this<br>
+ // transformation in other cases anyway.<br>
+ SDValue Load;<br>
+ SDValue Chain;<br>
+ if (ResultVT.bitsGT(VecEltVT)) {<br>
+ // If the result type of vextract is wider than the load, then issue an<br>
+ // extending load instead.<br>
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)<br>
+ ? ISD::ZEXTLOAD<br>
+ : ISD::EXTLOAD;<br>
+ Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),<br>
+ NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),<br>
+ OriginalLoad->isNonTemporal(), Align,<br>
+ OriginalLoad->getTBAAInfo());<br>
+ Chain = Load.getValue(1);<br>
+ } else {<br>
+ Load = DAG.getLoad(<br>
+ VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,<br>
+ OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),<br>
+ OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());<br>
+ Chain = Load.getValue(1);<br>
+ if (ResultVT.bitsLT(VecEltVT))<br>
+ Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);<br>
+ else<br>
+ Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);<br>
+ }<br>
+ WorkListRemover DeadNodes(*this);<br>
+ SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };<br>
+ SDValue To[] = { Load, Chain };<br>
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);<br>
+ // Since we're explicitly calling ReplaceAllUses, add the new node to the<br>
+ // worklist explicitly as well.<br>
+ AddToWorkList(Load.getNode());<br>
+ AddUsersToWorkList(Load.getNode()); // Add users too<br>
+ // Make sure to revisit this node to clean it up; it will usually be dead.<br>
+ AddToWorkList(EVE);<br>
+ ++OpsNarrowed;<br>
+ return SDValue(EVE, 0);<br>
+}<br>
+<br>
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {<br>
// (vextract (scalar_to_vector val, 0) -> val<br>
SDValue InVec = N->getOperand(0);<br>
@@ -9743,6 +9833,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR<br>
}<br>
}<br>
<br>
+ bool BCNumEltsChanged = false;<br>
+ EVT ExtVT = VT.getVectorElementType();<br>
+ EVT LVT = ExtVT;<br>
+<br>
+ // If the result of load has to be truncated, then it's not necessarily<br>
+ // profitable.<br>
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))<br>
+ return SDValue();<br>
+<br>
+ if (InVec.getOpcode() == ISD::BITCAST) {<br>
+ // Don't duplicate a load with other uses.<br>
+ if (!InVec.hasOneUse())<br>
+ return SDValue();<br>
+<br>
+ EVT BCVT = InVec.getOperand(0).getValueType();<br>
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))<br>
+ return SDValue();<br>
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())<br>
+ BCNumEltsChanged = true;<br>
+ InVec = InVec.getOperand(0);<br>
+ ExtVT = BCVT.getVectorElementType();<br>
+ }<br>
+<br>
+ // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)<br>
+ if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&<br>
+ ISD::isNormalLoad(InVec.getNode())) {<br>
+ SDValue Index = N->getOperand(1);<br>
+ if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))<br>
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,<br>
+ OrigLoad);<br>
+ }<br>
+<br>
// Perform only after legalization to ensure build_vector / vector_shuffle<br>
// optimizations have already been done.<br>
if (!LegalOperations) return SDValue();<br>
@@ -9753,30 +9875,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR<br>
<br>
if (ConstEltNo) {<br>
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();<br>
- bool NewLoad = false;<br>
- bool BCNumEltsChanged = false;<br>
- EVT ExtVT = VT.getVectorElementType();<br>
- EVT LVT = ExtVT;<br>
-<br>
- // If the result of load has to be truncated, then it's not necessarily<br>
- // profitable.<br>
- if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))<br>
- return SDValue();<br>
-<br>
- if (InVec.getOpcode() == ISD::BITCAST) {<br>
- // Don't duplicate a load with other uses.<br>
- if (!InVec.hasOneUse())<br>
- return SDValue();<br>
-<br>
- EVT BCVT = InVec.getOperand(0).getValueType();<br>
- if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))<br>
- return SDValue();<br>
- if (VT.getVectorNumElements() != BCVT.getVectorNumElements())<br>
- BCNumEltsChanged = true;<br>
- InVec = InVec.getOperand(0);<br>
- ExtVT = BCVT.getVectorElementType();<br>
- NewLoad = true;<br>
- }<br>
<br>
LoadSDNode *LN0 = nullptr;<br>
const ShuffleVectorSDNode *SVN = nullptr;<br>
@@ -9819,6 +9917,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR<br>
if (ISD::isNormalLoad(InVec.getNode())) {<br>
LN0 = cast<LoadSDNode>(InVec);<br>
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;<br>
+ EltNo = DAG.getConstant(Elt, EltNo.getValueType());<br>
}<br>
}<br>
<br>
@@ -9831,72 +9930,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR<br>
if (Elt == -1)<br>
return DAG.getUNDEF(LVT);<br>
<br>
- unsigned Align = LN0->getAlignment();<br>
- if (NewLoad) {<br>
- // Check the resultant load doesn't need a higher alignment than the<br>
- // original load.<br>
- unsigned NewAlign =<br>
- TLI.getDataLayout()<br>
- ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));<br>
-<br>
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))<br>
- return SDValue();<br>
-<br>
- Align = NewAlign;<br>
- }<br>
-<br>
- SDValue NewPtr = LN0->getBasePtr();<br>
- unsigned PtrOff = 0;<br>
-<br>
- if (Elt) {<br>
- PtrOff = LVT.getSizeInBits() * Elt / 8;<br>
- EVT PtrType = NewPtr.getValueType();<br>
- if (TLI.isBigEndian())<br>
- PtrOff = VT.getSizeInBits() / 8 - PtrOff;<br>
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,<br>
- DAG.getConstant(PtrOff, PtrType));<br>
- }<br>
-<br>
- // The replacement we need to do here is a little tricky: we need to<br>
- // replace an extractelement of a load with a load.<br>
- // Use ReplaceAllUsesOfValuesWith to do the replacement.<br>
- // Note that this replacement assumes that the extractvalue is the only<br>
- // use of the load; that's okay because we don't want to perform this<br>
- // transformation in other cases anyway.<br>
- SDValue Load;<br>
- SDValue Chain;<br>
- if (NVT.bitsGT(LVT)) {<br>
- // If the result type of vextract is wider than the load, then issue an<br>
- // extending load instead.<br>
- ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)<br>
- ? ISD::ZEXTLOAD : ISD::EXTLOAD;<br>
- Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),<br>
- NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),<br>
- LVT, LN0->isVolatile(), LN0->isNonTemporal(),<br>
- Align, LN0->getTBAAInfo());<br>
- Chain = Load.getValue(1);<br>
- } else {<br>
- Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,<br>
- LN0->getPointerInfo().getWithOffset(PtrOff),<br>
- LN0->isVolatile(), LN0->isNonTemporal(),<br>
- LN0->isInvariant(), Align, LN0->getTBAAInfo());<br>
- Chain = Load.getValue(1);<br>
- if (NVT.bitsLT(LVT))<br>
- Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);<br>
- else<br>
- Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);<br>
- }<br>
- WorkListRemover DeadNodes(*this);<br>
- SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };<br>
- SDValue To[] = { Load, Chain };<br>
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2);<br>
- // Since we're explcitly calling ReplaceAllUses, add the new node to the<br>
- // worklist explicitly as well.<br>
- AddToWorkList(Load.getNode());<br>
- AddUsersToWorkList(Load.getNode()); // Add users too<br>
- // Make sure to revisit this node to clean it up; it will usually be dead.<br>
- AddToWorkList(N);<br>
- return SDValue(N, 0);<br>
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);<br>
}<br>
<br>
return SDValue();<br>
<br>
Modified: llvm/trunk/test/CodeGen/X86/vec_splat.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_splat.ll?rev=209788&r1=209787&r2=209788&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_splat.ll?rev=209788&r1=209787&r2=209788&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/X86/vec_splat.ll (original)<br>
+++ llvm/trunk/test/CodeGen/X86/vec_splat.ll Wed May 28 20:42:45 2014<br>
@@ -1,5 +1,6 @@<br>
; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s -check-prefix=SSE2<br>
; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s -check-prefix=SSE3<br>
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX<br>
<br>
define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {<br>
%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]<br>
@@ -37,6 +38,23 @@ define void @test_v2sd(<2 x double>* %P,<br>
define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {<br>
%1 = getelementptr inbounds <4 x float>* %ptr, i64 %i<br>
%2 = load <4 x float>* %1, align 16<br>
+ %3 = trunc i64 %j to i32<br>
+ %4 = extractelement <4 x float> %2, i32 %3<br>
+ %5 = insertelement <4 x float> undef, float %4, i32 0<br>
+ %6 = insertelement <4 x float> %5, float %4, i32 1<br>
+ %7 = insertelement <4 x float> %6, float %4, i32 2<br>
+ %8 = insertelement <4 x float> %7, float %4, i32 3<br>
+ ret <4 x float> %8<br>
+<br>
+; AVX-LABEL: load_extract_splat<br>
+; AVX-NOT: rsp<br>
+; AVX: vbroadcastss<br>
+}<br>
+<br>
+; Fold extract of a load into the load's address computation. This avoids spilling to the stack.<br>
+define <4 x float> @load_extract_splat1(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {<br>
+ %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i<br>
+ %2 = load <4 x float>* %1, align 16<br>
%3 = extractelement <4 x float> %2, i64 %j<br>
%4 = insertelement <4 x float> undef, float %3, i32 0<br>
%5 = insertelement <4 x float> %4, float %3, i32 1<br>
@@ -44,7 +62,7 @@ define <4 x float> @load_extract_splat(<<br>
%7 = insertelement <4 x float> %6, float %3, i32 3<br>
ret <4 x float> %7<br>
<br>
-; AVX-LABEL: load_extract_splat<br>
+; AVX-LABEL: load_extract_splat1<br>
; AVX-NOT: movs<br>
; AVX: vbroadcastss<br>
}<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>