[llvm] r213339 - Revert "[x86] Fold extract_vector_elt of a load into the Load's address computation."

Michael J. Spencer bigcheesegs at gmail.com
Thu Jul 17 17:15:50 PDT 2014


Author: mspencer
Date: Thu Jul 17 19:15:50 2014
New Revision: 213339

URL: http://llvm.org/viewvc/llvm-project?rev=213339&view=rev
Log:
Revert "[x86] Fold extract_vector_elt of a load into the Load's address computation."

There's a bug where this can create cycles in the DAG. It will take a bit
to fix, so I'm backing it out for now.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/X86/vec_splat.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=213339&r1=213338&r2=213339&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Jul 17 19:15:50 2014
@@ -169,16 +169,6 @@ namespace {
     bool CombineToPostIndexedLoadStore(SDNode *N);
     bool SliceUpLoad(SDNode *N);
 
-    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
-    ///   load.
-    ///
-    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
-    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
-    /// \param EltNo index of the vector element to load.
-    /// \param OriginalLoad load that EVE came from to be replaced.
-    /// \returns EVE on success SDValue() on failure.
-    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
-        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -9792,86 +9782,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_
   return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
 }
 
-SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
-    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
-  EVT ResultVT = EVE->getValueType(0);
-  EVT VecEltVT = InVecVT.getVectorElementType();
-  unsigned Align = OriginalLoad->getAlignment();
-  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
-      VecEltVT.getTypeForEVT(*DAG.getContext()));
-
-  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
-    return SDValue();
-
-  Align = NewAlign;
-
-  SDValue NewPtr = OriginalLoad->getBasePtr();
-  SDValue Offset;
-  EVT PtrType = NewPtr.getValueType();
-  MachinePointerInfo MPI;
-  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
-    int Elt = ConstEltNo->getZExtValue();
-    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
-    if (TLI.isBigEndian())
-      PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
-    Offset = DAG.getConstant(PtrOff, PtrType);
-    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
-  } else {
-    Offset = DAG.getNode(
-        ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
-        DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
-    if (TLI.isBigEndian())
-      Offset = DAG.getNode(
-          ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
-          DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
-    MPI = OriginalLoad->getPointerInfo();
-  }
-  NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
-
-  // The replacement we need to do here is a little tricky: we need to
-  // replace an extractelement of a load with a load.
-  // Use ReplaceAllUsesOfValuesWith to do the replacement.
-  // Note that this replacement assumes that the extractvalue is the only
-  // use of the load; that's okay because we don't want to perform this
-  // transformation in other cases anyway.
-  SDValue Load;
-  SDValue Chain;
-  if (ResultVT.bitsGT(VecEltVT)) {
-    // If the result type of vextract is wider than the load, then issue an
-    // extending load instead.
-    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
-                                   ? ISD::ZEXTLOAD
-                                   : ISD::EXTLOAD;
-    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
-                          NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
-                          OriginalLoad->isNonTemporal(), Align,
-                          OriginalLoad->getTBAAInfo());
-    Chain = Load.getValue(1);
-  } else {
-    Load = DAG.getLoad(
-        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
-        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
-        OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
-    Chain = Load.getValue(1);
-    if (ResultVT.bitsLT(VecEltVT))
-      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
-    else
-      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
-  }
-  WorkListRemover DeadNodes(*this);
-  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
-  SDValue To[] = { Load, Chain };
-  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
-  // Since we're explicitly calling ReplaceAllUses, add the new node to the
-  // worklist explicitly as well.
-  AddToWorkList(Load.getNode());
-  AddUsersToWorkList(Load.getNode()); // Add users too
-  // Make sure to revisit this node to clean it up; it will usually be dead.
-  AddToWorkList(EVE);
-  ++OpsNarrowed;
-  return SDValue(EVE, 0);
-}
-
 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   // (vextract (scalar_to_vector val, 0) -> val
   SDValue InVec = N->getOperand(0);
@@ -9940,38 +9850,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
     }
   }
 
-  bool BCNumEltsChanged = false;
-  EVT ExtVT = VT.getVectorElementType();
-  EVT LVT = ExtVT;
-
-  // If the result of load has to be truncated, then it's not necessarily
-  // profitable.
-  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
-    return SDValue();
-
-  if (InVec.getOpcode() == ISD::BITCAST) {
-    // Don't duplicate a load with other uses.
-    if (!InVec.hasOneUse())
-      return SDValue();
-
-    EVT BCVT = InVec.getOperand(0).getValueType();
-    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
-      return SDValue();
-    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
-      BCNumEltsChanged = true;
-    InVec = InVec.getOperand(0);
-    ExtVT = BCVT.getVectorElementType();
-  }
-
-  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
-  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
-      ISD::isNormalLoad(InVec.getNode())) {
-    SDValue Index = N->getOperand(1);
-    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
-      return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
-                                                           OrigLoad);
-  }
-
   // Perform only after legalization to ensure build_vector / vector_shuffle
   // optimizations have already been done.
   if (!LegalOperations) return SDValue();
@@ -9982,6 +9860,30 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
 
   if (ConstEltNo) {
     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+    bool NewLoad = false;
+    bool BCNumEltsChanged = false;
+    EVT ExtVT = VT.getVectorElementType();
+    EVT LVT = ExtVT;
+
+    // If the result of load has to be truncated, then it's not necessarily
+    // profitable.
+    if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+      return SDValue();
+
+    if (InVec.getOpcode() == ISD::BITCAST) {
+      // Don't duplicate a load with other uses.
+      if (!InVec.hasOneUse())
+        return SDValue();
+
+      EVT BCVT = InVec.getOperand(0).getValueType();
+      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+        return SDValue();
+      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+        BCNumEltsChanged = true;
+      InVec = InVec.getOperand(0);
+      ExtVT = BCVT.getVectorElementType();
+      NewLoad = true;
+    }
 
     LoadSDNode *LN0 = nullptr;
     const ShuffleVectorSDNode *SVN = nullptr;
@@ -10024,7 +9926,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
       if (ISD::isNormalLoad(InVec.getNode())) {
         LN0 = cast<LoadSDNode>(InVec);
         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
-        EltNo = DAG.getConstant(Elt, EltNo.getValueType());
       }
     }
 
@@ -10037,7 +9938,72 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR
     if (Elt == -1)
       return DAG.getUNDEF(LVT);
 
-    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
+    unsigned Align = LN0->getAlignment();
+    if (NewLoad) {
+      // Check the resultant load doesn't need a higher alignment than the
+      // original load.
+      unsigned NewAlign =
+        TLI.getDataLayout()
+            ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+
+      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+        return SDValue();
+
+      Align = NewAlign;
+    }
+
+    SDValue NewPtr = LN0->getBasePtr();
+    unsigned PtrOff = 0;
+
+    if (Elt) {
+      PtrOff = LVT.getSizeInBits() * Elt / 8;
+      EVT PtrType = NewPtr.getValueType();
+      if (TLI.isBigEndian())
+        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+      NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
+                           DAG.getConstant(PtrOff, PtrType));
+    }
+
+    // The replacement we need to do here is a little tricky: we need to
+    // replace an extractelement of a load with a load.
+    // Use ReplaceAllUsesOfValuesWith to do the replacement.
+    // Note that this replacement assumes that the extractvalue is the only
+    // use of the load; that's okay because we don't want to perform this
+    // transformation in other cases anyway.
+    SDValue Load;
+    SDValue Chain;
+    if (NVT.bitsGT(LVT)) {
+      // If the result type of vextract is wider than the load, then issue an
+      // extending load instead.
+      ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
+        ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+      Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
+                            NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+                            LVT, LN0->isVolatile(), LN0->isNonTemporal(),
+                            Align, LN0->getTBAAInfo());
+      Chain = Load.getValue(1);
+    } else {
+      Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
+                         LN0->getPointerInfo().getWithOffset(PtrOff),
+                         LN0->isVolatile(), LN0->isNonTemporal(),
+                         LN0->isInvariant(), Align, LN0->getTBAAInfo());
+      Chain = Load.getValue(1);
+      if (NVT.bitsLT(LVT))
+        Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
+      else
+        Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
+    }
+    WorkListRemover DeadNodes(*this);
+    SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
+    SDValue To[] = { Load, Chain };
+    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+    // Since we're explcitly calling ReplaceAllUses, add the new node to the
+    // worklist explicitly as well.
+    AddToWorkList(Load.getNode());
+    AddUsersToWorkList(Load.getNode()); // Add users too
+    // Make sure to revisit this node to clean it up; it will usually be dead.
+    AddToWorkList(N);
+    return SDValue(N, 0);
   }
 
   return SDValue();

Modified: llvm/trunk/test/CodeGen/X86/vec_splat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_splat.ll?rev=213339&r1=213338&r2=213339&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_splat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_splat.ll Thu Jul 17 19:15:50 2014
@@ -1,6 +1,5 @@
 ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s -check-prefix=SSE2
 ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s -check-prefix=SSE3
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX
 
 define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
 	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
@@ -38,23 +37,6 @@ define void @test_v2sd(<2 x double>* %P,
 define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {
   %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
   %2 = load <4 x float>* %1, align 16
-  %3 = trunc i64 %j to i32
-  %4 = extractelement <4 x float> %2, i32 %3
-  %5 = insertelement <4 x float> undef, float %4, i32 0
-  %6 = insertelement <4 x float> %5, float %4, i32 1
-  %7 = insertelement <4 x float> %6, float %4, i32 2
-  %8 = insertelement <4 x float> %7, float %4, i32 3
-  ret <4 x float> %8
-  
-; AVX-LABEL: load_extract_splat
-; AVX-NOT: rsp
-; AVX: vbroadcastss
-}
-
-; Fold extract of a load into the load's address computation. This avoids spilling to the stack.
-define <4 x float> @load_extract_splat1(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {
-  %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
-  %2 = load <4 x float>* %1, align 16
   %3 = extractelement <4 x float> %2, i64 %j
   %4 = insertelement <4 x float> undef, float %3, i32 0
   %5 = insertelement <4 x float> %4, float %3, i32 1
@@ -62,7 +44,7 @@ define <4 x float> @load_extract_splat1(
   %7 = insertelement <4 x float> %6, float %3, i32 3
   ret <4 x float> %7
   
-; AVX-LABEL: load_extract_splat1
+; AVX-LABEL: load_extract_splat
 ; AVX-NOT: movs
 ; AVX: vbroadcastss
 }





More information about the llvm-commits mailing list