[PATCH] [SDAG] When performing post-legalize DAG combining, run the legalizer over each node in the worklist prior to combining.

Tom Stellard tom at stellard.net
Thu Jul 17 12:11:24 PDT 2014


On Thu, Jul 17, 2014 at 04:55:00PM +0000, Chandler Carruth wrote:
> Hi hfinkel, grosbach,
> 
> This allows the combiner to produce new nodes which need to go back
> through legalization. This is particularly useful when generating
> operands to target specific nodes in a post-legalize DAG combine where
> the operands are significantly easier to express as pre-legalized
> operations. My immediate use case will be PSHUFB formation where we need
> to build a constant shuffle mask with a build_vector node.
> 

Since the post-legalize DAG combiner is now doing combining and
legalizing, do we still need to run legalize as a separate pass.
Can we instead just do:

combine -> legalize types -> combine w/ legalize ops

-Tom

> This also refactors the relevant functionality in the legalizer to
> support this, and updates relevant tests. I've spoken to the R600 folks
> and these changes look like improvements. The avx512 change needs to be
> investigated, I suspect there is a disagreement between the legalizer
> and the DAG combiner there, but it seems a minor issue so leaving it to
> be re-evaluated after this patch.
> 
> http://reviews.llvm.org/D4564
> 
> Files:
>   include/llvm/CodeGen/SelectionDAG.h
>   lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>   lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
>   test/CodeGen/R600/fp_to_sint.ll
>   test/CodeGen/R600/fp_to_uint.ll
>   test/CodeGen/R600/setcc-equivalent.ll
>   test/CodeGen/X86/avx512-cmp.ll

> Index: include/llvm/CodeGen/SelectionDAG.h
> ===================================================================
> --- include/llvm/CodeGen/SelectionDAG.h
> +++ include/llvm/CodeGen/SelectionDAG.h
> @@ -16,6 +16,7 @@
>  #define LLVM_CODEGEN_SELECTIONDAG_H
>  
>  #include "llvm/ADT/DenseSet.h"
> +#include "llvm/ADT/SetVector.h"
>  #include "llvm/ADT/StringMap.h"
>  #include "llvm/ADT/ilist.h"
>  #include "llvm/CodeGen/DAGCombine.h"
> @@ -364,6 +365,15 @@
>    /// the graph.
>    void Legalize();
>  
> +  /// \brief Transforms a SelectionDAG node and any operands to it into a node
> +  /// that is compatible with the target instruction selector, as indicated by
> +  /// the TargetLowering object.
> +  ///
> +  /// This essentially runs a single recursive walk of the \c Legalize process
> +  /// over the given node (and its operands). This can be used to incrementally
> +  /// legalize the DAG.
> +  bool LegalizeOp(SDNode *N, SmallSetVector<SDNode *, 16> &UpdatedNodes);
> +
>    /// LegalizeVectors - This transforms the SelectionDAG into a SelectionDAG
>    /// that only uses vector math operations supported by the target.  This is
>    /// necessary as a separate step from Legalize because unrolling a vector
> Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> @@ -1098,10 +1098,6 @@
>    // changes of the root.
>    HandleSDNode Dummy(DAG.getRoot());
>  
> -  // The root of the dag may dangle to deleted nodes until the dag combiner is
> -  // done.  Set it to null to avoid confusion.
> -  DAG.setRoot(SDValue());
> -
>    // while the worklist isn't empty, find a node and
>    // try and combine it.
>    while (!WorkListContents.empty()) {
> @@ -1126,6 +1122,22 @@
>        continue;
>      }
>  
> +    // If this combine is running after legalizing the DAG, re-legalize any
> +    // nodes pulled off the worklist.
> +    if (Level == AfterLegalizeDAG) {
> +      WorkListRemover DeadNodes(*this);
> +      SmallSetVector<SDNode *, 16> UpdatedNodes;
> +      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
> +
> +      for (SDNode *LN : UpdatedNodes) {
> +        assert(LN->getOpcode() != ISD::DELETED_NODE && "boom");
> +        AddToWorkList(LN);
> +        AddUsersToWorkList(LN);
> +      }
> +      if (!NIsValid)
> +        continue;
> +    }
> +
>      SDValue RV = combine(N);
>  
>      if (!RV.getNode())
> Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> @@ -12,6 +12,7 @@
>  //===----------------------------------------------------------------------===//
>  
>  #include "llvm/CodeGen/SelectionDAG.h"
> +#include "llvm/ADT/SetVector.h"
>  #include "llvm/ADT/SmallPtrSet.h"
>  #include "llvm/ADT/SmallSet.h"
>  #include "llvm/ADT/SmallVector.h"
> @@ -53,27 +54,37 @@
>    const TargetLowering &TLI;
>    SelectionDAG &DAG;
>  
> -  /// LegalizePosition - The iterator for walking through the node list.
> -  SelectionDAG::allnodes_iterator LegalizePosition;
> +  /// \brief The iterator being used to walk the DAG. We hold a reference to it
> +  /// in order to update it as necessary on node deletion.
> +  SelectionDAG::allnodes_iterator &LegalizePosition;
>  
> -  /// LegalizedNodes - The set of nodes which have already been legalized.
> -  SmallPtrSet<SDNode *, 16> LegalizedNodes;
> +  /// \brief The set of nodes which have already been legalized. We hold a
> +  /// reference to it in order to update as necessary on node deletion.
> +  SmallPtrSetImpl<SDNode *> &LegalizedNodes;
> +
> +  /// \brief A set of all the nodes updated during legalization.
> +  SmallSetVector<SDNode *, 16> *UpdatedNodes;
>  
>    EVT getSetCCResultType(EVT VT) const {
>      return TLI.getSetCCResultType(*DAG.getContext(), VT);
>    }
>  
>    // Libcall insertion helpers.
>  
>  public:
> -  explicit SelectionDAGLegalize(SelectionDAG &DAG);
> -
> -  void LegalizeDAG();
> -
> -private:
> -  /// LegalizeOp - Legalizes the given operation.
> +  SelectionDAGLegalize(SelectionDAG &DAG,
> +                       SelectionDAG::allnodes_iterator &LegalizePosition,
> +                       SmallPtrSetImpl<SDNode *> &LegalizedNodes,
> +                       SmallSetVector<SDNode *, 16> *UpdatedNodes = nullptr)
> +      : SelectionDAG::DAGUpdateListener(DAG), TM(DAG.getTarget()),
> +        TLI(DAG.getTargetLoweringInfo()), DAG(DAG),
> +        LegalizePosition(LegalizePosition), LegalizedNodes(LegalizedNodes),
> +        UpdatedNodes(UpdatedNodes) {}
> +
> +  /// \brief Legalizes the given operation.
>    void LegalizeOp(SDNode *Node);
>  
> +private:
>    SDValue OptimizeFloatStore(StoreSDNode *ST);
>  
>    void LegalizeLoadOps(SDNode *Node);
> @@ -149,14 +160,21 @@
>      LegalizedNodes.erase(N);
>      if (LegalizePosition == SelectionDAG::allnodes_iterator(N))
>        ++LegalizePosition;
> +    if (UpdatedNodes)
> +      UpdatedNodes->remove(N);
>    }
>  
>  public:
>    // DAGUpdateListener implementation.
>    void NodeDeleted(SDNode *N, SDNode *E) override {
>      ForgetNode(N);
> +    if (UpdatedNodes && E)
> +      UpdatedNodes->insert(E);
> +  }
> +  void NodeUpdated(SDNode *N) override {
> +    if (UpdatedNodes)
> +      UpdatedNodes->insert(N);
>    }
> -  void NodeUpdated(SDNode *N) override {}
>  
>    // Node replacement helpers
>    void ReplacedNode(SDNode *N) {
> @@ -168,15 +186,25 @@
>    }
>    void ReplaceNode(SDNode *Old, SDNode *New) {
>      DAG.ReplaceAllUsesWith(Old, New);
> +    for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
> +      DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i));
>      ReplacedNode(Old);
> +    if (UpdatedNodes)
> +      UpdatedNodes->insert(New);
>    }
>    void ReplaceNode(SDValue Old, SDValue New) {
>      DAG.ReplaceAllUsesWith(Old, New);
>      ReplacedNode(Old.getNode());
> +    if (UpdatedNodes)
> +      UpdatedNodes->insert(New.getNode());
>    }
>    void ReplaceNode(SDNode *Old, const SDValue *New) {
>      DAG.ReplaceAllUsesWith(Old, New);
> +    for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
> +      DAG.TransferDbgValues(SDValue(Old, i), New[i]);
>      ReplacedNode(Old);
> +    if (UpdatedNodes)
> +      UpdatedNodes->insert(New->getNode());
>    }
>  };
>  }
> @@ -213,40 +241,6 @@
>    return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
>  }
>  
> -SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
> -  : SelectionDAG::DAGUpdateListener(dag),
> -    TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
> -    DAG(dag) {
> -}
> -
> -void SelectionDAGLegalize::LegalizeDAG() {
> -  DAG.AssignTopologicalOrder();
> -
> -  // Visit all the nodes. We start in topological order, so that we see
> -  // nodes with their original operands intact. Legalization can produce
> -  // new nodes which may themselves need to be legalized. Iterate until all
> -  // nodes have been legalized.
> -  for (;;) {
> -    bool AnyLegalized = false;
> -    for (LegalizePosition = DAG.allnodes_end();
> -         LegalizePosition != DAG.allnodes_begin(); ) {
> -      --LegalizePosition;
> -
> -      SDNode *N = LegalizePosition;
> -      if (LegalizedNodes.insert(N)) {
> -        AnyLegalized = true;
> -        LegalizeOp(N);
> -      }
> -    }
> -    if (!AnyLegalized)
> -      break;
> -
> -  }
> -
> -  // Remove dead nodes now.
> -  DAG.RemoveDeadNodes();
> -}
> -
>  /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
>  /// a load from the constant pool.
>  SDValue
> @@ -928,6 +922,10 @@
>        assert(RVal.getNode() != Node && "Load must be completely replaced");
>        DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
>        DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
> +      if (UpdatedNodes) {
> +        UpdatedNodes->insert(RVal.getNode());
> +        UpdatedNodes->insert(RChain.getNode());
> +      }
>        ReplacedNode(Node);
>      }
>      return;
> @@ -1148,6 +1146,10 @@
>      assert(Value.getNode() != Node && "Load must be completely replaced");
>      DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
>      DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
> +    if (UpdatedNodes) {
> +      UpdatedNodes->insert(Value.getNode());
> +      UpdatedNodes->insert(Chain.getNode());
> +    }
>      ReplacedNode(Node);
>    }
>  }
> @@ -1335,10 +1337,7 @@
>      }
>  
>      if (NewNode != Node) {
> -      DAG.ReplaceAllUsesWith(Node, NewNode);
> -      for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
> -        DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
> -      ReplacedNode(Node);
> +      ReplaceNode(Node, NewNode);
>        Node = NewNode;
>      }
>      switch (Action) {
> @@ -1356,12 +1355,8 @@
>            else
>              ResultVals.push_back(Res.getValue(i));
>          }
> -        if (Res.getNode() != Node || Res.getResNo() != 0) {
> -          DAG.ReplaceAllUsesWith(Node, ResultVals.data());
> -          for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
> -            DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
> -          ReplacedNode(Node);
> -        }
> +        if (Res.getNode() != Node || Res.getResNo() != 0)
> +          ReplaceNode(Node, ResultVals.data());
>          return;
>        }
>      }
> @@ -4179,6 +4174,10 @@
>      // use the new one.
>      DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
>      DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
> +    if (UpdatedNodes) {
> +      UpdatedNodes->insert(Tmp2.getNode());
> +      UpdatedNodes->insert(Chain.getNode());
> +    }
>      ReplacedNode(Node);
>      break;
>    }
> @@ -4285,7 +4284,52 @@
>  // SelectionDAG::Legalize - This is the entry point for the file.
>  //
>  void SelectionDAG::Legalize() {
> -  /// run - This is the main entry point to this class.
> -  ///
> -  SelectionDAGLegalize(*this).LegalizeDAG();
> +  AssignTopologicalOrder();
> +
> +  allnodes_iterator LegalizePosition;
> +  SmallPtrSet<SDNode *, 16> LegalizedNodes;
> +  SelectionDAGLegalize Legalizer(*this, LegalizePosition, LegalizedNodes);
> +
> +  // Visit all the nodes. We start in topological order, so that we see
> +  // nodes with their original operands intact. Legalization can produce
> +  // new nodes which may themselves need to be legalized. Iterate until all
> +  // nodes have been legalized.
> +  for (;;) {
> +    bool AnyLegalized = false;
> +    for (LegalizePosition = allnodes_end();
> +         LegalizePosition != allnodes_begin(); ) {
> +      --LegalizePosition;
> +
> +      SDNode *N = LegalizePosition;
> +      if (LegalizedNodes.insert(N)) {
> +        AnyLegalized = true;
> +        Legalizer.LegalizeOp(N);
> +      }
> +    }
> +    if (!AnyLegalized)
> +      break;
> +
> +  }
> +
> +  // Remove dead nodes now.
> +  RemoveDeadNodes();
> +}
> +
> +// This is a somewhat hacky entry point to legalize a single sub-graph rooted
> +// at a node. This is used by the post-legalize DAG combiner to allow its DAG
> +// combines to produce illegal nodes and still have a chance for them to be
> +// legalized.
> +bool SelectionDAG::LegalizeOp(SDNode *N,
> +                              SmallSetVector<SDNode *, 16> &UpdatedNodes) {
> +  allnodes_iterator LegalizePosition(N);
> +  SmallPtrSet<SDNode *, 16> LegalizedNodes;
> +  SelectionDAGLegalize Legalizer(*this, LegalizePosition, LegalizedNodes,
> +                                 &UpdatedNodes);
> +
> +  // Directly insert the node in question, and legalize it. This will recurse
> +  // as needed through operands.
> +  LegalizedNodes.insert(N);
> +  Legalizer.LegalizeOp(N);
> +
> +  return LegalizedNodes.count(N);
>  }
> Index: test/CodeGen/R600/fp_to_sint.ll
> ===================================================================
> --- test/CodeGen/R600/fp_to_sint.ll
> +++ test/CodeGen/R600/fp_to_sint.ll
> @@ -49,8 +49,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  
>  ; Check that the compiler doesn't crash with a "cannot select" error
>  ; SI: S_ENDPGM
> @@ -81,8 +81,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  ; EG-DAG: AND_INT
>  ; EG-DAG: LSHR
>  ; EG-DAG: SUB_INT
> @@ -102,8 +102,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  
>  ; SI: S_ENDPGM
>  define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
> @@ -132,8 +132,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  ; EG-DAG: AND_INT
>  ; EG-DAG: LSHR
>  ; EG-DAG: SUB_INT
> @@ -153,8 +153,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  ; EG-DAG: AND_INT
>  ; EG-DAG: LSHR
>  ; EG-DAG: SUB_INT
> @@ -174,8 +174,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  ; EG-DAG: AND_INT
>  ; EG-DAG: LSHR
>  ; EG-DAG: SUB_INT
> @@ -195,8 +195,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  
>  ; SI: S_ENDPGM
>  define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
> Index: test/CodeGen/R600/fp_to_uint.ll
> ===================================================================
> --- test/CodeGen/R600/fp_to_uint.ll
> +++ test/CodeGen/R600/fp_to_uint.ll
> @@ -52,8 +52,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  
>  ; SI: S_ENDPGM
>  define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
> @@ -82,8 +82,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  ; EG-DAG: AND_INT
>  ; EG-DAG: LSHR
>  ; EG-DAG: SUB_INT
> @@ -103,8 +103,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  
>  ; SI: S_ENDPGM
>  define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
> @@ -133,8 +133,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  ; EG-DAG: AND_INT
>  ; EG-DAG: LSHR
>  ; EG-DAG: SUB_INT
> @@ -154,8 +154,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  ; EG-DAG: AND_INT
>  ; EG-DAG: LSHR
>  ; EG-DAG: SUB_INT
> @@ -175,8 +175,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  ; EG-DAG: AND_INT
>  ; EG-DAG: LSHR
>  ; EG-DAG: SUB_INT
> @@ -196,8 +196,8 @@
>  ; EG-DAG: XOR_INT
>  ; EG-DAG: SUB_INT
>  ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>  
>  ; SI: S_ENDPGM
>  define void @fp_to_uint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
> Index: test/CodeGen/R600/setcc-equivalent.ll
> ===================================================================
> --- test/CodeGen/R600/setcc-equivalent.ll
> +++ test/CodeGen/R600/setcc-equivalent.ll
> @@ -1,5 +1,4 @@
>  ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
> -; XFAIL: *
>  
>  ; EG-LABEL: @and_setcc_setcc_i32
>  ; EG: AND_INT
> @@ -28,4 +27,4 @@
>    %ext = sext <4 x i1> %and to <4 x i32>
>    store <4 x i32> %ext, <4 x i32> addrspace(1)* %out, align 4
>    ret void
> -}
> \ No newline at end of file
> +}
> Index: test/CodeGen/X86/avx512-cmp.ll
> ===================================================================
> --- test/CodeGen/X86/avx512-cmp.ll
> +++ test/CodeGen/X86/avx512-cmp.ll
> @@ -28,10 +28,9 @@
>    ret float %c1
>  }
>  
> +; FIXME: Can use vcmpeqss and extract from the mask here in AVX512.
>  ; CHECK-LABEL: test3
> -; CHECK: vcmpeqss
> -; CHECK: kmov
> -; CHECK: ret
> +; CHECK: vucomiss {{.*}}encoding: [0x62
>  define i32 @test3(float %a, float %b) {
>  
>    %cmp10.i = fcmp oeq float %a, %b

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list