[PATCH] [SDAG] When performing post-legalize DAG combining, run the legalizer over each node in the worklist prior to combining.
Tom Stellard
tom at stellard.net
Thu Jul 17 12:11:24 PDT 2014
On Thu, Jul 17, 2014 at 04:55:00PM +0000, Chandler Carruth wrote:
> Hi hfinkel, grosbach,
>
> This allows the combiner to produce new nodes which need to go back
> through legalization. This is particularly useful when generating
> operands to target specific nodes in a post-legalize DAG combine where
> the operands are significantly easier to express as pre-legalized
> operations. My immediate use case will be PSHUFB formation where we need
> to build a constant shuffle mask with a build_vector node.
>
Since the post-legalize DAG combiner is now doing combining and
legalizing, do we still need to run legalize as a separate pass.
Can we instead just do:
combine -> legalize types -> combine w/ legalize ops
-Tom
> This also refactors the relevant functionality in the legalizer to
> support this, and updates relevant tests. I've spoken to the R600 folks
> and these changes look like improvements. The avx512 change needs to be
> investigated, I suspect there is a disagreement between the legalizer
> and the DAG combiner there, but it seems a minor issue so leaving it to
> be re-evaluated after this patch.
>
> http://reviews.llvm.org/D4564
>
> Files:
> include/llvm/CodeGen/SelectionDAG.h
> lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> test/CodeGen/R600/fp_to_sint.ll
> test/CodeGen/R600/fp_to_uint.ll
> test/CodeGen/R600/setcc-equivalent.ll
> test/CodeGen/X86/avx512-cmp.ll
> Index: include/llvm/CodeGen/SelectionDAG.h
> ===================================================================
> --- include/llvm/CodeGen/SelectionDAG.h
> +++ include/llvm/CodeGen/SelectionDAG.h
> @@ -16,6 +16,7 @@
> #define LLVM_CODEGEN_SELECTIONDAG_H
>
> #include "llvm/ADT/DenseSet.h"
> +#include "llvm/ADT/SetVector.h"
> #include "llvm/ADT/StringMap.h"
> #include "llvm/ADT/ilist.h"
> #include "llvm/CodeGen/DAGCombine.h"
> @@ -364,6 +365,15 @@
> /// the graph.
> void Legalize();
>
> + /// \brief Transforms a SelectionDAG node and any operands to it into a node
> + /// that is compatible with the target instruction selector, as indicated by
> + /// the TargetLowering object.
> + ///
> + /// This essentially runs a single recursive walk of the \c Legalize process
> + /// over the given node (and its operands). This can be used to incrementally
> + /// legalize the DAG.
> + bool LegalizeOp(SDNode *N, SmallSetVector<SDNode *, 16> &UpdatedNodes);
> +
> /// LegalizeVectors - This transforms the SelectionDAG into a SelectionDAG
> /// that only uses vector math operations supported by the target. This is
> /// necessary as a separate step from Legalize because unrolling a vector
> Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> @@ -1098,10 +1098,6 @@
> // changes of the root.
> HandleSDNode Dummy(DAG.getRoot());
>
> - // The root of the dag may dangle to deleted nodes until the dag combiner is
> - // done. Set it to null to avoid confusion.
> - DAG.setRoot(SDValue());
> -
> // while the worklist isn't empty, find a node and
> // try and combine it.
> while (!WorkListContents.empty()) {
> @@ -1126,6 +1122,22 @@
> continue;
> }
>
> + // If this combine is running after legalizing the DAG, re-legalize any
> + // nodes pulled off the worklist.
> + if (Level == AfterLegalizeDAG) {
> + WorkListRemover DeadNodes(*this);
> + SmallSetVector<SDNode *, 16> UpdatedNodes;
> + bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
> +
> + for (SDNode *LN : UpdatedNodes) {
> + assert(LN->getOpcode() != ISD::DELETED_NODE && "boom");
> + AddToWorkList(LN);
> + AddUsersToWorkList(LN);
> + }
> + if (!NIsValid)
> + continue;
> + }
> +
> SDValue RV = combine(N);
>
> if (!RV.getNode())
> Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> @@ -12,6 +12,7 @@
> //===----------------------------------------------------------------------===//
>
> #include "llvm/CodeGen/SelectionDAG.h"
> +#include "llvm/ADT/SetVector.h"
> #include "llvm/ADT/SmallPtrSet.h"
> #include "llvm/ADT/SmallSet.h"
> #include "llvm/ADT/SmallVector.h"
> @@ -53,27 +54,37 @@
> const TargetLowering &TLI;
> SelectionDAG &DAG;
>
> - /// LegalizePosition - The iterator for walking through the node list.
> - SelectionDAG::allnodes_iterator LegalizePosition;
> + /// \brief The iterator being used to walk the DAG. We hold a reference to it
> + /// in order to update it as necessary on node deletion.
> + SelectionDAG::allnodes_iterator &LegalizePosition;
>
> - /// LegalizedNodes - The set of nodes which have already been legalized.
> - SmallPtrSet<SDNode *, 16> LegalizedNodes;
> + /// \brief The set of nodes which have already been legalized. We hold a
> + /// reference to it in order to update as necessary on node deletion.
> + SmallPtrSetImpl<SDNode *> &LegalizedNodes;
> +
> + /// \brief A set of all the nodes updated during legalization.
> + SmallSetVector<SDNode *, 16> *UpdatedNodes;
>
> EVT getSetCCResultType(EVT VT) const {
> return TLI.getSetCCResultType(*DAG.getContext(), VT);
> }
>
> // Libcall insertion helpers.
>
> public:
> - explicit SelectionDAGLegalize(SelectionDAG &DAG);
> -
> - void LegalizeDAG();
> -
> -private:
> - /// LegalizeOp - Legalizes the given operation.
> + SelectionDAGLegalize(SelectionDAG &DAG,
> + SelectionDAG::allnodes_iterator &LegalizePosition,
> + SmallPtrSetImpl<SDNode *> &LegalizedNodes,
> + SmallSetVector<SDNode *, 16> *UpdatedNodes = nullptr)
> + : SelectionDAG::DAGUpdateListener(DAG), TM(DAG.getTarget()),
> + TLI(DAG.getTargetLoweringInfo()), DAG(DAG),
> + LegalizePosition(LegalizePosition), LegalizedNodes(LegalizedNodes),
> + UpdatedNodes(UpdatedNodes) {}
> +
> + /// \brief Legalizes the given operation.
> void LegalizeOp(SDNode *Node);
>
> +private:
> SDValue OptimizeFloatStore(StoreSDNode *ST);
>
> void LegalizeLoadOps(SDNode *Node);
> @@ -149,14 +160,21 @@
> LegalizedNodes.erase(N);
> if (LegalizePosition == SelectionDAG::allnodes_iterator(N))
> ++LegalizePosition;
> + if (UpdatedNodes)
> + UpdatedNodes->remove(N);
> }
>
> public:
> // DAGUpdateListener implementation.
> void NodeDeleted(SDNode *N, SDNode *E) override {
> ForgetNode(N);
> + if (UpdatedNodes && E)
> + UpdatedNodes->insert(E);
> + }
> + void NodeUpdated(SDNode *N) override {
> + if (UpdatedNodes)
> + UpdatedNodes->insert(N);
> }
> - void NodeUpdated(SDNode *N) override {}
>
> // Node replacement helpers
> void ReplacedNode(SDNode *N) {
> @@ -168,15 +186,25 @@
> }
> void ReplaceNode(SDNode *Old, SDNode *New) {
> DAG.ReplaceAllUsesWith(Old, New);
> + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
> + DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i));
> ReplacedNode(Old);
> + if (UpdatedNodes)
> + UpdatedNodes->insert(New);
> }
> void ReplaceNode(SDValue Old, SDValue New) {
> DAG.ReplaceAllUsesWith(Old, New);
> ReplacedNode(Old.getNode());
> + if (UpdatedNodes)
> + UpdatedNodes->insert(New.getNode());
> }
> void ReplaceNode(SDNode *Old, const SDValue *New) {
> DAG.ReplaceAllUsesWith(Old, New);
> + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
> + DAG.TransferDbgValues(SDValue(Old, i), New[i]);
> ReplacedNode(Old);
> + if (UpdatedNodes)
> + UpdatedNodes->insert(New->getNode());
> }
> };
> }
> @@ -213,40 +241,6 @@
> return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
> }
>
> -SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
> - : SelectionDAG::DAGUpdateListener(dag),
> - TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
> - DAG(dag) {
> -}
> -
> -void SelectionDAGLegalize::LegalizeDAG() {
> - DAG.AssignTopologicalOrder();
> -
> - // Visit all the nodes. We start in topological order, so that we see
> - // nodes with their original operands intact. Legalization can produce
> - // new nodes which may themselves need to be legalized. Iterate until all
> - // nodes have been legalized.
> - for (;;) {
> - bool AnyLegalized = false;
> - for (LegalizePosition = DAG.allnodes_end();
> - LegalizePosition != DAG.allnodes_begin(); ) {
> - --LegalizePosition;
> -
> - SDNode *N = LegalizePosition;
> - if (LegalizedNodes.insert(N)) {
> - AnyLegalized = true;
> - LegalizeOp(N);
> - }
> - }
> - if (!AnyLegalized)
> - break;
> -
> - }
> -
> - // Remove dead nodes now.
> - DAG.RemoveDeadNodes();
> -}
> -
> /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
> /// a load from the constant pool.
> SDValue
> @@ -928,6 +922,10 @@
> assert(RVal.getNode() != Node && "Load must be completely replaced");
> DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
> DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
> + if (UpdatedNodes) {
> + UpdatedNodes->insert(RVal.getNode());
> + UpdatedNodes->insert(RChain.getNode());
> + }
> ReplacedNode(Node);
> }
> return;
> @@ -1148,6 +1146,10 @@
> assert(Value.getNode() != Node && "Load must be completely replaced");
> DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
> DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
> + if (UpdatedNodes) {
> + UpdatedNodes->insert(Value.getNode());
> + UpdatedNodes->insert(Chain.getNode());
> + }
> ReplacedNode(Node);
> }
> }
> @@ -1335,10 +1337,7 @@
> }
>
> if (NewNode != Node) {
> - DAG.ReplaceAllUsesWith(Node, NewNode);
> - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
> - DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
> - ReplacedNode(Node);
> + ReplaceNode(Node, NewNode);
> Node = NewNode;
> }
> switch (Action) {
> @@ -1356,12 +1355,8 @@
> else
> ResultVals.push_back(Res.getValue(i));
> }
> - if (Res.getNode() != Node || Res.getResNo() != 0) {
> - DAG.ReplaceAllUsesWith(Node, ResultVals.data());
> - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
> - DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
> - ReplacedNode(Node);
> - }
> + if (Res.getNode() != Node || Res.getResNo() != 0)
> + ReplaceNode(Node, ResultVals.data());
> return;
> }
> }
> @@ -4179,6 +4174,10 @@
> // use the new one.
> DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
> DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
> + if (UpdatedNodes) {
> + UpdatedNodes->insert(Tmp2.getNode());
> + UpdatedNodes->insert(Chain.getNode());
> + }
> ReplacedNode(Node);
> break;
> }
> @@ -4285,7 +4284,52 @@
> // SelectionDAG::Legalize - This is the entry point for the file.
> //
> void SelectionDAG::Legalize() {
> - /// run - This is the main entry point to this class.
> - ///
> - SelectionDAGLegalize(*this).LegalizeDAG();
> + AssignTopologicalOrder();
> +
> + allnodes_iterator LegalizePosition;
> + SmallPtrSet<SDNode *, 16> LegalizedNodes;
> + SelectionDAGLegalize Legalizer(*this, LegalizePosition, LegalizedNodes);
> +
> + // Visit all the nodes. We start in topological order, so that we see
> + // nodes with their original operands intact. Legalization can produce
> + // new nodes which may themselves need to be legalized. Iterate until all
> + // nodes have been legalized.
> + for (;;) {
> + bool AnyLegalized = false;
> + for (LegalizePosition = allnodes_end();
> + LegalizePosition != allnodes_begin(); ) {
> + --LegalizePosition;
> +
> + SDNode *N = LegalizePosition;
> + if (LegalizedNodes.insert(N)) {
> + AnyLegalized = true;
> + Legalizer.LegalizeOp(N);
> + }
> + }
> + if (!AnyLegalized)
> + break;
> +
> + }
> +
> + // Remove dead nodes now.
> + RemoveDeadNodes();
> +}
> +
> +// This is a somewhat hacky entry point to legalize a single sub-graph rooted
> +// at a node. This is used by the post-legalize DAG combiner to allow its DAG
> +// combines to produce illegal nodes and still have a chance for them to be
> +// legalized.
> +bool SelectionDAG::LegalizeOp(SDNode *N,
> + SmallSetVector<SDNode *, 16> &UpdatedNodes) {
> + allnodes_iterator LegalizePosition(N);
> + SmallPtrSet<SDNode *, 16> LegalizedNodes;
> + SelectionDAGLegalize Legalizer(*this, LegalizePosition, LegalizedNodes,
> + &UpdatedNodes);
> +
> + // Directly insert the node in question, and legalize it. This will recurse
> + // as needed through operands.
> + LegalizedNodes.insert(N);
> + Legalizer.LegalizeOp(N);
> +
> + return LegalizedNodes.count(N);
> }
> Index: test/CodeGen/R600/fp_to_sint.ll
> ===================================================================
> --- test/CodeGen/R600/fp_to_sint.ll
> +++ test/CodeGen/R600/fp_to_sint.ll
> @@ -49,8 +49,8 @@
> ; EG-DAG: XOR_INT
> ; EG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>
> ; Check that the compiler doesn't crash with a "cannot select" error
> ; SI: S_ENDPGM
> @@ -81,8 +81,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
> ; EG-DAG: AND_INT
> ; EG-DAG: LSHR
> ; EG-DAG: SUB_INT
> @@ -102,8 +102,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>
> ; SI: S_ENDPGM
> define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
> @@ -132,8 +132,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
> ; EG-DAG: AND_INT
> ; EG-DAG: LSHR
> ; EG-DAG: SUB_INT
> @@ -153,8 +153,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
> ; EG-DAG: AND_INT
> ; EG-DAG: LSHR
> ; EG-DAG: SUB_INT
> @@ -174,8 +174,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
> ; EG-DAG: AND_INT
> ; EG-DAG: LSHR
> ; EG-DAG: SUB_INT
> @@ -195,8 +195,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>
> ; SI: S_ENDPGM
> define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
> Index: test/CodeGen/R600/fp_to_uint.ll
> ===================================================================
> --- test/CodeGen/R600/fp_to_uint.ll
> +++ test/CodeGen/R600/fp_to_uint.ll
> @@ -52,8 +52,8 @@
> ; EG-DAG: XOR_INT
> ; EG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>
> ; SI: S_ENDPGM
> define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
> @@ -82,8 +82,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
> ; EG-DAG: AND_INT
> ; EG-DAG: LSHR
> ; EG-DAG: SUB_INT
> @@ -103,8 +103,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>
> ; SI: S_ENDPGM
> define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
> @@ -133,8 +133,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
> ; EG-DAG: AND_INT
> ; EG-DAG: LSHR
> ; EG-DAG: SUB_INT
> @@ -154,8 +154,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
> ; EG-DAG: AND_INT
> ; EG-DAG: LSHR
> ; EG-DAG: SUB_INT
> @@ -175,8 +175,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
> ; EG-DAG: AND_INT
> ; EG-DAG: LSHR
> ; EG-DAG: SUB_INT
> @@ -196,8 +196,8 @@
> ; EG-DAG: XOR_INT
> ; EG-DAG: SUB_INT
> ; EG-DAG: SUB_INT
> -; EG-DAG: CNDGE_INT
> -; EG-DAG: CNDGE_INT
> +; EG-DAG: CNDE_INT
> +; EG-DAG: CNDE_INT
>
> ; SI: S_ENDPGM
> define void @fp_to_uint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
> Index: test/CodeGen/R600/setcc-equivalent.ll
> ===================================================================
> --- test/CodeGen/R600/setcc-equivalent.ll
> +++ test/CodeGen/R600/setcc-equivalent.ll
> @@ -1,5 +1,4 @@
> ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
> -; XFAIL: *
>
> ; EG-LABEL: @and_setcc_setcc_i32
> ; EG: AND_INT
> @@ -28,4 +27,4 @@
> %ext = sext <4 x i1> %and to <4 x i32>
> store <4 x i32> %ext, <4 x i32> addrspace(1)* %out, align 4
> ret void
> -}
> \ No newline at end of file
> +}
> Index: test/CodeGen/X86/avx512-cmp.ll
> ===================================================================
> --- test/CodeGen/X86/avx512-cmp.ll
> +++ test/CodeGen/X86/avx512-cmp.ll
> @@ -28,10 +28,9 @@
> ret float %c1
> }
>
> +; FIXME: Can use vcmpeqss and extract from the mask here in AVX512.
> ; CHECK-LABEL: test3
> -; CHECK: vcmpeqss
> -; CHECK: kmov
> -; CHECK: ret
> +; CHECK: vucomiss {{.*}}encoding: [0x62
> define i32 @test3(float %a, float %b) {
>
> %cmp10.i = fcmp oeq float %a, %b
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list