[llvm-branch-commits] [llvm] 9b7b8de - [TableGen] [ISel Matcher Emitter] Rework with two passes: one to size, one to emit

Paul C. Anagnostopoulos via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Nov 21 08:04:22 PST 2020


Author: Paul C. Anagnostopoulos
Date: 2020-11-21T10:59:13-05:00
New Revision: 9b7b8de6d12ff2e0bb7aa813f83a8053d302bc2b

URL: https://github.com/llvm/llvm-project/commit/9b7b8de6d12ff2e0bb7aa813f83a8053d302bc2b
DIFF: https://github.com/llvm/llvm-project/commit/9b7b8de6d12ff2e0bb7aa813f83a8053d302bc2b.diff

LOG: [TableGen] [ISel Matcher Emitter] Rework with two passes: one to size, one to emit

Differential Revision: https://reviews.llvm.org/D91632

Added: 
    

Modified: 
    llvm/utils/TableGen/DAGISelMatcher.h
    llvm/utils/TableGen/DAGISelMatcherEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/utils/TableGen/DAGISelMatcher.h b/llvm/utils/TableGen/DAGISelMatcher.h
index 223513fc8d38..dca1865b22e0 100644
--- a/llvm/utils/TableGen/DAGISelMatcher.h
+++ b/llvm/utils/TableGen/DAGISelMatcher.h
@@ -31,7 +31,7 @@ Matcher *ConvertPatternToMatcher(const PatternToMatch &Pattern,unsigned Variant,
                                  const CodeGenDAGPatterns &CGP);
 void OptimizeMatcher(std::unique_ptr<Matcher> &Matcher,
                      const CodeGenDAGPatterns &CGP);
-void EmitMatcherTable(const Matcher *Matcher, const CodeGenDAGPatterns &CGP,
+void EmitMatcherTable(Matcher *Matcher, const CodeGenDAGPatterns &CGP,
                       raw_ostream &OS);
 
 
@@ -41,6 +41,7 @@ class Matcher {
   // The next matcher node that is executed after this one.  Null if this is the
   // last stage of a match.
   std::unique_ptr<Matcher> Next;
+  size_t Size; // Size in bytes of matcher and all its children (if any).
   virtual void anchor();
 public:
   enum KindTy {
@@ -85,7 +86,10 @@ class Matcher {
     EmitNode,             // Create a DAG node
     EmitNodeXForm,        // Run a SDNodeXForm
     CompleteMatch,        // Finish a match and update the results.
-    MorphNodeTo           // Build a node, finish a match and update results.
+    MorphNodeTo,          // Build a node, finish a match and update results.
+
+    // Highest enum value; watch out when adding more.
+    HighestKind = MorphNodeTo
   };
   const KindTy Kind;
 
@@ -94,6 +98,8 @@ class Matcher {
 public:
   virtual ~Matcher() {}
 
+  unsigned getSize() const { return Size; }
+  void setSize(unsigned sz) { Size = sz; }
   KindTy getKind() const { return Kind; }
 
   Matcher *getNext() { return Next.get(); }

diff  --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index cf92391f30c4..03528a46aea7 100644
--- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
+
 using namespace llvm;
 
 enum {
@@ -47,6 +48,8 @@ namespace {
 class MatcherTableEmitter {
   const CodeGenDAGPatterns &CGP;
 
+  SmallVector<unsigned, Matcher::HighestKind+1> OpcodeCounts;
+
   DenseMap<TreePattern *, unsigned> NodePredicateMap;
   std::vector<TreePredicateFn> NodePredicates;
   std::vector<TreePredicateFn> NodePredicatesWithOperands;
@@ -79,12 +82,15 @@ class MatcherTableEmitter {
   }
 
 public:
-  MatcherTableEmitter(const CodeGenDAGPatterns &cgp)
-    : CGP(cgp) {}
+  MatcherTableEmitter(const CodeGenDAGPatterns &cgp) : CGP(cgp) {
+    OpcodeCounts.assign(Matcher::HighestKind+1, 0);
+  }
 
-  unsigned EmitMatcherList(const Matcher *N, unsigned Indent,
+  unsigned EmitMatcherList(const Matcher *N, const unsigned Indent,
                            unsigned StartIdx, raw_ostream &OS);
 
+  unsigned SizeMatcherList(Matcher *N, raw_ostream &OS);
+
   void EmitPredicateFunctions(raw_ostream &OS);
 
   void EmitHistogram(const Matcher *N, raw_ostream &OS);
@@ -95,7 +101,9 @@ class MatcherTableEmitter {
   void EmitNodePredicatesFunction(const std::vector<TreePredicateFn> &Preds,
                                   StringRef Decl, raw_ostream &OS);
 
-  unsigned EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
+  unsigned SizeMatcher(Matcher *N, raw_ostream &OS);
+
+  unsigned EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
                        raw_ostream &OS);
 
   unsigned getNodePredicate(TreePredicateFn Pred) {
@@ -165,7 +173,7 @@ static std::string GetPatFromTreePatternNode(const TreePatternNode *N) {
   return str;
 }
 
-static unsigned GetVBRSize(unsigned Val) {
+static size_t GetVBRSize(unsigned Val) {
   if (Val <= 127) return 1;
 
   unsigned NumBytes = 0;
@@ -219,6 +227,78 @@ static std::string getIncludePath(const Record *R) {
   return str;
 }
 
+/// This function traverses the matcher tree and sizes all the nodes
+/// that are children of the three kinds of nodes that have them.
+unsigned MatcherTableEmitter::
+SizeMatcherList(Matcher *N, raw_ostream &OS) {
+  unsigned Size = 0;
+  while (N) {
+    Size += SizeMatcher(N, OS);
+    N = N->getNext();
+  }
+  return Size;
+}
+
+/// This function sizes the children of the three kinds of nodes that
+/// have them. It does so by using special cases for those three
+/// nodes, but sharing the code in EmitMatcher() for the other kinds.
+unsigned MatcherTableEmitter::
+SizeMatcher(Matcher *N, raw_ostream &OS) {
+  unsigned Idx = 0;
+
+  ++OpcodeCounts[N->getKind()];
+  switch (N->getKind()) {
+  // The Scope matcher has its kind, a series of child size + child,
+  // and a trailing zero.
+  case Matcher::Scope: {
+    ScopeMatcher *SM = cast<ScopeMatcher>(N);
+    assert(SM->getNext() == nullptr && "Scope matcher should not have next");
+    unsigned Size = 1; // Count the kind.
+    for (unsigned i = 0, e = SM->getNumChildren(); i != e; ++i) {
+      const size_t ChildSize = SizeMatcherList(SM->getChild(i), OS);
+      assert(ChildSize != 0 && "Matcher cannot have child of size 0");
+      SM->getChild(i)->setSize(ChildSize);
+      Size += GetVBRSize(ChildSize) + ChildSize; // Count VBR and child size.
+    }
+    ++Size; // Count the zero sentinel.
+    return Size;
+  }
+
+  // SwitchOpcode and SwitchType have their kind, a series of child size +
+  // opcode/type + child, and a trailing zero.
+  case Matcher::SwitchOpcode:
+  case Matcher::SwitchType: {
+    unsigned Size = 1; // Count the kind.
+    unsigned NumCases;
+    if (const SwitchOpcodeMatcher *SOM = dyn_cast<SwitchOpcodeMatcher>(N))
+      NumCases = SOM->getNumCases();
+    else
+      NumCases = cast<SwitchTypeMatcher>(N)->getNumCases();
+    for (unsigned i = 0, e = NumCases; i != e; ++i) {
+      Matcher *Child;
+      if (SwitchOpcodeMatcher *SOM = dyn_cast<SwitchOpcodeMatcher>(N)) {
+        Child = SOM->getCaseMatcher(i);
+        Size += 2; // Count the child's opcode.
+      } else {
+        Child = cast<SwitchTypeMatcher>(N)->getCaseMatcher(i);
+        ++Size; // Count the child's type.
+      }
+      const size_t ChildSize = SizeMatcherList(Child, OS);
+      assert(ChildSize != 0 && "Matcher cannot have child of size 0");
+      Child->setSize(ChildSize);
+      Size += GetVBRSize(ChildSize) + ChildSize; // Count VBR and child size.
+    }
+    ++Size; // Count the zero sentinel.
+    return Size;
+  }
+
+  default:
+    // Employ the matcher emitter to size other matchers.
+    return EmitMatcher(N, 0, Idx, OS);
+  }
+  llvm_unreachable("Unreachable");
+}
+
 static void BeginEmitFunction(raw_ostream &OS, StringRef RetType,
                               StringRef Decl, bool AddOverride) {
   OS << "#ifdef GET_DAGISEL_DECL\n";
@@ -279,19 +359,16 @@ void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) {
 /// EmitMatcher - Emit bytes for the specified matcher and return
 /// the number of bytes emitted.
 unsigned MatcherTableEmitter::
-EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
+EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
             raw_ostream &OS) {
   OS.indent(Indent);
 
   switch (N->getKind()) {
   case Matcher::Scope: {
     const ScopeMatcher *SM = cast<ScopeMatcher>(N);
-    assert(SM->getNext() == nullptr && "Shouldn't have next after scope");
-
     unsigned StartIdx = CurrentIdx;
 
     // Emit all of the children.
-    SmallString<128> TmpBuf;
     for (unsigned i = 0, e = SM->getNumChildren(); i != e; ++i) {
       if (i == 0) {
         OS << "OPC_Scope, ";
@@ -304,34 +381,21 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
           OS.indent(Indent);
       }
 
-      // We need to encode the child and the offset of the failure code before
-      // emitting either of them.  Handle this by buffering the output into a
-      // string while we get the size.  Unfortunately, the offset of the
-      // children depends on the VBR size of the child, so for large children we
-      // have to iterate a bit.
-      unsigned ChildSize = 0;
-      unsigned VBRSize = 0;
-      do {
-        VBRSize = GetVBRSize(ChildSize);
-
-        TmpBuf.clear();
-        raw_svector_ostream OS(TmpBuf);
-        ChildSize = EmitMatcherList(SM->getChild(i), Indent+1,
-                                    CurrentIdx+VBRSize, OS);
-      } while (GetVBRSize(ChildSize) != VBRSize);
-
-      assert(ChildSize != 0 && "Should not have a zero-sized child!");
-
-      CurrentIdx += EmitVBRValue(ChildSize, OS);
+      size_t ChildSize = SM->getChild(i)->getSize();
+      size_t VBRSize = GetVBRSize(ChildSize);
+      EmitVBRValue(ChildSize, OS);
       if (!OmitComments) {
-        OS << "/*->" << CurrentIdx+ChildSize << "*/";
-
+        OS << "/*->" << CurrentIdx + VBRSize + ChildSize << "*/";
         if (i == 0)
           OS << " // " << SM->getNumChildren() << " children in Scope";
       }
+      OS << '\n';
 
-      OS << '\n' << TmpBuf;
-      CurrentIdx += ChildSize;
+      ChildSize = EmitMatcherList(SM->getChild(i), Indent+1,
+                                  CurrentIdx + VBRSize, OS);
+      assert(ChildSize == SM->getChild(i)->getSize() &&
+             "Emitted child size does not match calculated size");
+      CurrentIdx += VBRSize + ChildSize;
     }
 
     // Emit a zero as a sentinel indicating end of 'Scope'.
@@ -450,7 +514,6 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     ++CurrentIdx;
 
     // For each case we emit the size, then the opcode, then the matcher.
-    SmallString<128> TmpBuf;
     for (unsigned i = 0, e = NumCases; i != e; ++i) {
       const Matcher *Child;
       unsigned IdxSize;
@@ -462,24 +525,6 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
         IdxSize = 1;  // size of type in table is 1 byte.
       }
 
-      // We need to encode the opcode and the offset of the case code before
-      // emitting the case code.  Handle this by buffering the output into a
-      // string while we get the size.  Unfortunately, the offset of the
-      // children depends on the VBR size of the child, so for large children we
-      // have to iterate a bit.
-      unsigned ChildSize = 0;
-      unsigned VBRSize = 0;
-      do {
-        VBRSize = GetVBRSize(ChildSize);
-
-        TmpBuf.clear();
-        raw_svector_ostream OS(TmpBuf);
-        ChildSize = EmitMatcherList(Child, Indent+1, CurrentIdx+VBRSize+IdxSize,
-                                    OS);
-      } while (GetVBRSize(ChildSize) != VBRSize);
-
-      assert(ChildSize != 0 && "Should not have a zero-sized child!");
-
       if (i != 0) {
         if (!OmitComments)
           OS << "/*" << format_decimal(CurrentIdx, IndexWidth) << "*/";
@@ -489,20 +534,19 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
                      "/*SwitchOpcode*/ " : "/*SwitchType*/ ");
       }
 
-      // Emit the VBR.
-      CurrentIdx += EmitVBRValue(ChildSize, OS);
-
+      size_t ChildSize = Child->getSize();
+      CurrentIdx += EmitVBRValue(ChildSize, OS) + IdxSize;
       if (const SwitchOpcodeMatcher *SOM = dyn_cast<SwitchOpcodeMatcher>(N))
         OS << "TARGET_VAL(" << SOM->getCaseOpcode(i).getEnumName() << "),";
       else
         OS << getEnumName(cast<SwitchTypeMatcher>(N)->getCaseType(i)) << ',';
-
-      CurrentIdx += IdxSize;
-
       if (!OmitComments)
-        OS << "// ->" << CurrentIdx+ChildSize;
+        OS << "// ->" << CurrentIdx + ChildSize;
       OS << '\n';
-      OS << TmpBuf;
+
+      ChildSize = EmitMatcherList(Child, Indent+1, CurrentIdx, OS);
+      assert(ChildSize == Child->getSize() &&
+             "Emitted child size does not match calculated size");
       CurrentIdx += ChildSize;
     }
 
@@ -515,8 +559,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
              " // EndSwitchOpcode" : " // EndSwitchType");
 
     OS << '\n';
-    ++CurrentIdx;
-    return CurrentIdx-StartIdx;
+    return CurrentIdx - StartIdx + 1;
   }
 
  case Matcher::CheckType:
@@ -810,9 +853,10 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
   llvm_unreachable("Unreachable");
 }
 
-/// EmitMatcherList - Emit the bytes for the specified matcher subtree.
+/// This function traverses the matcher tree and emits all the nodes.
+/// The nodes have already been sized.
 unsigned MatcherTableEmitter::
-EmitMatcherList(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
+EmitMatcherList(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
                 raw_ostream &OS) {
   unsigned Size = 0;
   while (N) {
@@ -841,12 +885,12 @@ void MatcherTableEmitter::EmitNodePredicatesFunction(
   OS << "  default: llvm_unreachable(\"Invalid predicate in table?\");\n";
   for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
     // Emit the predicate code corresponding to this pattern.
-    TreePredicateFn PredFn = Preds[i];
+    const TreePredicateFn PredFn = Preds[i];
 
     assert(!PredFn.isAlwaysTrue() && "No code in this predicate");
     OS << "  case " << i << ": {\n";
     for (auto *SimilarPred :
-          NodePredicatesByCodeToRun[PredFn.getCodeToRunOnSDNode()])
+             NodePredicatesByCodeToRun[PredFn.getCodeToRunOnSDNode()])
       OS << "    // " << TreePredicateFn(SimilarPred).getFnName() <<'\n';
 
     OS << PredFn.getCodeToRunOnSDNode() << "\n  }\n";
@@ -975,28 +1019,6 @@ void MatcherTableEmitter::EmitPredicateFunctions(raw_ostream &OS) {
   }
 }
 
-static void BuildHistogram(const Matcher *M, std::vector<unsigned> &OpcodeFreq){
-  for (; M != nullptr; M = M->getNext()) {
-    // Count this node.
-    if (unsigned(M->getKind()) >= OpcodeFreq.size())
-      OpcodeFreq.resize(M->getKind()+1);
-    OpcodeFreq[M->getKind()]++;
-
-    // Handle recursive nodes.
-    if (const ScopeMatcher *SM = dyn_cast<ScopeMatcher>(M)) {
-      for (unsigned i = 0, e = SM->getNumChildren(); i != e; ++i)
-        BuildHistogram(SM->getChild(i), OpcodeFreq);
-    } else if (const SwitchOpcodeMatcher *SOM =
-                 dyn_cast<SwitchOpcodeMatcher>(M)) {
-      for (unsigned i = 0, e = SOM->getNumCases(); i != e; ++i)
-        BuildHistogram(SOM->getCaseMatcher(i), OpcodeFreq);
-    } else if (const SwitchTypeMatcher *STM = dyn_cast<SwitchTypeMatcher>(M)) {
-      for (unsigned i = 0, e = STM->getNumCases(); i != e; ++i)
-        BuildHistogram(STM->getCaseMatcher(i), OpcodeFreq);
-    }
-  }
-}
-
 static StringRef getOpcodeString(Matcher::KindTy Kind) {
   switch (Kind) {
   case Matcher::Scope: return "OPC_Scope"; break;
@@ -1048,20 +1070,17 @@ void MatcherTableEmitter::EmitHistogram(const Matcher *M,
   if (OmitComments)
     return;
 
-  std::vector<unsigned> OpcodeFreq;
-  BuildHistogram(M, OpcodeFreq);
-
   OS << "  // Opcode Histogram:\n";
-  for (unsigned i = 0, e = OpcodeFreq.size(); i != e; ++i) {
+  for (unsigned i = 0, e = OpcodeCounts.size(); i != e; ++i) {
     OS << "  // #"
        << left_justify(getOpcodeString((Matcher::KindTy)i), HistOpcWidth)
-       << " = " << OpcodeFreq[i] << '\n';
+       << " = " << OpcodeCounts[i] << '\n';
   }
   OS << '\n';
 }
 
 
-void llvm::EmitMatcherTable(const Matcher *TheMatcher,
+void llvm::EmitMatcherTable(Matcher *TheMatcher,
                             const CodeGenDAGPatterns &CGP,
                             raw_ostream &OS) {
   OS << "#if defined(GET_DAGISEL_DECL) && defined(GET_DAGISEL_BODY)\n";
@@ -1096,12 +1115,23 @@ void llvm::EmitMatcherTable(const Matcher *TheMatcher,
   BeginEmitFunction(OS, "void", "SelectCode(SDNode *N)", false/*AddOverride*/);
   MatcherTableEmitter MatcherEmitter(CGP);
 
+  // First we size all the children of the three kinds of matchers that have
+  // them. This is done by sharing the code in EmitMatcher(). but we don't
+  // want to emit anything, so we turn off comments and use a null stream.
+  bool SaveOmitComments = OmitComments;
+  OmitComments = true;
+  raw_null_ostream NullOS;
+  unsigned TotalSize = MatcherEmitter.SizeMatcherList(TheMatcher, NullOS);
+  OmitComments = SaveOmitComments;
+
+  // Now that the matchers are sized, we can emit the code for them to the
+  // final stream.
   OS << "{\n";
   OS << "  // Some target values are emitted as 2 bytes, TARGET_VAL handles\n";
   OS << "  // this.\n";
   OS << "  #define TARGET_VAL(X) X & 255, unsigned(X) >> 8\n";
   OS << "  static const unsigned char MatcherTable[] = {\n";
-  unsigned TotalSize = MatcherEmitter.EmitMatcherList(TheMatcher, 1, 0, OS);
+  TotalSize = MatcherEmitter.EmitMatcherList(TheMatcher, 1, 0, OS);
   OS << "    0\n  }; // Total Array size is " << (TotalSize+1) << " bytes\n\n";
 
   MatcherEmitter.EmitHistogram(TheMatcher, OS);


        


More information about the llvm-branch-commits mailing list