[clang-tools-extra] 9b6bb12 - [pseudo] Add ForestNode descendants iterator, print ambiguous/opaque node stats.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 30 12:21:08 PDT 2022


Author: Sam McCall
Date: 2022-06-30T21:20:55+02:00
New Revision: 9b6bb12b85846e8337f8ff75aebd4e9798b56bdd

URL: https://github.com/llvm/llvm-project/commit/9b6bb12b85846e8337f8ff75aebd4e9798b56bdd
DIFF: https://github.com/llvm/llvm-project/commit/9b6bb12b85846e8337f8ff75aebd4e9798b56bdd.diff

LOG: [pseudo] Add ForestNode descendants iterator, print ambiguous/opaque node stats.

Differential Revision: https://reviews.llvm.org/D128930

Added: 
    

Modified: 
    clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
    clang-tools-extra/pseudo/lib/Forest.cpp
    clang-tools-extra/pseudo/test/glr.cpp
    clang-tools-extra/pseudo/tool/ClangPseudo.cpp
    clang-tools-extra/pseudo/unittests/ForestTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
index 29bcac98a64f8..ef9a222faf6bf 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
@@ -43,6 +43,7 @@ namespace pseudo {
 // doesn't have parent pointers.
 class alignas(class ForestNode *) ForestNode {
 public:
+  class RecursiveIterator;
   enum Kind {
     // A Terminal node is a single terminal symbol bound to a token.
     Terminal,
@@ -87,6 +88,22 @@ class alignas(class ForestNode *) ForestNode {
     return children(Data);
   }
 
+  llvm::ArrayRef<const ForestNode *> children() const {
+    switch (kind()) {
+    case Sequence:
+      return elements();
+    case Ambiguous:
+      return alternatives();
+    case Terminal:
+    case Opaque:
+      return {}; // No children are reported for these kinds.
+    }
+    llvm_unreachable("Bad kind");
+  }
+
+  // Iterates over this node and every node reachable from it, each one once.
+  llvm::iterator_range<RecursiveIterator> descendants() const;
+
   std::string dump(const Grammar &) const;
   std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const;
 
@@ -181,6 +198,25 @@ class ForestArena {
   uint32_t NodeCount = 0;
 };
 
+class ForestNode::RecursiveIterator
+    : public std::iterator<std::input_iterator_tag, const ForestNode> {
+  llvm::DenseSet<const ForestNode *> Seen; // Children already reached.
+  struct StackFrame {
+    const ForestNode *Parent;
+    unsigned ChildIndex; // Index in Parent->children() currently being visited.
+  };
+  std::vector<StackFrame> Stack; // One frame per node descended through.
+  const ForestNode *Cur;         // Current node; null for the end iterator.
+
+public:
+  RecursiveIterator(const ForestNode *N = nullptr) : Cur(N) {}
+
+  const ForestNode &operator*() const { return *Cur; }
+  void operator++();
+  bool operator==(const RecursiveIterator &I) const { return Cur == I.Cur; }
+  bool operator!=(const RecursiveIterator &I) const { return !(*this == I); }
+};
+
 } // namespace pseudo
 } // namespace clang
 

diff  --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp
index 02818547761c8..bea93809c5b38 100644
--- a/clang-tools-extra/pseudo/lib/Forest.cpp
+++ b/clang-tools-extra/pseudo/lib/Forest.cpp
@@ -16,6 +16,35 @@
 namespace clang {
 namespace pseudo {
 
+void ForestNode::RecursiveIterator::operator++() {
+  auto C = Cur->children();
+  // Try to find a not-yet-seen child of the current node to descend into.
+  for (unsigned I = 0; I < C.size(); ++I) {
+    if (Seen.insert(C[I]).second) {
+      Stack.push_back({Cur, I});
+      Cur = C[I];
+      return;
+    }
+  }
+  // Otherwise advance to an unseen later sibling of this node or an ancestor.
+  for (; !Stack.empty(); Stack.pop_back()) {
+    C = Stack.back().Parent->children();
+    unsigned &Index = Stack.back().ChildIndex;
+    while (++Index < C.size()) {
+      if (Seen.insert(C[Index]).second) {
+        Cur = C[Index];
+        return;
+      }
+    }
+  }
+  Cur = nullptr; // Nothing left: now compares equal to the end iterator.
+}
+
+llvm::iterator_range<ForestNode::RecursiveIterator>
+ForestNode::descendants() const {
+  return {RecursiveIterator(this), RecursiveIterator()}; // Null Cur is end.
+}
+
 std::string ForestNode::dump(const Grammar &G) const {
   switch (kind()) {
   case Ambiguous:

diff  --git a/clang-tools-extra/pseudo/test/glr.cpp b/clang-tools-extra/pseudo/test/glr.cpp
index 2df6fd7405597..0eb19fba2ecc6 100644
--- a/clang-tools-extra/pseudo/test/glr.cpp
+++ b/clang-tools-extra/pseudo/test/glr.cpp
@@ -1,4 +1,4 @@
-// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest | FileCheck %s
+// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest -print-statistics | FileCheck %s
 
 void foo() {
   T* a; // a multiply expression or a pointer declaration?
@@ -22,3 +22,10 @@ void foo() {
 // CHECK-NEXT:   │ └─ptr-declarator~id-expression =#1
 // CHECK-NEXT:   └─; := tok[8]
 }
+
+// CHECK:      3 Ambiguous nodes:
+// CHECK-NEXT: 1 simple-type-specifier
+// CHECK-NEXT: 1 statement
+// CHECK-NEXT: 1 type-name
+// CHECK-EMPTY:
+// CHECK-NEXT: 0 Opaque nodes:

diff  --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
index 39ad751aab38d..ff9b893ed3f5a 100644
--- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -14,6 +14,8 @@
 #include "clang-pseudo/grammar/LRGraph.h"
 #include "clang-pseudo/grammar/LRTable.h"
 #include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormatVariadic.h"
@@ -59,6 +61,34 @@ static std::string readOrDie(llvm::StringRef Path) {
   return Text.get()->getBuffer().str();
 }
 
+namespace clang {
+namespace pseudo {
+namespace {
+
+struct NodeStats {
+  unsigned Total = 0; // Count of visited nodes accepted by Filter.
+  std::vector<std::pair<SymbolID, unsigned>> BySymbol; // (symbol, count).
+
+  NodeStats(const ForestNode &Root,
+            llvm::function_ref<bool(const ForestNode &)> Filter) {
+    llvm::DenseMap<SymbolID, unsigned> Map;
+    for (const ForestNode &N : Root.descendants())
+      if (Filter(N)) {
+        ++Total;
+        ++Map[N.symbol()];
+      }
+    BySymbol = {Map.begin(), Map.end()};
+    // Count descending (operands swapped in tie), then symbol ascending.
+    llvm::sort(BySymbol, [](const auto &L, const auto &R) {
+      return std::tie(R.second, L.first) < std::tie(L.second, R.first);
+    });
+  }
+};
+
+} // namespace
+} // namespace pseudo
+} // namespace clang
+
 int main(int argc, char *argv[]) {
   llvm::cl::ParseCommandLineOptions(argc, argv, "");
   llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
@@ -135,6 +165,17 @@ int main(int argc, char *argv[]) {
                      << " nodes: " << Arena.nodeCount() << "\n";
         llvm::outs() << "GSS bytes: " << GSS.bytes()
                      << " nodes: " << GSS.nodesCreated() << "\n";
+
+        for (auto &P :
+             {std::make_pair("Ambiguous", clang::pseudo::ForestNode::Ambiguous),
+              std::make_pair("Opaque", clang::pseudo::ForestNode::Opaque)}) {
+          clang::pseudo::NodeStats Stats(
+              Root, [&](const auto &N) { return N.kind() == P.second; });
+          llvm::outs() << "\n" << Stats.Total << " " << P.first << " nodes:\n";
+          for (const auto &S : Stats.BySymbol)
+            llvm::outs() << llvm::formatv("  {0,3} {1}\n", S.second,
+                                          G.symbolName(S.first));
+        }
       }
     }
   }

diff  --git a/clang-tools-extra/pseudo/unittests/ForestTest.cpp b/clang-tools-extra/pseudo/unittests/ForestTest.cpp
index ece5f6b3d50bf..b5a2b7e9a3364 100644
--- a/clang-tools-extra/pseudo/unittests/ForestTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/ForestTest.cpp
@@ -151,6 +151,30 @@ TEST_F(ForestTest, DumpAbbreviatedShared) {
             "[  0, end) └─A~B =#1\n");
 }
 
+TEST_F(ForestTest, Iteration) {
+  //   Z      Z = ambiguous{X, Y}
+  //  / \     X = sequence{A, B}
+  //  X Y     Y = sequence{B}
+  //  |\|     A = terminal
+  //  A B     B = opaque, shared by X and Y
+  ForestArena Arena;
+  const auto *A = &Arena.createTerminal(tok::identifier, 0);
+  const auto *B = &Arena.createOpaque(1, 0);
+  const auto *X = &Arena.createSequence(2, 1, {A, B});
+  const auto *Y = &Arena.createSequence(2, 2, {B});
+  const auto *Z = &Arena.createAmbiguous(2, {X, Y});
+
+  std::vector<const ForestNode *> Nodes;
+  for (const ForestNode &N : Z->descendants())
+    Nodes.push_back(&N);
+  EXPECT_THAT(Nodes, testing::UnorderedElementsAre(A, B, X, Y, Z)); // B once.
+
+  Nodes.clear();
+  for (const ForestNode &N : X->descendants())
+    Nodes.push_back(&N);
+  EXPECT_THAT(Nodes, testing::UnorderedElementsAre(X, A, B));
+}
+
 } // namespace
 } // namespace pseudo
 } // namespace clang


        


More information about the cfe-commits mailing list