[clang-tools-extra] 9b6bb12 - [pseudo] Add ForestNode descendants iterator, print ambiguous/opaque node stats.
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 30 12:21:08 PDT 2022
Author: Sam McCall
Date: 2022-06-30T21:20:55+02:00
New Revision: 9b6bb12b85846e8337f8ff75aebd4e9798b56bdd
URL: https://github.com/llvm/llvm-project/commit/9b6bb12b85846e8337f8ff75aebd4e9798b56bdd
DIFF: https://github.com/llvm/llvm-project/commit/9b6bb12b85846e8337f8ff75aebd4e9798b56bdd.diff
LOG: [pseudo] Add ForestNode descendants iterator, print ambiguous/opaque node stats.
Differential Revision: https://reviews.llvm.org/D128930
Added:
Modified:
clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
clang-tools-extra/pseudo/lib/Forest.cpp
clang-tools-extra/pseudo/test/glr.cpp
clang-tools-extra/pseudo/tool/ClangPseudo.cpp
clang-tools-extra/pseudo/unittests/ForestTest.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
index 29bcac98a64f8..ef9a222faf6bf 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
@@ -43,6 +43,7 @@ namespace pseudo {
// doesn't have parent pointers.
class alignas(class ForestNode *) ForestNode {
public:
+ class RecursiveIterator;
enum Kind {
// A Terminal node is a single terminal symbol bound to a token.
Terminal,
@@ -87,6 +88,22 @@ class alignas(class ForestNode *) ForestNode {
return children(Data);
}
+ llvm::ArrayRef<const ForestNode *> children() const { // Children of this node, chosen by kind().
+ switch (kind()) {
+ case Sequence:
+ return elements();
+ case Ambiguous:
+ return alternatives();
+ case Terminal:
+ case Opaque:
+ return {}; // Terminal and Opaque nodes report no children.
+ }
+ llvm_unreachable("Bad kind"); // Only reached if kind() matched no case above.
+ }
+
+ // Iteration over this node and every node reachable from it.
+ llvm::iterator_range<RecursiveIterator> descendants() const;
+
std::string dump(const Grammar &) const;
std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const;
@@ -181,6 +198,25 @@ class ForestArena {
uint32_t NodeCount = 0;
};
+class ForestNode::RecursiveIterator // Input iterator behind ForestNode::descendants().
+ : public std::iterator<std::input_iterator_tag, const ForestNode> {
+ llvm::DenseSet<const ForestNode *> Seen; // Children already reached; operator++ skips them.
+ struct StackFrame { // One frame per ancestor on the path down to Cur.
+ const ForestNode *Parent;
+ unsigned ChildIndex; // Index into Parent->children() of the child currently being visited.
+ };
+ std::vector<StackFrame> Stack;
+ const ForestNode *Cur; // Node the iterator points at; nullptr once traversal has finished.
+
+public:
+ RecursiveIterator(const ForestNode *N = nullptr) : Cur(N) {} // The default (null) iterator serves as end.
+
+ const ForestNode &operator*() const { return *Cur; };
+ void operator++();
+ bool operator==(const RecursiveIterator &I) const { return Cur == I.Cur; } // Compares only Cur, not Seen/Stack.
+ bool operator!=(const RecursiveIterator &I) const { return !(*this == I); }
+};
+
} // namespace pseudo
} // namespace clang
diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp
index 02818547761c8..bea93809c5b38 100644
--- a/clang-tools-extra/pseudo/lib/Forest.cpp
+++ b/clang-tools-extra/pseudo/lib/Forest.cpp
@@ -16,6 +16,35 @@
namespace clang {
namespace pseudo {
+void ForestNode::RecursiveIterator::operator++() { // Moves Cur to the next unseen node, or to nullptr if none remain.
+ auto C = Cur->children();
+ // Try to find a child of the current node to descend into.
+ for (unsigned I = 0; I < C.size(); ++I) {
+ if (Seen.insert(C[I]).second) { // .second is true only when C[I] was not already in Seen.
+ Stack.push_back({Cur, I}); // Remember where to resume among Cur's children.
+ Cur = C[I];
+ return;
+ }
+ }
+ // Try to find a sibling of an ancestor to advance to.
+ for (; !Stack.empty(); Stack.pop_back()) { // Pops each frame whose remaining children were all seen.
+ C = Stack.back().Parent->children();
+ unsigned &Index = Stack.back().ChildIndex; // By reference: the advanced index is kept in the frame.
+ while (++Index < C.size()) {
+ if (Seen.insert(C[Index]).second) {
+ Cur = C[Index];
+ return;
+ }
+ }
+ }
+ Cur = nullptr; // Nothing left: now compares equal to a default-constructed iterator.
+}
+
+llvm::iterator_range<ForestNode::RecursiveIterator>
+ForestNode::descendants() const {
+ return {RecursiveIterator(this), RecursiveIterator()}; // Starts at this node; the null iterator is the end.
+}
+
std::string ForestNode::dump(const Grammar &G) const {
switch (kind()) {
case Ambiguous:
diff --git a/clang-tools-extra/pseudo/test/glr.cpp b/clang-tools-extra/pseudo/test/glr.cpp
index 2df6fd7405597..0eb19fba2ecc6 100644
--- a/clang-tools-extra/pseudo/test/glr.cpp
+++ b/clang-tools-extra/pseudo/test/glr.cpp
@@ -1,4 +1,4 @@
-// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest | FileCheck %s
+// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest -print-statistics | FileCheck %s
void foo() {
T* a; // a multiply expression or a pointer declaration?
@@ -22,3 +22,10 @@ void foo() {
// CHECK-NEXT: │ └─ptr-declarator~id-expression =#1
// CHECK-NEXT: └─; := tok[8]
}
+
+// CHECK: 3 Ambiguous nodes:
+// CHECK-NEXT: 1 simple-type-specifier
+// CHECK-NEXT: 1 statement
+// CHECK-NEXT: 1 type-name
+// CHECK-EMPTY:
+// CHECK-NEXT: 0 Opaque nodes:
diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
index 39ad751aab38d..ff9b893ed3f5a 100644
--- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -14,6 +14,8 @@
#include "clang-pseudo/grammar/LRGraph.h"
#include "clang-pseudo/grammar/LRTable.h"
#include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
@@ -59,6 +61,34 @@ static std::string readOrDie(llvm::StringRef Path) {
return Text.get()->getBuffer().str();
}
+namespace clang {
+namespace pseudo {
+namespace {
+
+struct NodeStats { // Per-symbol counts of the nodes under a root that pass a filter.
+ unsigned Total = 0; // Number of visited nodes for which Filter returned true.
+ std::vector<std::pair<SymbolID, unsigned>> BySymbol; // (symbol, count) pairs, sorted by the constructor.
+
+ NodeStats(const ForestNode &Root, // Traversal starts here, via Root.descendants().
+ llvm::function_ref<bool(const ForestNode &)> Filter) {
+ llvm::DenseMap<SymbolID, unsigned> Map;
+ for (const ForestNode &N : Root.descendants())
+ if (Filter(N)) {
+ ++Total;
+ ++Map[N.symbol()];
+ }
+ BySymbol = {Map.begin(), Map.end()};
+ // Sort by count descending, then symbol ascending.
+ llvm::sort(BySymbol, [](const auto &L, const auto &R) {
+ return std::tie(R.second, L.first) < std::tie(L.second, R.first); // L/R swapped on .second => descending count.
+ });
+ }
+};
+
+} // namespace
+} // namespace pseudo
+} // namespace clang
+
int main(int argc, char *argv[]) {
llvm::cl::ParseCommandLineOptions(argc, argv, "");
llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
@@ -135,6 +165,17 @@ int main(int argc, char *argv[]) {
<< " nodes: " << Arena.nodeCount() << "\n";
llvm::outs() << "GSS bytes: " << GSS.bytes()
<< " nodes: " << GSS.nodesCreated() << "\n";
+
+ for (auto &P :
+ {std::make_pair("Ambiguous", clang::pseudo::ForestNode::Ambiguous),
+ std::make_pair("Opaque", clang::pseudo::ForestNode::Opaque)}) {
+ clang::pseudo::NodeStats Stats(
+ Root, [&](const auto &N) { return N.kind() == P.second; });
+ llvm::outs() << "\n" << Stats.Total << " " << P.first << " nodes:\n";
+ for (const auto &S : Stats.BySymbol)
+ llvm::outs() << llvm::formatv(" {0,3} {1}\n", S.second,
+ G.symbolName(S.first));
+ }
}
}
}
diff --git a/clang-tools-extra/pseudo/unittests/ForestTest.cpp b/clang-tools-extra/pseudo/unittests/ForestTest.cpp
index ece5f6b3d50bf..b5a2b7e9a3364 100644
--- a/clang-tools-extra/pseudo/unittests/ForestTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/ForestTest.cpp
@@ -151,6 +151,30 @@ TEST_F(ForestTest, DumpAbbreviatedShared) {
"[ 0, end) └─A~B =#1\n");
}
+TEST_F(ForestTest, Iteration) {
+ // Z
+ // / \
+ // X Y
+ // |\|
+ // A B
+ ForestArena Arena;
+ const auto *A = &Arena.createTerminal(tok::identifier, 0);
+ const auto *B = &Arena.createOpaque(1, 0);
+ const auto *X = &Arena.createSequence(2, 1, {A, B});
+ const auto *Y = &Arena.createSequence(2, 2, {B}); // B is shared: it is a child of both X and Y.
+ const auto *Z = &Arena.createAmbiguous(2, {X, Y});
+
+ std::vector<const ForestNode *> Nodes;
+ for (const ForestNode &N : Z->descendants())
+ Nodes.push_back(&N);
+ EXPECT_THAT(Nodes, testing::UnorderedElementsAre(A, B, X, Y, Z)); // Each node appears once, shared B included.
+
+ Nodes.clear();
+ for (const ForestNode &N : X->descendants())
+ Nodes.push_back(&N);
+ EXPECT_THAT(Nodes, testing::UnorderedElementsAre(X, A, B)); // Starting at X does not reach Y or Z.
+}
+
} // namespace
} // namespace pseudo
} // namespace clang
More information about the cfe-commits
mailing list