[PATCH] D130199: [pseudo] Add ambiguity & unparseability metrics to -print-statistics
Sam McCall via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 20 13:38:01 PDT 2022
sammccall created this revision.
sammccall added a reviewer: usaxena95.
Herald added a project: All.
sammccall requested review of this revision.
Herald added subscribers: cfe-commits, alextsao1999.
Herald added a project: clang-tools-extra.
These metrics can be used to quantify parsing improvements from a change.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D130199
Files:
clang-tools-extra/pseudo/test/glr.cpp
clang-tools-extra/pseudo/tool/ClangPseudo.cpp
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp
===================================================================
--- clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -24,6 +24,8 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
+using clang::pseudo::ForestNode;
+using clang::pseudo::Token;
using clang::pseudo::TokenStream;
using llvm::cl::desc;
using llvm::cl::init;
@@ -172,9 +174,8 @@
llvm::outs() << "GSS bytes: " << GSS.bytes()
<< " nodes: " << GSS.nodesCreated() << "\n";
- for (auto &P :
- {std::make_pair("Ambiguous", clang::pseudo::ForestNode::Ambiguous),
- std::make_pair("Opaque", clang::pseudo::ForestNode::Opaque)}) {
+ for (auto &P : {std::make_pair("Ambiguous", ForestNode::Ambiguous),
+ std::make_pair("Opaque", ForestNode::Opaque)}) {
clang::pseudo::NodeStats Stats(
Root, [&](const auto &N) { return N.kind() == P.second; });
llvm::outs() << "\n" << Stats.Total << " " << P.first << " nodes:\n";
@@ -182,6 +183,39 @@
llvm::outs() << llvm::formatv(" {0,3} {1}\n", S.second,
Lang.G.symbolName(S.first));
}
+
+ // Metrics for how imprecise parsing was.
+ // These are rough but aim to be:
+ // - linear: if we eliminate half the errors the metric should halve
+ // - length-independent
+ unsigned UnparsedTokens = 0; // Tokens covered by Opaque. (not unique)
+ unsigned Misparses = 0; // Sum of alternatives-1
+ llvm::DenseSet<const ForestNode *> Visited;
+ auto DFS = [&](const ForestNode &N, Token::Index End, auto &DFS) -> void {
+ if (N.kind() == ForestNode::Opaque) {
+ UnparsedTokens += End - N.startTokenIndex();
+ } else if (N.kind() == ForestNode::Ambiguous) {
+ Misparses += N.alternatives().size() - 1;
+ for (const auto *C : N.alternatives())
+ if (Visited.insert(C).second)
+ DFS(*C, End, DFS);
+ } else if (N.kind() == ForestNode::Sequence) {
+ for (unsigned I = 0, E = N.children().size(); I < E; ++I)
+ if (Visited.insert(N.children()[I]).second)
+ DFS(*N.children()[I],
+ I + 1 == N.children().size()
+ ? End
+ : N.children()[I + 1]->startTokenIndex(),
+ DFS);
+ }
+ };
+ unsigned Len = ParseableStream->tokens().size();
+ DFS(Root, Len, DFS);
+ llvm::outs() << "\n";
+ llvm::outs() << llvm::formatv("Ambiguity: {0} misparses/token\n",
+ double(Misparses) / Len);
+ llvm::outs() << llvm::formatv("Unparsed: {0}%\n",
+ 100.0 * UnparsedTokens / Len);
}
}
Index: clang-tools-extra/pseudo/test/glr.cpp
===================================================================
--- clang-tools-extra/pseudo/test/glr.cpp
+++ clang-tools-extra/pseudo/test/glr.cpp
@@ -29,3 +29,6 @@
// CHECK-NEXT: 1 type-name
// CHECK-EMPTY:
// CHECK-NEXT: 0 Opaque nodes:
+// CHECK-EMPTY:
+// CHECK-NEXT: Ambiguity: 0.40 misparses/token
+// CHECK-NEXT: Unparsed: 0.00%
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D130199.446253.patch
Type: text/x-patch
Size: 3281 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20220720/c426a68c/attachment-0001.bin>
More information about the cfe-commits
mailing list