[clang-tools-extra] bc5e7ce - [pseudo] Fix bugs/inconsistencies in forest dump.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 29 13:57:55 PDT 2022


Author: Sam McCall
Date: 2022-06-29T22:56:26+02:00
New Revision: bc5e7ced1c7e6155e9d527f88a657793ffad4761

URL: https://github.com/llvm/llvm-project/commit/bc5e7ced1c7e6155e9d527f88a657793ffad4761
DIFF: https://github.com/llvm/llvm-project/commit/bc5e7ced1c7e6155e9d527f88a657793ffad4761.diff

LOG: [pseudo] Fix bugs/inconsistencies in forest dump.

- when printing a shared node for the second time, don't print its children
  (This keeps output proportional to the size of the structure)
- when printing a shared node for the second time, print its type only, not rule
  (for consistency with above: don't dump details of nodes twice)
- don't abbreviate shared nodes, to ensure we can prune the tree there

Differential Revision: https://reviews.llvm.org/D128805

Added: 
    

Modified: 
    clang-tools-extra/pseudo/lib/Forest.cpp
    clang-tools-extra/pseudo/test/glr.cpp
    clang-tools-extra/pseudo/unittests/ForestTest.cpp
    clang-tools-extra/pseudo/unittests/GLRTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp
index 3fd36d781beba..d9086c8dea5cd 100644
--- a/clang-tools-extra/pseudo/lib/Forest.cpp
+++ b/clang-tools-extra/pseudo/lib/Forest.cpp
@@ -63,6 +63,7 @@ std::string ForestNode::dumpRecursive(const Grammar &G,
       Dump = [&](const ForestNode *P, Token::Index End,
                  llvm::Optional<SymbolID> ElidedParent,
                  LineDecoration LineDec) {
+        bool SharedNode = VisitCounts.find(P)->getSecond() > 1;
         llvm::ArrayRef<const ForestNode *> Children;
         auto EndOfElement = [&](size_t ChildIndex) {
           return ChildIndex + 1 == Children.size()
@@ -74,7 +75,14 @@ std::string ForestNode::dumpRecursive(const Grammar &G,
         } else if (P->kind() == Sequence) {
           Children = P->elements();
           if (Abbreviated) {
-            if (Children.size() == 1) {
+            // Abbreviate chains of trivial sequence nodes.
+            //    A := B, B := C, C := D, D := X Y Z
+            // becomes
+            //    A~D := X Y Z
+            //
+            // We can't hide nodes that appear multiple times in the tree,
+            // because we need to call out their identity with IDs.
+            if (Children.size() == 1 && !SharedNode) {
               assert(Children[0]->startTokenIndex() == P->startTokenIndex() &&
                      EndOfElement(0) == End);
               return Dump(Children[0], End,
@@ -94,14 +102,21 @@ std::string ForestNode::dumpRecursive(const Grammar &G,
           Result += G.symbolName(*ElidedParent);
           Result += "~";
         }
-        Result.append(P->dump(G));
 
-        if (VisitCounts.find(P)->getSecond() > 1 &&
-            P->kind() != ForestNode::Terminal) {
-          // The first time, print as #1. Later, =#1.
+        if (SharedNode && P->kind() != ForestNode::Terminal) {
           auto It = ReferenceIds.try_emplace(P, ReferenceIds.size() + 1);
-          Result +=
-              llvm::formatv(" {0}#{1}", It.second ? "" : "=", It.first->second);
+          bool First = It.second;
+          unsigned ID = It.first->second;
+
+          // The first time, print as #1. Later, =#1.
+          if (First) {
+            Result += llvm::formatv("{0} #{1}", P->dump(G), ID);
+          } else {
+            Result += llvm::formatv("{0} =#{1}", G.symbolName(P->symbol()), ID);
+            Children = {}; // Don't walk the children again.
+          }
+        } else {
+          Result.append(P->dump(G));
         }
         Result.push_back('\n');
 

diff  --git a/clang-tools-extra/pseudo/test/glr.cpp b/clang-tools-extra/pseudo/test/glr.cpp
index 8817462d7d83e..2df6fd7405597 100644
--- a/clang-tools-extra/pseudo/test/glr.cpp
+++ b/clang-tools-extra/pseudo/test/glr.cpp
@@ -7,7 +7,8 @@ void foo() {
 // CHECK-NEXT: │ ├─expression~multiplicative-expression := multiplicative-expression * pm-expression
 // CHECK-NEXT: │ │ ├─multiplicative-expression~IDENTIFIER := tok[5]
 // CHECK-NEXT: │ │ ├─* := tok[6]
-// CHECK-NEXT: │ │ └─pm-expression~IDENTIFIER := tok[7]
+// CHECK-NEXT: │ │ └─pm-expression~id-expression := unqualified-id #1
+// CHECK-NEXT: │ │   └─unqualified-id~IDENTIFIER := tok[7]
 // CHECK-NEXT: │ └─; := tok[8]
 // CHECK-NEXT: └─statement~simple-declaration := decl-specifier-seq init-declarator-list ;
 // CHECK-NEXT:   ├─decl-specifier-seq~simple-type-specifier := <ambiguous>
@@ -18,6 +19,6 @@ void foo() {
 // CHECK-NEXT:   │ └─simple-type-specifier~IDENTIFIER := tok[5]
 // CHECK-NEXT:   ├─init-declarator-list~ptr-declarator := ptr-operator ptr-declarator
 // CHECK-NEXT:   │ ├─ptr-operator~* := tok[6]
-// CHECK-NEXT:   │ └─ptr-declarator~IDENTIFIER := tok[7]
+// CHECK-NEXT:   │ └─ptr-declarator~id-expression =#1
 // CHECK-NEXT:   └─; := tok[8]
 }

diff  --git a/clang-tools-extra/pseudo/unittests/ForestTest.cpp b/clang-tools-extra/pseudo/unittests/ForestTest.cpp
index ae038f95db198..ece5f6b3d50bf 100644
--- a/clang-tools-extra/pseudo/unittests/ForestTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/ForestTest.cpp
@@ -122,8 +122,33 @@ TEST_F(ForestTest, DumpAmbiguousAndRefs) {
             "[  0, end) │     └─IDENTIFIER := tok[0]\n"
             "[  0, end) └─type := enum-type\n"
             "[  0, end)   └─enum-type := shared-type\n"
-            "[  0, end)     └─shared-type := IDENTIFIER =#1\n"
-            "[  0, end)       └─IDENTIFIER := tok[0]\n");
+            "[  0, end)     └─shared-type =#1\n");
+}
+
+TEST_F(ForestTest, DumpAbbreviatedShared) {
+  build(R"cpp(
+    _ := A
+    A := B
+    B := *
+  )cpp");
+
+  ForestArena Arena;
+  const auto *Star = &Arena.createTerminal(tok::star, 0);
+
+  const auto *B = &Arena.createSequence(symbol("B"), ruleFor("B"), {Star});
+  // We have two identical (but distinct) A nodes.
+  // The GLR parser would never produce this, but it makes the example simpler.
+  const auto *A1 = &Arena.createSequence(symbol("A"), ruleFor("A"), {B});
+  const auto *A2 = &Arena.createSequence(symbol("A"), ruleFor("A"), {B});
+  const auto *A = &Arena.createAmbiguous(symbol("A"), {A1, A2});
+
+  // We must not abbreviate away shared nodes: if we show A~* there's no way to
+  // show that the intermediate B node is shared between A1 and A2.
+  EXPECT_EQ(A->dumpRecursive(G, /*Abbreviate=*/true),
+            "[  0, end) A := <ambiguous>\n"
+            "[  0, end) ├─A~B := * #1\n"
+            "[  0, end) │ └─* := tok[0]\n"
+            "[  0, end) └─A~B =#1\n");
 }
 
 } // namespace

diff  --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index 846f2dbf16773..f061fb3a399e3 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -399,8 +399,7 @@ TEST_F(GLRTest, PerfectForestNodeSharing) {
                                      "[  0, end) └─test := left-paren expr\n"
                                      "[  0,   1)   ├─left-paren := {\n"
                                      "[  0,   1)   │ └─{ := tok[0]\n"
-                                     "[  1, end)   └─expr := IDENTIFIER =#1\n"
-                                     "[  1, end)     └─IDENTIFIER := tok[1]\n");
+                                     "[  1, end)   └─expr =#1\n");
 }
 
 TEST_F(GLRTest, GLRReduceOrder) {


        


More information about the cfe-commits mailing list