This revision was automatically updated to reflect the committed changes.
sammccall marked an inline comment as done.
Closed by commit rGbc5e7ced1c7e: [pseudo] Fix bugs/inconsistencies in forest 
dump. (authored by sammccall).

Changed prior to commit:
  https://reviews.llvm.org/D128805?vs=440939&id=441160#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128805/new/

https://reviews.llvm.org/D128805

Files:
  clang-tools-extra/pseudo/lib/Forest.cpp
  clang-tools-extra/pseudo/test/glr.cpp
  clang-tools-extra/pseudo/unittests/ForestTest.cpp
  clang-tools-extra/pseudo/unittests/GLRTest.cpp

Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -399,8 +399,7 @@
                                      "[  0, end) └─test := left-paren expr\n"
                                      "[  0,   1)   ├─left-paren := {\n"
                                      "[  0,   1)   │ └─{ := tok[0]\n"
-                                     "[  1, end)   └─expr := IDENTIFIER =#1\n"
-                                     "[  1, end)     └─IDENTIFIER := tok[1]\n");
+                                     "[  1, end)   └─expr =#1\n");
 }
 
 TEST_F(GLRTest, GLRReduceOrder) {
Index: clang-tools-extra/pseudo/unittests/ForestTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/ForestTest.cpp
+++ clang-tools-extra/pseudo/unittests/ForestTest.cpp
@@ -122,8 +122,33 @@
             "[  0, end) │     └─IDENTIFIER := tok[0]\n"
             "[  0, end) └─type := enum-type\n"
             "[  0, end)   └─enum-type := shared-type\n"
-            "[  0, end)     └─shared-type := IDENTIFIER =#1\n"
-            "[  0, end)       └─IDENTIFIER := tok[0]\n");
+            "[  0, end)     └─shared-type =#1\n");
+}
+
+TEST_F(ForestTest, DumpAbbreviatedShared) {
+  build(R"cpp(
+    _ := A
+    A := B
+    B := *
+  )cpp");
+
+  ForestArena Arena;
+  const auto *Star = &Arena.createTerminal(tok::star, 0);
+
+  const auto *B = &Arena.createSequence(symbol("B"), ruleFor("B"), {Star});
+  // We have two identical (but distinct) A nodes.
+  // The GLR parser would never produce this, but it makes the example simpler.
+  const auto *A1 = &Arena.createSequence(symbol("A"), ruleFor("A"), {B});
+  const auto *A2 = &Arena.createSequence(symbol("A"), ruleFor("A"), {B});
+  const auto *A = &Arena.createAmbiguous(symbol("A"), {A1, A2});
+
+  // We must not abbreviate away shared nodes: if we show A~* there's no way to
+  // show that the intermediate B node is shared between A1 and A2.
+  EXPECT_EQ(A->dumpRecursive(G, /*Abbreviate=*/true),
+            "[  0, end) A := <ambiguous>\n"
+            "[  0, end) ├─A~B := * #1\n"
+            "[  0, end) │ └─* := tok[0]\n"
+            "[  0, end) └─A~B =#1\n");
 }
 
 } // namespace
Index: clang-tools-extra/pseudo/test/glr.cpp
===================================================================
--- clang-tools-extra/pseudo/test/glr.cpp
+++ clang-tools-extra/pseudo/test/glr.cpp
@@ -7,7 +7,8 @@
 // CHECK-NEXT: │ ├─expression~multiplicative-expression := multiplicative-expression * pm-expression
 // CHECK-NEXT: │ │ ├─multiplicative-expression~IDENTIFIER := tok[5]
 // CHECK-NEXT: │ │ ├─* := tok[6]
-// CHECK-NEXT: │ │ └─pm-expression~IDENTIFIER := tok[7]
+// CHECK-NEXT: │ │ └─pm-expression~id-expression := unqualified-id #1
+// CHECK-NEXT: │ │   └─unqualified-id~IDENTIFIER := tok[7]
 // CHECK-NEXT: │ └─; := tok[8]
 // CHECK-NEXT: └─statement~simple-declaration := decl-specifier-seq init-declarator-list ;
 // CHECK-NEXT:   ├─decl-specifier-seq~simple-type-specifier := <ambiguous>
@@ -18,6 +19,6 @@
 // CHECK-NEXT:   │ └─simple-type-specifier~IDENTIFIER := tok[5]
 // CHECK-NEXT:   ├─init-declarator-list~ptr-declarator := ptr-operator ptr-declarator
 // CHECK-NEXT:   │ ├─ptr-operator~* := tok[6]
-// CHECK-NEXT:   │ └─ptr-declarator~IDENTIFIER := tok[7]
+// CHECK-NEXT:   │ └─ptr-declarator~id-expression =#1
 // CHECK-NEXT:   └─; := tok[8]
 }
Index: clang-tools-extra/pseudo/lib/Forest.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/Forest.cpp
+++ clang-tools-extra/pseudo/lib/Forest.cpp
@@ -63,6 +63,7 @@
       Dump = [&](const ForestNode *P, Token::Index End,
                  llvm::Optional<SymbolID> ElidedParent,
                  LineDecoration LineDec) {
+        bool SharedNode = VisitCounts.find(P)->getSecond() > 1;
         llvm::ArrayRef<const ForestNode *> Children;
         auto EndOfElement = [&](size_t ChildIndex) {
           return ChildIndex + 1 == Children.size()
@@ -74,7 +75,14 @@
         } else if (P->kind() == Sequence) {
           Children = P->elements();
           if (Abbreviated) {
-            if (Children.size() == 1) {
+            // Abbreviate chains of trivial sequence nodes.
+            //    A := B, B := C, C := D, D := X Y Z
+            // becomes
+            //    A~D := X Y Z
+            //
+            // We can't hide nodes that appear multiple times in the tree,
+            // because we need to call out their identity with IDs.
+            if (Children.size() == 1 && !SharedNode) {
               assert(Children[0]->startTokenIndex() == P->startTokenIndex() &&
                      EndOfElement(0) == End);
               return Dump(Children[0], End,
@@ -94,14 +102,21 @@
           Result += G.symbolName(*ElidedParent);
           Result += "~";
         }
-        Result.append(P->dump(G));
 
-        if (VisitCounts.find(P)->getSecond() > 1 &&
-            P->kind() != ForestNode::Terminal) {
-          // The first time, print as #1. Later, =#1.
+        if (SharedNode && P->kind() != ForestNode::Terminal) {
           auto It = ReferenceIds.try_emplace(P, ReferenceIds.size() + 1);
-          Result +=
-              llvm::formatv(" {0}#{1}", It.second ? "" : "=", It.first->second);
+          bool First = It.second;
+          unsigned ID = It.first->second;
+
+          // The first time, print as #1. Later, =#1.
+          if (First) {
+            Result += llvm::formatv("{0} #{1}", P->dump(G), ID);
+          } else {
+            Result += llvm::formatv("{0} =#{1}", G.symbolName(P->symbol()), ID);
+            Children = {}; // Don't walk the children again.
+          }
+        } else {
+          Result.append(P->dump(G));
         }
         Result.push_back('\n');
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to