[PATCH] D128307: [pseudo] Store reduction sequences by pointer in heaps, instead of by value.

Sam McCall via Phabricator via cfe-commits Thu, 23 Jun 2022 10:41:41 -0700

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
sammccall marked an inline comment as done.
Closed by commit rG7aff663b2a04: [pseudo] Store reduction sequences by pointer 
in heaps, instead of by value. (authored by sammccall).


Changed prior to commit:
  https://reviews.llvm.org/D128307?vs=438851&id=439462#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128307/new/

https://reviews.llvm.org/D128307

Files:
  clang-tools-extra/pseudo/lib/GLR.cpp


Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -192,13 +192,32 @@
   };
 
   // A sequence is the ForestNode payloads of the GSS nodes we are reducing.
-  // These are the RHS of the rule, the RuleID is stored in the Family.
-  // They specify a sequence ForestNode we may build (but we dedup first).
   using Sequence = llvm::SmallVector<const ForestNode *, Rule::MaxElements>;
+  // Like ArrayRef<const ForestNode*>, but with the missing operator<.
+  // (Sequences are big to move by value as the collections gets rearranged).
+  struct SequenceRef {
+    SequenceRef(const Sequence &S) : S(S) {}
+    llvm::ArrayRef<const ForestNode *> S;
+    friend bool operator==(SequenceRef A, SequenceRef B) { return A.S == B.S; }
+    friend bool operator<(const SequenceRef &A, const SequenceRef &B) {
+      return std::lexicographical_compare(A.S.begin(), A.S.end(), B.S.begin(),
+                                          B.S.end());
+    }
+  };
+  // Underlying storage for sequences pointed to by stored SequenceRefs.
+  std::deque<Sequence> SequenceStorage;
+  // We don't actually destroy the sequences between calls, to reuse storage.
+  // Everything SequenceStorage[ >=SequenceStorageCount ] is reusable scratch.
+  unsigned SequenceStorageCount;
+
+  // Halfway through a reduction (after the pop, before the push), we have
+  // collected nodes for the RHS of a rule, and reached a base node.
+  // They specify a sequence ForestNode we may build (but we dedup first).
+  // (The RuleID is not stored here, but rather in the Family).
   struct PushSpec {
     // A base node is the head after popping the GSS nodes we are reducing.
     const GSS::Node* Base = nullptr;
-    Sequence Seq;
+    Sequence *Seq = nullptr;
   };
   KeyedQueue<Family, PushSpec> Sequences; // FIXME: rename => PendingPushes?
 
@@ -219,6 +238,7 @@
     this->Heads = &Heads;
     this->Lookahead = Lookahead;
     assert(Sequences.empty());
+    SequenceStorageCount = 0;
 
     popPending();
     while (!Sequences.empty()) {
@@ -239,7 +259,14 @@
       if (I == Rule.Size) {
         F.Start = TempSequence.front()->startTokenIndex();
         LLVM_DEBUG(llvm::dbgs() << "    --> base at S" << N->State << "\n");
-        Sequences.emplace(F, PushSpec{N, TempSequence});
+
+        // Copy the chain to stable storage so it can be enqueued.
+        if (SequenceStorageCount == SequenceStorage.size())
+          SequenceStorage.emplace_back();
+        SequenceStorage[SequenceStorageCount] = TempSequence;
+        Sequence *Seq = &SequenceStorage[SequenceStorageCount++];
+
+        Sequences.emplace(F, PushSpec{N, Seq});
         return;
       }
       TempSequence[Rule.Size - 1 - I] = N->Payload;
@@ -266,7 +293,7 @@
 
   // Storage reused by each call to pushNext.
   std::vector<std::pair</*Goto*/ StateID, const GSS::Node *>> FamilyBases;
-  std::vector<std::pair<RuleID, Sequence>> FamilySequences;
+  std::vector<std::pair<RuleID, SequenceRef>> FamilySequences;
   std::vector<const GSS::Node *> Parents;
   std::vector<const ForestNode *> SequenceNodes;
 
@@ -287,7 +314,7 @@
     FamilyBases.clear();
     do {
       FamilySequences.emplace_back(Sequences.top().first.Rule,
-                                   Sequences.top().second.Seq);
+                                   *Sequences.top().second.Seq);
       FamilyBases.emplace_back(
           Params.Table.getGoToState(Sequences.top().second.Base->State,
                                     F.Symbol),
@@ -300,7 +327,7 @@
     SequenceNodes.clear();
     for (const auto &SequenceSpec : FamilySequences)
       SequenceNodes.push_back(&Params.Forest.createSequence(
-          F.Symbol, SequenceSpec.first, SequenceSpec.second));
+          F.Symbol, SequenceSpec.first, SequenceSpec.second.S));
     // Wrap in an ambiguous node if needed.
     const ForestNode *Parsed =
         SequenceNodes.size() == 1

Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -192,13 +192,32 @@
   };
 
   // A sequence is the ForestNode payloads of the GSS nodes we are reducing.
-  // These are the RHS of the rule, the RuleID is stored in the Family.
-  // They specify a sequence ForestNode we may build (but we dedup first).
   using Sequence = llvm::SmallVector<const ForestNode *, Rule::MaxElements>;
+  // Like ArrayRef<const ForestNode*>, but with the missing operator<.
+  // (Sequences are big to move by value as the collections gets rearranged).
+  struct SequenceRef {
+    SequenceRef(const Sequence &S) : S(S) {}
+    llvm::ArrayRef<const ForestNode *> S;
+    friend bool operator==(SequenceRef A, SequenceRef B) { return A.S == B.S; }
+    friend bool operator<(const SequenceRef &A, const SequenceRef &B) {
+      return std::lexicographical_compare(A.S.begin(), A.S.end(), B.S.begin(),
+                                          B.S.end());
+    }
+  };
+  // Underlying storage for sequences pointed to by stored SequenceRefs.
+  std::deque<Sequence> SequenceStorage;
+  // We don't actually destroy the sequences between calls, to reuse storage.
+  // Everything SequenceStorage[ >=SequenceStorageCount ] is reusable scratch.
+  unsigned SequenceStorageCount;
+
+  // Halfway through a reduction (after the pop, before the push), we have
+  // collected nodes for the RHS of a rule, and reached a base node.
+  // They specify a sequence ForestNode we may build (but we dedup first).
+  // (The RuleID is not stored here, but rather in the Family).
   struct PushSpec {
     // A base node is the head after popping the GSS nodes we are reducing.
     const GSS::Node* Base = nullptr;
-    Sequence Seq;
+    Sequence *Seq = nullptr;
   };
   KeyedQueue<Family, PushSpec> Sequences; // FIXME: rename => PendingPushes?
 
@@ -219,6 +238,7 @@
     this->Heads = &Heads;
     this->Lookahead = Lookahead;
     assert(Sequences.empty());
+    SequenceStorageCount = 0;
 
     popPending();
     while (!Sequences.empty()) {
@@ -239,7 +259,14 @@
       if (I == Rule.Size) {
         F.Start = TempSequence.front()->startTokenIndex();
         LLVM_DEBUG(llvm::dbgs() << "    --> base at S" << N->State << "\n");
-        Sequences.emplace(F, PushSpec{N, TempSequence});
+
+        // Copy the chain to stable storage so it can be enqueued.
+        if (SequenceStorageCount == SequenceStorage.size())
+          SequenceStorage.emplace_back();
+        SequenceStorage[SequenceStorageCount] = TempSequence;
+        Sequence *Seq = &SequenceStorage[SequenceStorageCount++];
+
+        Sequences.emplace(F, PushSpec{N, Seq});
         return;
       }
       TempSequence[Rule.Size - 1 - I] = N->Payload;
@@ -266,7 +293,7 @@
 
   // Storage reused by each call to pushNext.
   std::vector<std::pair</*Goto*/ StateID, const GSS::Node *>> FamilyBases;
-  std::vector<std::pair<RuleID, Sequence>> FamilySequences;
+  std::vector<std::pair<RuleID, SequenceRef>> FamilySequences;
   std::vector<const GSS::Node *> Parents;
   std::vector<const ForestNode *> SequenceNodes;
 
@@ -287,7 +314,7 @@
     FamilyBases.clear();
     do {
       FamilySequences.emplace_back(Sequences.top().first.Rule,
-                                   Sequences.top().second.Seq);
+                                   *Sequences.top().second.Seq);
       FamilyBases.emplace_back(
           Params.Table.getGoToState(Sequences.top().second.Base->State,
                                     F.Symbol),
@@ -300,7 +327,7 @@
     SequenceNodes.clear();
     for (const auto &SequenceSpec : FamilySequences)
       SequenceNodes.push_back(&Params.Forest.createSequence(
-          F.Symbol, SequenceSpec.first, SequenceSpec.second));
+          F.Symbol, SequenceSpec.first, SequenceSpec.second.S));
     // Wrap in an ambiguous node if needed.
     const ForestNode *Parsed =
         SequenceNodes.size() == 1

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D128307: [pseudo] Store reduction sequences by pointer in heaps, instead of by value.

Reply via email to