This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG85eaecbe8e54: [pseudo] Check follow-sets instead of tying
reduce actions to lookahead tokens. (authored by sammccall).
Changed prior to commit:
https://reviews.llvm.org/D128472?vs=440415&id=440417#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D128472/new/
https://reviews.llvm.org/D128472
Files:
clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
clang-tools-extra/pseudo/lib/GLR.cpp
clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
clang-tools-extra/pseudo/test/lr-build-basic.test
clang-tools-extra/pseudo/test/lr-build-conflicts.test
clang-tools-extra/pseudo/unittests/GLRTest.cpp
clang-tools-extra/pseudo/unittests/LRTableTest.cpp
Index: clang-tools-extra/pseudo/unittests/LRTableTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/LRTableTest.cpp
+++ clang-tools-extra/pseudo/unittests/LRTableTest.cpp
@@ -9,6 +9,7 @@
#include "clang-pseudo/grammar/LRTable.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang/Basic/TokenKinds.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <vector>
@@ -17,36 +18,59 @@
namespace pseudo {
namespace {
-using testing::IsEmpty;
-using testing::UnorderedElementsAre;
+using llvm::ValueIs;
+using testing::ElementsAre;
using Action = LRTable::Action;
TEST(LRTable, Builder) {
- GrammarTable GTable;
-
- // eof semi ...
- // +-------+----+-------+---
- // |state0 | | s0,r0 |...
- // |state1 | acc| |...
- // |state2 | | r1 |...
- // +-------+----+-------+---
+ std::vector<std::string> GrammarDiags;
+ auto G = Grammar::parseBNF(R"bnf(
+ _ := expr # rule 0
+ expr := term # rule 1
+ expr := expr + term # rule 2
+ term := IDENTIFIER # rule 3
+ )bnf",
+ GrammarDiags);
+ EXPECT_THAT(GrammarDiags, testing::IsEmpty());
+
+ SymbolID Term = *G->findNonterminal("term");
+ SymbolID Eof = tokenSymbol(tok::eof);
+ SymbolID Identifier = tokenSymbol(tok::identifier);
+ SymbolID Plus = tokenSymbol(tok::plus);
+
+ // eof IDENT term
+ // +-------+----+-------+------+
+ // |state0 | | s0 | |
+ // |state1 | | | g3 |
+ // |state2 | | | |
+ // +-------+----+-------+------+-------
std::vector<LRTable::Entry> Entries = {
- {/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)},
- {/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)},
- {/* State */ 1, tokenSymbol(tok::eof), Action::reduce(2)},
- {/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)}};
- GrammarTable GT;
- LRTable T = LRTable::buildForTests(GT, Entries);
- EXPECT_THAT(T.find(0, tokenSymbol(tok::eof)), IsEmpty());
- EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)),
- UnorderedElementsAre(Action::shift(0), Action::reduce(0)));
- EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)),
- UnorderedElementsAre(Action::reduce(2)));
- EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty());
- EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)),
- UnorderedElementsAre(Action::reduce(1)));
+ {/* State */ 0, Identifier, Action::shift(0)},
+ {/* State */ 1, Term, Action::goTo(3)},
+ };
+ std::vector<LRTable::ReduceEntry> ReduceEntries = {
+ {/*State=*/0, /*Rule=*/0},
+ {/*State=*/1, /*Rule=*/2},
+ {/*State=*/2, /*Rule=*/1},
+ };
+ LRTable T = LRTable::buildForTests(*G, Entries, ReduceEntries);
+ EXPECT_EQ(T.getShiftState(0, Eof), llvm::None);
+ EXPECT_THAT(T.getShiftState(0, Identifier), ValueIs(0));
+ EXPECT_THAT(T.getReduceRules(0), ElementsAre(0));
+
+ EXPECT_EQ(T.getShiftState(1, Eof), llvm::None);
+ EXPECT_EQ(T.getShiftState(1, Identifier), llvm::None);
+ EXPECT_EQ(T.getGoToState(1, Term), 3);
+ EXPECT_THAT(T.getReduceRules(1), ElementsAre(2));
+
// Verify the behaivor for other non-available-actions terminals.
- EXPECT_THAT(T.find(2, tokenSymbol(tok::kw_int)), IsEmpty());
+ SymbolID Int = tokenSymbol(tok::kw_int);
+ EXPECT_EQ(T.getShiftState(2, Int), llvm::None);
+
+ // Check follow sets.
+ EXPECT_TRUE(T.canFollow(Term, Plus));
+ EXPECT_TRUE(T.canFollow(Term, Eof));
+ EXPECT_FALSE(T.canFollow(Term, Int));
}
} // namespace
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -31,6 +31,7 @@
using Action = LRTable::Action;
using testing::AllOf;
+using testing::ElementsAre;
using testing::UnorderedElementsAre;
MATCHER_P(state, StateID, "") { return arg->State == StateID; }
@@ -112,11 +113,13 @@
buildGrammar({}, {}); // Create a fake empty grammar.
LRTable T =
- LRTable::buildForTests(G->table(), /*Entries=*/{
+ LRTable::buildForTests(*G, /*Entries=*/
+ {
{1, tokenSymbol(tok::semi), Action::shift(4)},
{2, tokenSymbol(tok::semi), Action::shift(4)},
{3, tokenSymbol(tok::semi), Action::shift(5)},
- });
+ },
+ {});
ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
std::vector<const GSS::Node *> NewHeads;
@@ -142,14 +145,15 @@
{"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});
LRTable Table = LRTable::buildForTests(
- G->table(), {
- {/*State=*/0, id("class-name"), Action::goTo(2)},
- {/*State=*/0, id("enum-name"), Action::goTo(3)},
- {/*State=*/1, tokenSymbol(tok::l_brace),
- Action::reduce(ruleFor("class-name"))},
- {/*State=*/1, tokenSymbol(tok::l_brace),
- Action::reduce(ruleFor("enum-name"))},
- });
+ *G,
+ {
+ {/*State=*/0, id("class-name"), Action::goTo(2)},
+ {/*State=*/0, id("enum-name"), Action::goTo(3)},
+ },
+ {
+ {/*State=*/1, ruleFor("class-name")},
+ {/*State=*/1, ruleFor("enum-name")},
+ });
const auto *GSSNode0 =
GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{});
@@ -157,7 +161,7 @@
GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});
std::vector<const GSS::Node *> Heads = {GSSNode1};
- glrReduce(Heads, tokenSymbol(tok::l_brace), {*G, Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::eof), {*G, Table, Arena, GSStack});
EXPECT_THAT(Heads, UnorderedElementsAre(
GSSNode1,
AllOf(state(2), parsedSymbolID(id("class-name")),
@@ -189,15 +193,16 @@
/*Parents=*/{GSSNode2, GSSNode3});
LRTable Table = LRTable::buildForTests(
- G->table(),
+ *G,
{
{/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)},
{/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)},
- {/*State=*/4, tokenSymbol(tok::identifier),
- Action::reduce(ruleFor("ptr-operator"))},
+ },
+ {
+ {/*State=*/4, ruleFor("ptr-operator")},
});
std::vector<const GSS::Node *> Heads = {GSSNode4};
- glrReduce(Heads, tokenSymbol(tok::identifier), {*G, Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::eof), {*G, Table, Arena, GSStack});
EXPECT_THAT(Heads, UnorderedElementsAre(
GSSNode4,
@@ -242,17 +247,17 @@
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
LRTable Table = LRTable::buildForTests(
- G->table(),
+ *G,
{
{/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)},
{/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)},
- {/*State=*/3, tokenSymbol(tok::l_paren),
- Action::reduce(/* type-name := class-name */ 0)},
- {/*State=*/4, tokenSymbol(tok::l_paren),
- Action::reduce(/* type-name := enum-name */ 1)},
+ },
+ {
+ {/*State=*/3, /* type-name := class-name */ 0},
+ {/*State=*/4, /* type-name := enum-name */ 1},
});
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
- glrReduce(Heads, tokenSymbol(tok::l_paren), {*G, Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::eof), {*G, Table, Arena, GSStack});
// Verify that the stack heads are joint at state 5 after reduces.
EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
@@ -299,16 +304,17 @@
/*Parents=*/{GSSNode2});
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
- LRTable Table = LRTable::buildForTests(
- G->table(), {
- {/*State=*/0, id("pointer"), Action::goTo(5)},
- {3, tokenSymbol(tok::l_paren),
- Action::reduce(/* pointer := class-name */ 0)},
- {4, tokenSymbol(tok::l_paren),
- Action::reduce(/* pointer := enum-name */ 1)},
- });
+ LRTable Table =
+ LRTable::buildForTests(*G,
+ {
+ {/*State=*/0, id("pointer"), Action::goTo(5)},
+ },
+ {
+ {3, /* pointer := class-name */ 0},
+ {4, /* pointer := enum-name */ 1},
+ });
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
- glrReduce(Heads, tokenSymbol(tok::l_paren), {*G, Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::eof), {*G, Table, Arena, GSStack});
EXPECT_THAT(
Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
@@ -325,6 +331,38 @@
"[ 1, end) ââ* := tok[1]\n");
}
+TEST_F(GLRTest, ReduceLookahead) {
+ // A term can be followed by +, but not by -.
+ buildGrammar({"sum", "term"}, {"expr := term + term", "term := IDENTIFIER"});
+ LRTable Table =
+ LRTable::buildForTests(*G,
+ {
+ {/*State=*/0, id("term"), Action::goTo(2)},
+ },
+ {
+ {/*State=*/1, 0},
+ });
+
+ auto *Identifier = &Arena.createTerminal(tok::identifier, /*Start=*/0);
+
+ const auto *Root =
+ GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{});
+ const auto *GSSNode1 =
+ GSStack.addNode(/*State=*/1, /*ForestNode=*/Identifier, {Root});
+
+ // When the lookahead is +, reduce is performed.
+ std::vector<const GSS::Node *> Heads = {GSSNode1};
+ glrReduce(Heads, tokenSymbol(tok::plus), {*G, Table, Arena, GSStack});
+ EXPECT_THAT(Heads,
+ ElementsAre(GSSNode1, AllOf(state(2), parsedSymbolID(id("term")),
+ parents(Root))));
+
+ // When the lookahead is -, reduce is not performed.
+ Heads = {GSSNode1};
+ glrReduce(Heads, tokenSymbol(tok::minus), {*G, Table, Arena, GSStack});
+ EXPECT_THAT(Heads, ElementsAre(GSSNode1));
+}
+
TEST_F(GLRTest, PerfectForestNodeSharing) {
// Run the GLR on a simple grammar and test that we build exactly one forest
// node per (SymbolID, token range).
Index: clang-tools-extra/pseudo/test/lr-build-conflicts.test
===================================================================
--- clang-tools-extra/pseudo/test/lr-build-conflicts.test
+++ clang-tools-extra/pseudo/test/lr-build-conflicts.test
@@ -30,17 +30,15 @@
# RUN: clang-pseudo -grammar %s -print-table | FileCheck %s --check-prefix=TABLE
# TABLE: LRTable:
# TABLE-NEXT: State 0
-# TABLE-NEXT: 'IDENTIFIER': shift state 2
-# TABLE-NEXT: 'expr': go to state 1
+# TABLE-NEXT: IDENTIFIER: shift state 2
+# TABLE-NEXT: expr: go to state 1
# TABLE-NEXT: State 1
-# TABLE-NEXT: '-': shift state 3
+# TABLE-NEXT: -: shift state 3
# TABLE-NEXT: State 2
-# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER'
-# TABLE-NEXT: '-': reduce by rule 1 'expr := IDENTIFIER'
+# TABLE-NEXT: EOF -: reduce by rule 1 'expr := IDENTIFIER'
# TABLE-NEXT: State 3
-# TABLE-NEXT: 'IDENTIFIER': shift state 2
-# TABLE-NEXT: 'expr': go to state 4
+# TABLE-NEXT: IDENTIFIER: shift state 2
+# TABLE-NEXT: expr: go to state 4
# TABLE-NEXT: State 4
-# TABLE-NEXT: 'EOF': reduce by rule 0 'expr := expr - expr'
-# TABLE-NEXT: '-': shift state 3
-# TABLE-NEXT: '-': reduce by rule 0 'expr := expr - expr'
+# TABLE-NEXT: -: shift state 3
+# TABLE-NEXT: EOF -: reduce by rule 0 'expr := expr - expr'
Index: clang-tools-extra/pseudo/test/lr-build-basic.test
===================================================================
--- clang-tools-extra/pseudo/test/lr-build-basic.test
+++ clang-tools-extra/pseudo/test/lr-build-basic.test
@@ -18,11 +18,11 @@
# RUN: clang-pseudo -grammar %s -print-table | FileCheck %s --check-prefix=TABLE
# TABLE: LRTable:
# TABLE-NEXT: State 0
-# TABLE-NEXT: 'IDENTIFIER': shift state 3
-# TABLE-NEXT: 'expr': go to state 1
-# TABLE-NEXT: 'id': go to state 2
+# TABLE-NEXT: IDENTIFIER: shift state 3
+# TABLE-NEXT: expr: go to state 1
+# TABLE-NEXT: id: go to state 2
# TABLE-NEXT: State 1
# TABLE-NEXT: State 2
-# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id'
+# TABLE-NEXT: EOF: reduce by rule 1 'expr := id'
# TABLE-NEXT: State 3
-# TABLE-NEXT: 'EOF': reduce by rule 0 'id := IDENTIFIER'
+# TABLE-NEXT: EOF: reduce by rule 0 'id := IDENTIFIER'
Index: clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@@ -10,6 +10,7 @@
#include "clang-pseudo/grammar/LRGraph.h"
#include "clang-pseudo/grammar/LRTable.h"
#include "clang/Basic/TokenKinds.h"
+#include "llvm/ADT/SmallSet.h"
#include <cstdint>
namespace llvm {
@@ -38,13 +39,13 @@
namespace clang {
namespace pseudo {
-class LRTable::Builder {
-public:
- Builder(llvm::ArrayRef<std::pair<SymbolID, StateID>> StartStates)
- : StartStates(StartStates) {}
+struct LRTable::Builder {
+ std::vector<std::pair<SymbolID, StateID>> StartStates;
+ llvm::DenseSet<Entry> Entries;
+ llvm::DenseMap<StateID, llvm::SmallSet<RuleID, 4>> Reduces;
+ std::vector<llvm::DenseSet<SymbolID>> FollowSets;
- bool insert(Entry E) { return Entries.insert(std::move(E)).second; }
- LRTable build(const GrammarTable >, unsigned NumStates) && {
+ LRTable build(unsigned NumStates) && {
// E.g. given the following parsing table with 3 states and 3 terminals:
//
// a b c
@@ -86,16 +87,34 @@
++SortedIndex;
}
Table.StartStates = std::move(StartStates);
+
+ // Compile the follow sets into a bitmap.
+ Table.FollowSets.resize(tok::NUM_TOKENS * FollowSets.size());
+ for (SymbolID NT = 0; NT < FollowSets.size(); ++NT)
+ for (SymbolID Follow : FollowSets[NT])
+ Table.FollowSets.set(NT * tok::NUM_TOKENS + symbolToToken(Follow));
+
+ // Store the reduce actions in a vector partitioned by state.
+ Table.ReduceOffset.reserve(NumStates + 1);
+ std::vector<RuleID> StateRules;
+ for (StateID S = 0; S < NumStates; ++S) {
+ Table.ReduceOffset.push_back(Table.Reduces.size());
+ auto It = Reduces.find(S);
+ if (It == Reduces.end())
+ continue;
+ Table.Reduces.insert(Table.Reduces.end(), It->second.begin(),
+ It->second.end());
+ std::sort(Table.Reduces.begin() + Table.ReduceOffset.back(),
+ Table.Reduces.end());
+ }
+ Table.ReduceOffset.push_back(Table.Reduces.size());
+
return Table;
}
-
-private:
- llvm::DenseSet<Entry> Entries;
- std::vector<std::pair<SymbolID, StateID>> StartStates;
};
-LRTable LRTable::buildForTests(const GrammarTable >,
- llvm::ArrayRef<Entry> Entries) {
+LRTable LRTable::buildForTests(const Grammar &G, llvm::ArrayRef<Entry> Entries,
+ llvm::ArrayRef<ReduceEntry> Reduces) {
StateID MaxState = 0;
for (const auto &Entry : Entries) {
MaxState = std::max(MaxState, Entry.State);
@@ -104,22 +123,26 @@
if (Entry.Act.kind() == LRTable::Action::GoTo)
MaxState = std::max(MaxState, Entry.Act.getGoToState());
}
- Builder Build({});
- for (const Entry &E : Entries)
- Build.insert(E);
- return std::move(Build).build(GT, /*NumStates=*/MaxState + 1);
+ Builder Build;
+ Build.Entries.insert(Entries.begin(), Entries.end());
+ for (const ReduceEntry &E : Reduces)
+ Build.Reduces[E.State].insert(E.Rule);
+ Build.FollowSets = followSets(G);
+ return std::move(Build).build(/*NumStates=*/MaxState + 1);
}
LRTable LRTable::buildSLR(const Grammar &G) {
auto Graph = LRGraph::buildLR0(G);
- Builder Build(Graph.startStates());
+ Builder Build;
+ Build.StartStates = Graph.startStates();
for (const auto &T : Graph.edges()) {
Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
- Build.insert({T.Src, T.Label, Act});
+ Build.Entries.insert({T.Src, T.Label, Act});
}
+ Build.FollowSets = followSets(G);
assert(Graph.states().size() <= (1 << StateBits) &&
"Graph states execceds the maximum limit!");
- auto FollowSets = followSets(G);
+ // Add reduce actions.
for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
for (const Item &I : Graph.states()[SID].Items) {
// If we've just parsed the start symbol, this means we successfully parse
@@ -127,17 +150,13 @@
// LRTable (the GLR parser handles it specifically).
if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext())
continue;
- if (!I.hasNext()) {
+ if (!I.hasNext())
// If we've reached the end of a rule A := ..., then we can reduce if
// the next token is in the follow set of A.
- for (SymbolID Follow : FollowSets[G.lookupRule(I.rule()).Target]) {
- assert(isToken(Follow));
- Build.insert({SID, Follow, Action::reduce(I.rule())});
- }
- }
+ Build.Reduces[SID].insert(I.rule());
}
}
- return std::move(Build).build(G.table(), Graph.states().size());
+ return std::move(Build).build(Graph.states().size());
}
} // namespace pseudo
Index: clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
+++ clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
@@ -10,6 +10,7 @@
#include "clang-pseudo/grammar/Grammar.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
@@ -21,8 +22,6 @@
switch (A.kind()) {
case LRTable::Action::Shift:
return OS << llvm::formatv("shift state {0}", A.getShiftState());
- case LRTable::Action::Reduce:
- return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule());
case LRTable::Action::GoTo:
return OS << llvm::formatv("go to state {0}", A.getGoToState());
case LRTable::Action::Sentinel:
@@ -36,9 +35,11 @@
Statistics of the LR parsing table:
number of states: {0}
number of actions: {1}
- size of the table (bytes): {2}
+ number of reduces: {2}
+ size of the table (bytes): {3}
)",
- StateOffset.size() - 1, Actions.size(), bytes())
+ StateOffset.size() - 1, Actions.size(), Reduces.size(),
+ bytes())
.str();
}
@@ -52,19 +53,27 @@
SymbolID TokID = tokenSymbol(static_cast<tok::TokenKind>(Terminal));
for (auto A : find(S, TokID)) {
if (A.kind() == LRTable::Action::Shift)
- OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n",
+ OS.indent(4) << llvm::formatv("{0}: shift state {1}\n",
G.symbolName(TokID), A.getShiftState());
- else if (A.kind() == LRTable::Action::Reduce)
- OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n",
- G.symbolName(TokID), A.getReduceRule(),
- G.dumpRule(A.getReduceRule()));
}
}
+ for (RuleID R : getReduceRules(S)) {
+ SymbolID Target = G.lookupRule(R).Target;
+ std::vector<llvm::StringRef> Terminals;
+ for (unsigned Terminal = 0; Terminal < NumTerminals; ++Terminal) {
+ SymbolID TokID = tokenSymbol(static_cast<tok::TokenKind>(Terminal));
+ if (canFollow(Target, TokID))
+ Terminals.push_back(G.symbolName(TokID));
+ }
+ OS.indent(4) << llvm::formatv("{0}: reduce by rule {1} '{2}'\n",
+ llvm::join(Terminals, " "), R,
+ G.dumpRule(R));
+ }
for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size();
++NontermID) {
if (find(S, NontermID).empty())
continue;
- OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n",
+ OS.indent(4) << llvm::formatv("{0}: go to state {1}\n",
G.symbolName(NontermID),
getGoToState(S, NontermID));
}
@@ -77,18 +86,12 @@
// FIXME: we spend a significant amount of time on misses here.
// We could consider storing a std::bitset for a cheaper test?
assert(pseudo::isToken(Terminal) && "expected terminal symbol!");
- for (const auto &Result : getActions(State, Terminal))
+ for (const auto &Result : find(State, Terminal))
if (Result.kind() == Action::Shift)
return Result.getShiftState(); // unique: no shift/shift conflicts.
return llvm::None;
}
-llvm::ArrayRef<LRTable::Action> LRTable::getActions(StateID State,
- SymbolID Terminal) const {
- assert(pseudo::isToken(Terminal) && "expect terminal symbol!");
- return find(State, Terminal);
-}
-
LRTable::StateID LRTable::getGoToState(StateID State,
SymbolID Nonterminal) const {
assert(pseudo::isNonterminal(Nonterminal) && "expected nonterminal symbol!");
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -251,9 +251,8 @@
private:
// pop walks up the parent chain(s) for a reduction from Head by to Rule.
// Once we reach the end, record the bases and sequences.
- void pop(const GSS::Node *Head, RuleID RID) {
+ void pop(const GSS::Node *Head, RuleID RID, const Rule &Rule) {
LLVM_DEBUG(llvm::dbgs() << " Pop " << Params.G.dumpRule(RID) << "\n");
- const auto &Rule = Params.G.lookupRule(RID);
Family F{/*Start=*/0, /*Symbol=*/Rule.Target, /*Rule=*/RID};
TempSequence.resize_for_overwrite(Rule.Size);
auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) {
@@ -286,11 +285,11 @@
// In trivial cases, we perform the complete reduce here!
if (popAndPushTrivial())
continue;
- for (const auto &A :
- Params.Table.getActions((*Heads)[NextPopHead]->State, Lookahead)) {
- if (A.kind() != LRTable::Action::Reduce)
- continue;
- pop((*Heads)[NextPopHead], A.getReduceRule());
+ for (RuleID RID :
+ Params.Table.getReduceRules((*Heads)[NextPopHead]->State)) {
+ const auto &Rule = Params.G.lookupRule(RID);
+ if (Params.Table.canFollow(Rule.Target, Lookahead))
+ pop((*Heads)[NextPopHead], RID, Rule);
}
}
}
@@ -367,21 +366,23 @@
// - the head must have only one reduction rule
// - the reduction path must be a straight line (no multiple parents)
// (Roughly this means there's no local ambiguity, so the LR algorithm works).
+ //
+ // Returns true if we successfully consumed the next unpopped head.
bool popAndPushTrivial() {
if (!Sequences.empty() || Heads->size() != NextPopHead + 1)
return false;
const GSS::Node *Head = Heads->back();
llvm::Optional<RuleID> RID;
- for (auto &A : Params.Table.getActions(Head->State, Lookahead)) {
- if (A.kind() != LRTable::Action::Reduce)
- continue;
- if (RID)
+ for (RuleID R : Params.Table.getReduceRules(Head->State)) {
+ if (RID.hasValue())
return false;
- RID = A.getReduceRule();
+ RID = R;
}
if (!RID)
return true; // no reductions available, but we've processed the head!
const auto &Rule = Params.G.lookupRule(*RID);
+ if (!Params.Table.canFollow(Rule.Target, Lookahead))
+ return true; // reduction is not available
const GSS::Node *Base = Head;
TempSequence.resize_for_overwrite(Rule.Size);
for (unsigned I = 0; I < Rule.Size; ++I) {
Index: clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
@@ -38,6 +38,8 @@
#include "clang-pseudo/grammar/Grammar.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/Capacity.h"
#include <cstdint>
#include <vector>
@@ -62,6 +64,9 @@
// Action represents the terminal and nonterminal actions, it combines the
// entry of the ACTION and GOTO tables from the LR literature.
+ //
+ // FIXME: as we move away from a homogeneous table structure shared between
+ // action types, this class becomes less useful. Remove it.
class Action {
public:
enum Kind : uint8_t {
@@ -73,8 +78,6 @@
// A shift is a forward transition, and the value n is the next state that
// the parser is to enter.
Shift,
- // Reduce by a rule: pop the state stack.
- Reduce,
// NOTE: there are no typical accept actions in the LRtable, accept
// actions are handled specifically in the parser -- if the parser
@@ -91,7 +94,6 @@
static Action goTo(StateID S) { return Action(GoTo, S); }
static Action shift(StateID S) { return Action(Shift, S); }
- static Action reduce(RuleID RID) { return Action(Reduce, RID); }
static Action sentinel() { return Action(Sentinel, 0); }
StateID getShiftState() const {
@@ -102,10 +104,6 @@
assert(kind() == GoTo);
return Value;
}
- RuleID getReduceRule() const {
- assert(kind() == Reduce);
- return Value;
- }
Kind kind() const { return static_cast<Kind>(K); }
bool operator==(const Action &L) const { return opaque() == L.opaque(); }
@@ -123,9 +121,6 @@
uint16_t Value : ValueBits;
};
- // Returns all available actions for the given state on a terminal.
- // Expected to be called by LR parsers.
- llvm::ArrayRef<Action> getActions(StateID State, SymbolID Terminal) const;
// Returns the state after we reduce a nonterminal.
// Expected to be called by LR parsers.
// REQUIRES: Nonterminal is valid here.
@@ -135,9 +130,26 @@
// If the terminal is invalid here, returns None.
llvm::Optional<StateID> getShiftState(StateID State, SymbolID Terminal) const;
- // Looks up available actions.
- // Returns empty if no available actions in the table.
- llvm::ArrayRef<Action> find(StateID State, SymbolID Symbol) const;
+ // Returns the possible reductions from a state.
+ //
+ // These are not keyed by a lookahead token. Instead, call canFollow() to
+ // check whether a reduction should apply in the current context:
+ // for (RuleID R : LR.getReduceRules(S)) {
+ // if (!LR.canFollow(G.lookupRule(R).Target, NextToken))
+ // continue;
+ // // ...apply reduce...
+ // }
+ llvm::ArrayRef<RuleID> getReduceRules(StateID State) const {
+ return llvm::makeArrayRef(&Reduces[ReduceOffset[State]],
+ &Reduces[ReduceOffset[State + 1]]);
+ }
+ // Returns whether Terminal can follow Nonterminal in a valid source file.
+ bool canFollow(SymbolID Nonterminal, SymbolID Terminal) const {
+ assert(isToken(Terminal));
+ assert(isNonterminal(Nonterminal));
+ return FollowSets.test(tok::NUM_TOKENS * Nonterminal +
+ symbolToToken(Terminal));
+ }
// Returns the state from which the LR parser should start to parse the input
// tokens as the given StartSymbol.
@@ -151,9 +163,12 @@
StateID getStartState(SymbolID StartSymbol) const;
size_t bytes() const {
- return sizeof(*this) + Actions.capacity() * sizeof(Action) +
- Symbols.capacity() * sizeof(SymbolID) +
- StateOffset.capacity() * sizeof(uint32_t);
+ return sizeof(*this) + llvm::capacity_in_bytes(Actions) +
+ llvm::capacity_in_bytes(Symbols) +
+ llvm::capacity_in_bytes(StateOffset) +
+ llvm::capacity_in_bytes(Reduces) +
+ llvm::capacity_in_bytes(ReduceOffset) +
+ llvm::capacity_in_bytes(FollowSets);
}
std::string dumpStatistics() const;
@@ -162,17 +177,25 @@
// Build a SLR(1) parsing table.
static LRTable buildSLR(const Grammar &G);
- class Builder;
+ struct Builder;
// Represents an entry in the table, used for building the LRTable.
struct Entry {
StateID State;
SymbolID Symbol;
Action Act;
};
+ struct ReduceEntry {
+ StateID State;
+ RuleID Rule;
+ };
// Build a specifid table for testing purposes.
- static LRTable buildForTests(const GrammarTable &, llvm::ArrayRef<Entry>);
+ static LRTable buildForTests(const Grammar &G, llvm::ArrayRef<Entry>,
+ llvm::ArrayRef<ReduceEntry>);
private:
+ // Looks up actions stored in the generic table.
+ llvm::ArrayRef<Action> find(StateID State, SymbolID Symbol) const;
+
// Conceptually the LR table is a multimap from (State, SymbolID) => Action.
// Our physical representation is quite different for compactness.
@@ -188,6 +211,17 @@
std::vector<Action> Actions;
// A sorted table, storing the start state for each target parsing symbol.
std::vector<std::pair<SymbolID, StateID>> StartStates;
+
+ // Given a state ID S, the half-open range of Reduces is
+ // [ReduceOffset[S], ReduceOffset[S+1])
+ std::vector<uint32_t> ReduceOffset;
+ std::vector<RuleID> Reduces;
+ // Conceptually this is a bool[SymbolID][Token], each entry describing whether
+ // the grammar allows the (nonterminal) symbol to be followed by the token.
+ //
+ // This is flattened by encoding the (SymbolID Nonterminal, tok::Kind Token)
+ // as an index: Nonterminal * NUM_TOKENS + Token.
+ llvm::BitVector FollowSets;
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits