sammccall updated this revision to Diff 438732.
sammccall added a comment.
tweak & document fast-path
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D127357/new/
https://reviews.llvm.org/D127357
Files:
clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
clang-tools-extra/pseudo/lib/GLR.cpp
clang-tools-extra/pseudo/lib/cxx/CXX.cpp
clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
clang-tools-extra/pseudo/test/lr-build-basic.test
clang-tools-extra/pseudo/test/lr-build-conflicts.test
clang-tools-extra/pseudo/tool/ClangPseudo.cpp
clang-tools-extra/pseudo/unittests/GLRTest.cpp
clang-tools-extra/pseudo/unittests/GrammarTest.cpp
clang-tools-extra/pseudo/unittests/LRTableTest.cpp
Index: clang-tools-extra/pseudo/unittests/LRTableTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/LRTableTest.cpp
+++ clang-tools-extra/pseudo/unittests/LRTableTest.cpp
@@ -17,36 +17,33 @@
namespace pseudo {
namespace {
+using testing::ElementsAre;
using testing::IsEmpty;
using testing::UnorderedElementsAre;
-using Action = LRTable::Action;
TEST(LRTable, Builder) {
- GrammarTable GTable;
-
+ GrammarTable GT;
// eof semi ...
// +-------+----+-------+---
// |state0 | | s0,r0 |...
// |state1 | acc| |...
// |state2 | | r1 |...
// +-------+----+-------+---
- std::vector<LRTable::Entry> Entries = {
- {/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)},
- {/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)},
- {/* State */ 1, tokenSymbol(tok::eof), Action::reduce(2)},
- {/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)}};
- GrammarTable GT;
- LRTable T = LRTable::buildForTests(GT, Entries);
- EXPECT_THAT(T.find(0, tokenSymbol(tok::eof)), IsEmpty());
- EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)),
- UnorderedElementsAre(Action::shift(0), Action::reduce(0)));
- EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)),
- UnorderedElementsAre(Action::reduce(2)));
- EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty());
- EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)),
- UnorderedElementsAre(Action::reduce(1)));
+ LRTable::Builder Builder;
+ Builder.StateCount = 3;
+ Builder.Shift[{/*State=*/0, tokenSymbol(tok::semi)}] = 0;
+ Builder.Reduce.push_back({/*State=*/0, /*Rule=*/0});
+ Builder.Reduce.push_back({/*State=*/1, /*Rule=*/2});
+ Builder.Reduce.push_back({/*State=*/2, /*Rule=*/1});
+ LRTable T = std::move(Builder).build();
+ EXPECT_EQ(T.getShiftState(0, tokenSymbol(tok::eof)), llvm::None);
+ EXPECT_EQ(T.getShiftState(0, tokenSymbol(tok::semi)), LRTable::StateID{0});
+ EXPECT_THAT(T.getReduceRules(0), ElementsAre(0));
+ EXPECT_EQ(T.getShiftState(1, tokenSymbol(tok::semi)), llvm::None);
+ EXPECT_THAT(T.getReduceRules(1), ElementsAre(2));
+ EXPECT_THAT(T.getReduceRules(2), ElementsAre(1));
// Verify the behaivor for other non-available-actions terminals.
- EXPECT_THAT(T.find(2, tokenSymbol(tok::kw_int)), IsEmpty());
+ EXPECT_EQ(T.getShiftState(2, tokenSymbol(tok::kw_int)), llvm::None);
}
} // namespace
Index: clang-tools-extra/pseudo/unittests/GrammarTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GrammarTest.cpp
+++ clang-tools-extra/pseudo/unittests/GrammarTest.cpp
@@ -142,66 +142,6 @@
"Unknown attribute 'unknown'"));
}
-TEST_F(GrammarTest, FirstAndFollowSets) {
- build(
- R"bnf(
-_ := expr
-expr := expr - term
-expr := term
-term := IDENTIFIER
-term := ( expr )
-)bnf");
- ASSERT_TRUE(Diags.empty());
- auto ToPairs = [](std::vector<llvm::DenseSet<SymbolID>> Input) {
- std::vector<std::pair<SymbolID, llvm::DenseSet<SymbolID>>> Sets;
- for (SymbolID ID = 0; ID < Input.size(); ++ID)
- Sets.emplace_back(ID, std::move(Input[ID]));
- return Sets;
- };
-
- EXPECT_THAT(
- ToPairs(firstSets(*G)),
- UnorderedElementsAre(
- Pair(id("_"), UnorderedElementsAre(id("IDENTIFIER"), id("("))),
- Pair(id("expr"), UnorderedElementsAre(id("IDENTIFIER"), id("("))),
- Pair(id("term"), UnorderedElementsAre(id("IDENTIFIER"), id("(")))));
- EXPECT_THAT(
- ToPairs(followSets(*G)),
- UnorderedElementsAre(
- Pair(id("_"), UnorderedElementsAre(id("EOF"))),
- Pair(id("expr"), UnorderedElementsAre(id("-"), id("EOF"), id(")"))),
- Pair(id("term"), UnorderedElementsAre(id("-"), id("EOF"), id(")")))));
-
- build(R"bnf(
-# A simplfied C++ decl-specifier-seq.
-_ := decl-specifier-seq
-decl-specifier-seq := decl-specifier decl-specifier-seq
-decl-specifier-seq := decl-specifier
-decl-specifier := simple-type-specifier
-decl-specifier := INLINE
-simple-type-specifier := INT
- )bnf");
- ASSERT_TRUE(Diags.empty());
- EXPECT_THAT(
- ToPairs(firstSets(*G)),
- UnorderedElementsAre(
- Pair(id("_"), UnorderedElementsAre(id("INLINE"), id("INT"))),
- Pair(id("decl-specifier-seq"),
- UnorderedElementsAre(id("INLINE"), id("INT"))),
- Pair(id("simple-type-specifier"), UnorderedElementsAre(id("INT"))),
- Pair(id("decl-specifier"),
- UnorderedElementsAre(id("INLINE"), id("INT")))));
- EXPECT_THAT(
- ToPairs(followSets(*G)),
- UnorderedElementsAre(
- Pair(id("_"), UnorderedElementsAre(id("EOF"))),
- Pair(id("decl-specifier-seq"), UnorderedElementsAre(id("EOF"))),
- Pair(id("decl-specifier"),
- UnorderedElementsAre(id("INLINE"), id("INT"), id("EOF"))),
- Pair(id("simple-type-specifier"),
- UnorderedElementsAre(id("INLINE"), id("INT"), id("EOF")))));
-}
-
} // namespace
} // namespace pseudo
} // namespace clang
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -29,8 +29,9 @@
namespace {
-using Action = LRTable::Action;
using testing::AllOf;
+using testing::ElementsAre;
+using testing::UnorderedElementsAre;
MATCHER_P(state, StateID, "") { return arg->State == StateID; }
MATCHER_P(parsedSymbol, FNode, "") { return arg->Payload == FNode; }
@@ -83,17 +84,10 @@
return 0;
}
- NewHeadCallback captureNewHeads() {
- return [this](const GSS::Node *NewHead) {
- NewHeadResults.push_back(NewHead);
- };
- };
-
protected:
std::unique_ptr<Grammar> G;
ForestArena Arena;
GSS GSStack;
- std::vector<const GSS::Node*> NewHeadResults;
};
TEST_F(GLRTest, ShiftMergingHeads) {
@@ -107,33 +101,35 @@
// 0---1---4
// â---2---â
// â---3---5
+ LRTable::Builder LR;
+ LR.StateCount = 6;
+ LR.Shift[{1, tokenSymbol(tok::semi)}] = 4;
+ LR.Shift[{2, tokenSymbol(tok::semi)}] = 4;
+ LR.Shift[{3, tokenSymbol(tok::semi)}] = 5;
+
auto *GSSNode0 =
GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{});
- auto *GSSNode1 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr,
+ auto *GSSNode1 = GSStack.addNode(/*State=*/1, /*ForestNode=*/nullptr,
/*Parents=*/{GSSNode0});
- auto *GSSNode2 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr,
+ auto *GSSNode2 = GSStack.addNode(/*State=*/2, /*ForestNode=*/nullptr,
/*Parents=*/{GSSNode0});
- auto *GSSNode3 = GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr,
+ auto *GSSNode3 = GSStack.addNode(/*State=*/3, /*ForestNode=*/nullptr,
/*Parents=*/{GSSNode0});
buildGrammar({}, {}); // Create a fake empty grammar.
- LRTable T = LRTable::buildForTests(G->table(), /*Entries=*/{});
+ LRTable T = std::move(LR).build();
ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
- std::vector<ParseStep> PendingShift = {
- {GSSNode1, Action::shift(4)},
- {GSSNode3, Action::shift(5)},
- {GSSNode2, Action::shift(4)},
- };
- glrShift(PendingShift, SemiTerminal, {*G, T, Arena, GSStack},
- captureNewHeads());
-
- EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre(
- AllOf(state(4), parsedSymbol(&SemiTerminal),
- parents({GSSNode1, GSSNode2})),
- AllOf(state(5), parsedSymbol(&SemiTerminal),
- parents({GSSNode3}))))
- << NewHeadResults;
+ std::vector<const GSS::Node *> NewHeads;
+ glrShift({GSSNode1, GSSNode3, GSSNode2}, SemiTerminal,
+ {*G, T, Arena, GSStack}, NewHeads);
+
+ EXPECT_THAT(NewHeads, UnorderedElementsAre(
+ AllOf(state(4), parsedSymbol(&SemiTerminal),
+ parents({GSSNode1, GSSNode2})),
+ AllOf(state(5), parsedSymbol(&SemiTerminal),
+ parents({GSSNode3}))))
+ << NewHeads;
}
TEST_F(GLRTest, ReduceConflictsSplitting) {
@@ -146,26 +142,28 @@
buildGrammar({"class-name", "enum-name"},
{"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});
- LRTable Table = LRTable::buildForTests(
- G->table(), {{/*State=*/0, id("class-name"), Action::goTo(2)},
- {/*State=*/0, id("enum-name"), Action::goTo(3)}});
+ LRTable::Builder LR;
+ LR.StateCount = 4;
+ LR.GoTo[{0, id("class-name")}] = 2;
+ LR.GoTo[{0, id("enum-name")}] = 3;
+ LR.Reduce.push_back({1, ruleFor("class-name")});
+ LR.Reduce.push_back({1, ruleFor("enum-name")});
+ LRTable Table = std::move(LR).build();
const auto *GSSNode0 =
GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{});
const auto *GSSNode1 =
- GSStack.addNode(3, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});
-
- std::vector<ParseStep> PendingReduce = {
- {GSSNode1, Action::reduce(ruleFor("class-name"))},
- {GSSNode1, Action::reduce(ruleFor("enum-name"))}};
- glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
- captureNewHeads());
- EXPECT_THAT(NewHeadResults,
- testing::UnorderedElementsAre(
- AllOf(state(2), parsedSymbolID(id("class-name")),
- parents({GSSNode0})),
- AllOf(state(3), parsedSymbolID(id("enum-name")),
- parents({GSSNode0})))) << NewHeadResults;
+ GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});
+
+ std::vector<const GSS::Node *> Heads = {GSSNode1};
+ glrReduce(Heads, {*G, Table, Arena, GSStack});
+ EXPECT_THAT(Heads, UnorderedElementsAre(
+ GSSNode1,
+ AllOf(state(2), parsedSymbolID(id("class-name")),
+ parents({GSSNode0})),
+ AllOf(state(3), parsedSymbolID(id("enum-name")),
+ parents({GSSNode0}))))
+ << Heads;
}
TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
@@ -189,24 +187,26 @@
/*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1),
/*Parents=*/{GSSNode2, GSSNode3});
- LRTable Table = LRTable::buildForTests(
- G->table(),
- {{/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)},
- {/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)}});
- std::vector<ParseStep> PendingReduce = {
- {GSSNode4, Action::reduce(ruleFor("ptr-operator"))}};
- glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
- captureNewHeads());
-
- EXPECT_THAT(NewHeadResults,
- testing::UnorderedElementsAre(
- AllOf(state(5), parsedSymbolID(id("ptr-operator")),
- parents({GSSNode2})),
- AllOf(state(6), parsedSymbolID(id("ptr-operator")),
- parents({GSSNode3})))) << NewHeadResults;
+ LRTable::Builder LR;
+ LR.StateCount = 7;
+ LR.GoTo[{2, id("ptr-operator")}] = 5;
+ LR.GoTo[{3, id("ptr-operator")}] = 6;
+ LR.Reduce.push_back({4, ruleFor("ptr-operator")});
+ LRTable Table = std::move(LR).build();
+
+ std::vector<const GSS::Node *> Heads = {GSSNode4};
+ glrReduce(Heads, {*G, Table, Arena, GSStack});
+
+ EXPECT_THAT(Heads, UnorderedElementsAre(
+ GSSNode4,
+ AllOf(state(5), parsedSymbolID(id("ptr-operator")),
+ parents({GSSNode2})),
+ AllOf(state(6), parsedSymbolID(id("ptr-operator")),
+ parents({GSSNode3}))))
+ << Heads;
// Verify that the payload of the two new heads is shared, only a single
// ptr-operator node is created in the forest.
- EXPECT_EQ(NewHeadResults[0]->Payload, NewHeadResults[1]->Payload);
+ EXPECT_EQ(Heads[1]->Payload, Heads[2]->Payload);
}
TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
@@ -238,28 +238,26 @@
GSStack.addNode(/*State=*/4, /*ForestNode=*/EnumNameNode,
/*Parents=*/{GSSNode2});
- LRTable Table = LRTable::buildForTests(
- G->table(),
- {{/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)},
- {/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)}});
+ LRTable::Builder LR;
+ LR.StateCount = 6;
+ LR.GoTo[{1, id("type-name")}] = 5;
+ LR.GoTo[{2, id("type-name")}] = 5;
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
- std::vector<ParseStep> PendingReduce = {
- {
- GSSNode3, Action::reduce(/*RuleID=*/0) // type-name := class-name
- },
- {
- GSSNode4, Action::reduce(/*RuleID=*/1) // type-name := enum-name
- }};
- glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
- captureNewHeads());
+ LR.Reduce.push_back({3, /* type-name := class-name */0});
+ LR.Reduce.push_back({4, /* type-name := enum-name */1});
+ LRTable Table = std::move(LR).build();
+
+ std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
+ glrReduce(Heads, {*G, Table, Arena, GSStack});
// Verify that the stack heads are joint at state 5 after reduces.
- EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre(AllOf(
- state(5), parsedSymbolID(id("type-name")),
- parents({GSSNode1, GSSNode2}))))
- << NewHeadResults;
+ EXPECT_THAT(Heads, ElementsAre(GSSNode3, GSSNode4,
+ AllOf(state(5),
+ parsedSymbolID(id("type-name")),
+ parents({GSSNode1, GSSNode2}))))
+ << Heads;
// Verify that we create an ambiguous ForestNode of two parses of `type-name`.
- EXPECT_EQ(NewHeadResults.front()->Payload->dumpRecursive(*G),
+ EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*G),
"[ 1, end) type-name := <ambiguous>\n"
"[ 1, end) ââtype-name := class-name\n"
"[ 1, end) â ââclass-name := <opaque>\n"
@@ -295,25 +293,22 @@
const auto *GSSNode4 =
GSStack.addNode(/*State=*/4, /*ForestNode=*/StartTerminal,
/*Parents=*/{GSSNode2});
-
- LRTable Table = LRTable::buildForTests(
- G->table(), {{/*State=*/0, id("pointer"), Action::goTo(5)}});
+ LRTable::Builder LR;
+ LR.StateCount = 6;
+ LR.GoTo[{0, id("pointer")}] = 5;
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
- std::vector<ParseStep> PendingReduce = {
- {
- GSSNode3, Action::reduce(/*RuleID=*/0) // pointer := class-name *
- },
- {
- GSSNode4, Action::reduce(/*RuleID=*/1) // pointer := enum-name *
- }};
- glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
- captureNewHeads());
-
- EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre(
- AllOf(state(5), parsedSymbolID(id("pointer")),
- parents({GSSNode0}))))
- << NewHeadResults;
- EXPECT_EQ(NewHeadResults.front()->Payload->dumpRecursive(*G),
+ LR.Reduce.push_back({3, /* pointer := class-name */0});
+ LR.Reduce.push_back({4, /* pointer := enum-name */1});
+ LRTable Table = std::move(LR).build();
+
+ std::vector<const GSS::Node *> Heads = { GSSNode3, GSSNode4 };
+ glrReduce(Heads, {*G, Table, Arena, GSStack});
+
+ EXPECT_THAT(Heads, ElementsAre(GSSNode3, GSSNode4,
+ AllOf(state(5), parsedSymbolID(id("pointer")),
+ parents({GSSNode0}))))
+ << Heads;
+ EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*G),
"[ 0, end) pointer := <ambiguous>\n"
"[ 0, end) ââpointer := class-name *\n"
"[ 0, 1) â ââclass-name := <opaque>\n"
@@ -342,7 +337,7 @@
)bnf");
clang::LangOptions LOptions;
const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
- auto LRTable = LRTable::buildSLR(*G);
+ auto LRTable = LRTable::buildLR0(*G);
const ForestNode &Parsed =
glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
@@ -380,7 +375,7 @@
)bnf");
clang::LangOptions LOptions;
const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions);
- auto LRTable = LRTable::buildSLR(*G);
+ auto LRTable = LRTable::buildLR0(*G);
const ForestNode &Parsed =
glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
@@ -405,7 +400,7 @@
// of the nonterminal `test` when the next token is `eof`, verify that the
// parser stops at the right state.
const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);
- auto LRTable = LRTable::buildSLR(*G);
+ auto LRTable = LRTable::buildLR0(*G);
const ForestNode &Parsed =
glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp
===================================================================
--- clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -108,7 +108,7 @@
llvm::outs() << G->dump();
if (PrintGraph)
llvm::outs() << clang::pseudo::LRGraph::buildLR0(*G).dumpForTests(*G);
- auto LRTable = clang::pseudo::LRTable::buildSLR(*G);
+ auto LRTable = clang::pseudo::LRTable::buildLR0(*G);
if (PrintTable)
llvm::outs() << LRTable.dumpForTests(*G);
if (PrintStatistics)
Index: clang-tools-extra/pseudo/test/lr-build-conflicts.test
===================================================================
--- clang-tools-extra/pseudo/test/lr-build-conflicts.test
+++ clang-tools-extra/pseudo/test/lr-build-conflicts.test
@@ -35,12 +35,10 @@
# TABLE-NEXT: State 1
# TABLE-NEXT: '-': shift state 3
# TABLE-NEXT: State 2
-# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER'
-# TABLE-NEXT: '-': reduce by rule 1 'expr := IDENTIFIER'
+# TABLE-NEXT: reduce by rule 1 'expr := IDENTIFIER'
# TABLE-NEXT: State 3
# TABLE-NEXT: 'IDENTIFIER': shift state 2
# TABLE-NEXT: 'expr': go to state 4
# TABLE-NEXT: State 4
-# TABLE-NEXT: 'EOF': reduce by rule 0 'expr := expr - expr'
+# TABLE-NEXT: reduce by rule 0 'expr := expr - expr'
# TABLE-NEXT: '-': shift state 3
-# TABLE-NEXT: '-': reduce by rule 0 'expr := expr - expr'
Index: clang-tools-extra/pseudo/test/lr-build-basic.test
===================================================================
--- clang-tools-extra/pseudo/test/lr-build-basic.test
+++ clang-tools-extra/pseudo/test/lr-build-basic.test
@@ -23,6 +23,6 @@
# TABLE-NEXT: 'id': go to state 2
# TABLE-NEXT: State 1
# TABLE-NEXT: State 2
-# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id'
+# TABLE-NEXT: reduce by rule 1 'expr := id'
# TABLE-NEXT: State 3
-# TABLE-NEXT: 'EOF': reduce by rule 0 'id := IDENTIFIER'
+# TABLE-NEXT: reduce by rule 0 'id := IDENTIFIER'
Index: clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@@ -9,130 +9,40 @@
#include "clang-pseudo/Grammar.h"
#include "clang-pseudo/LRGraph.h"
#include "clang-pseudo/LRTable.h"
-#include "clang/Basic/TokenKinds.h"
#include <cstdint>
-namespace llvm {
-template <> struct DenseMapInfo<clang::pseudo::LRTable::Entry> {
- using Entry = clang::pseudo::LRTable::Entry;
- static inline Entry getEmptyKey() {
- static Entry E{static_cast<clang::pseudo::SymbolID>(-1), 0,
- clang::pseudo::LRTable::Action::sentinel()};
- return E;
- }
- static inline Entry getTombstoneKey() {
- static Entry E{static_cast<clang::pseudo::SymbolID>(-2), 0,
- clang::pseudo::LRTable::Action::sentinel()};
- return E;
- }
- static unsigned getHashValue(const Entry &I) {
- return llvm::hash_combine(I.State, I.Symbol, I.Act.opaque());
- }
- static bool isEqual(const Entry &LHS, const Entry &RHS) {
- return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol &&
- LHS.Act == RHS.Act;
- }
-};
-} // namespace llvm
-
namespace clang {
namespace pseudo {
-class LRTable::Builder {
-public:
- Builder(llvm::ArrayRef<std::pair<SymbolID, StateID>> StartStates)
- : StartStates(StartStates) {}
-
- bool insert(Entry E) { return Entries.insert(std::move(E)).second; }
- LRTable build(const GrammarTable >, unsigned NumStates) && {
- // E.g. given the following parsing table with 3 states and 3 terminals:
- //
- // a b c
- // +-------+----+-------+-+
- // |state0 | | s0,r0 | |
- // |state1 | acc| | |
- // |state2 | | r1 | |
- // +-------+----+-------+-+
- //
- // The final LRTable:
- // - StateOffset: [s0] = 0, [s1] = 2, [s2] = 3, [sentinel] = 4
- // - Symbols: [ b, b, a, b]
- // Actions: [ s0, r0, acc, r1]
- // ~~~~~~ range for state 0
- // ~~~~ range for state 1
- // ~~ range for state 2
- // First step, we sort all entries by (State, Symbol, Action).
- std::vector<Entry> Sorted(Entries.begin(), Entries.end());
- llvm::sort(Sorted, [](const Entry &L, const Entry &R) {
- return std::forward_as_tuple(L.State, L.Symbol, L.Act.opaque()) <
- std::forward_as_tuple(R.State, R.Symbol, R.Act.opaque());
- });
-
- LRTable Table;
- Table.Actions.reserve(Sorted.size());
- Table.Symbols.reserve(Sorted.size());
- // We are good to finalize the States and Actions.
- for (const auto &E : Sorted) {
- Table.Actions.push_back(E.Act);
- Table.Symbols.push_back(E.Symbol);
- }
- // Initialize the terminal and nonterminal offset, all ranges are empty by
- // default.
- Table.StateOffset = std::vector<uint32_t>(NumStates + 1, 0);
- size_t SortedIndex = 0;
- for (StateID State = 0; State < Table.StateOffset.size(); ++State) {
- Table.StateOffset[State] = SortedIndex;
- while (SortedIndex < Sorted.size() && Sorted[SortedIndex].State == State)
- ++SortedIndex;
- }
- Table.StartStates = std::move(StartStates);
- return Table;
- }
-
-private:
- llvm::DenseSet<Entry> Entries;
- std::vector<std::pair<SymbolID, StateID>> StartStates;
-};
+LRTable LRTable::buildLR0(const Grammar &G) {
+ auto Graph = LRGraph::buildLR0(G);
+ assert(Graph.states().size() <= (1 << StateBits) &&
+ "Graph states execceds the maximum limit!");
-LRTable LRTable::buildForTests(const GrammarTable >,
- llvm::ArrayRef<Entry> Entries) {
- StateID MaxState = 0;
- for (const auto &Entry : Entries)
- MaxState = std::max(MaxState, Entry.State);
- Builder Build({});
- for (const Entry &E : Entries)
- Build.insert(E);
- return std::move(Build).build(GT, /*NumStates=*/MaxState + 1);
-}
+ Builder B;
+ B.StartStates = Graph.startStates();
+ B.StateCount = Graph.states().size();
-LRTable LRTable::buildSLR(const Grammar &G) {
- auto Graph = LRGraph::buildLR0(G);
- Builder Build(Graph.startStates());
for (const auto &T : Graph.edges()) {
- Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
- Build.insert({T.Src, T.Label, Act});
+ (isToken(T.Label) ? B.Shift : B.GoTo)
+ .try_emplace(StateSymbol{T.Src, T.Label}, T.Dst);
}
- assert(Graph.states().size() <= (1 << StateBits) &&
- "Graph states execceds the maximum limit!");
- auto FollowSets = followSets(G);
+
for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
for (const Item &I : Graph.states()[SID].Items) {
- // If we've just parsed the start symbol, this means we successfully parse
- // the input. We don't add the reduce action of `_ := start_symbol` in the
- // LRTable (the GLR parser handles it specifically).
- if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext())
- continue;
if (!I.hasNext()) {
- // If we've reached the end of a rule A := ..., then we can reduce if
- // the next token is in the follow set of A.
- for (SymbolID Follow : FollowSets[G.lookupRule(I.rule()).Target]) {
- assert(isToken(Follow));
- Build.insert({SID, Follow, Action::reduce(I.rule())});
- }
+ // If we've just parsed the start symbol, this means we successfully
+ // parse the input. We don't add the reduce action of `_ :=
+ // start_symbol` in the LRTable (the GLR parser handles it
+ // specifically).
+ if (G.lookupRule(I.rule()).Target == G.underscore())
+ continue;
+ // If we've reached the end of a rule A := ..., then we can reduce.
+ B.Reduce.push_back({SID, I.rule()});
}
}
}
- return std::move(Build).build(G.table(), Graph.states().size());
+ return LRTable(std::move(B));
}
} // namespace pseudo
Index: clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
+++ clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
@@ -10,35 +10,19 @@
#include "clang-pseudo/Grammar.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
namespace clang {
namespace pseudo {
-llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) {
- switch (A.kind()) {
- case LRTable::Action::Shift:
- return OS << llvm::formatv("shift state {0}", A.getShiftState());
- case LRTable::Action::Reduce:
- return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule());
- case LRTable::Action::GoTo:
- return OS << llvm::formatv("go to state {0}", A.getGoToState());
- case LRTable::Action::Sentinel:
- llvm_unreachable("unexpected Sentinel action kind!");
- }
- llvm_unreachable("unexpected action kind!");
-}
-
std::string LRTable::dumpStatistics() const {
return llvm::formatv(R"(
Statistics of the LR parsing table:
- number of states: {0}
- number of actions: {1}
- size of the table (bytes): {2}
+ number of actions: shift={0} reduce={1} goto={2}
+ size of the table (bytes): {3}
)",
- StateOffset.size() - 1, Actions.size(), bytes())
+ Shift.size(), Reduce.size(), Goto.size(), bytes())
.str();
}
@@ -46,66 +30,28 @@
std::string Result;
llvm::raw_string_ostream OS(Result);
OS << "LRTable:\n";
- for (StateID S = 0; S < StateOffset.size() - 1; ++S) {
+ for (StateID S = 0; S < Reduce.keys(); ++S) {
OS << llvm::formatv("State {0}\n", S);
+ for (RuleID R : getReduceRules(S))
+ OS.indent(4) << llvm::formatv("reduce by rule {0} '{1}'\n", R,
+ G.dumpRule(R));
for (uint16_t Terminal = 0; Terminal < NumTerminals; ++Terminal) {
SymbolID TokID = tokenSymbol(static_cast<tok::TokenKind>(Terminal));
- for (auto A : find(S, TokID)) {
- if (A.kind() == LRTable::Action::Shift)
- OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n",
- G.symbolName(TokID), A.getShiftState());
- else if (A.kind() == LRTable::Action::Reduce)
- OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n",
- G.symbolName(TokID), A.getReduceRule(),
- G.dumpRule(A.getReduceRule()));
- }
+ if (auto Next = getShiftState(S, TokID))
+ OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n",
+ G.symbolName(TokID), *Next);
}
for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size();
++NontermID) {
- if (find(S, NontermID).empty())
- continue;
- OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n",
- G.symbolName(NontermID),
- getGoToState(S, NontermID));
+ auto It = Goto.find(StateSymbol{S, NontermID});
+ if (It != Goto.end())
+ OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n",
+ G.symbolName(NontermID), It->second);
}
}
return OS.str();
}
-llvm::ArrayRef<LRTable::Action> LRTable::getActions(StateID State,
- SymbolID Terminal) const {
- assert(pseudo::isToken(Terminal) && "expect terminal symbol!");
- return find(State, Terminal);
-}
-
-LRTable::StateID LRTable::getGoToState(StateID State,
- SymbolID Nonterminal) const {
- assert(pseudo::isNonterminal(Nonterminal) && "expected nonterminal symbol!");
- auto Result = find(State, Nonterminal);
- assert(Result.size() == 1 && Result.front().kind() == Action::GoTo);
- return Result.front().getGoToState();
-}
-
-llvm::ArrayRef<LRTable::Action> LRTable::find(StateID Src, SymbolID ID) const {
- assert(Src + 1u < StateOffset.size());
- std::pair<size_t, size_t> Range =
- std::make_pair(StateOffset[Src], StateOffset[Src + 1]);
- auto SymbolRange = llvm::makeArrayRef(Symbols.data() + Range.first,
- Symbols.data() + Range.second);
-
- assert(llvm::is_sorted(SymbolRange) &&
- "subrange of the Symbols should be sorted!");
- const LRTable::StateID *Start =
- llvm::partition_point(SymbolRange, [&ID](SymbolID S) { return S < ID; });
- if (Start == SymbolRange.end())
- return {};
- const LRTable::StateID *End = Start;
- while (End != SymbolRange.end() && *End == ID)
- ++End;
- return llvm::makeArrayRef(&Actions[Start - Symbols.data()],
- /*length=*/End - Start);
-}
-
LRTable::StateID LRTable::getStartState(SymbolID Target) const {
assert(llvm::is_sorted(StartStates) && "StartStates must be sorted!");
auto It = llvm::partition_point(
@@ -117,5 +63,14 @@
return It->second;
}
+LRTable::LRTable(Builder B)
+ : Shift(std::move(B.Shift)), Reduce(std::move(B.Reduce), B.StateCount),
+ Goto(std::move(B.GoTo)), StartStates(std::move(B.StartStates)) {
+ assert(llvm::all_of(Shift,
+ [&](auto &E) { return E.first.State < B.StateCount; }));
+ assert(llvm::all_of(Goto,
+ [&](auto &E) { return E.first.State < B.StateCount; }));
+}
+
} // namespace pseudo
} // namespace clang
Index: clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
+++ clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
@@ -87,87 +87,6 @@
return OS.str();
}
-std::vector<llvm::DenseSet<SymbolID>> firstSets(const Grammar &G) {
- std::vector<llvm::DenseSet<SymbolID>> FirstSets(
- G.table().Nonterminals.size());
- auto ExpandFirstSet = [&FirstSets](SymbolID Target, SymbolID First) {
- assert(isNonterminal(Target));
- if (isToken(First))
- return FirstSets[Target].insert(First).second;
- bool Changed = false;
- for (SymbolID SID : FirstSets[First])
- Changed |= FirstSets[Target].insert(SID).second;
- return Changed;
- };
-
- // A rule S := T ... implies elements in FIRST(S):
- // - if T is a terminal, FIRST(S) contains T
- // - if T is a nonterminal, FIRST(S) contains FIRST(T)
- // Since FIRST(T) may not have been fully computed yet, FIRST(S) itself may
- // end up being incomplete.
- // We iterate until we hit a fixed point.
- // (This isn't particularly efficient, but table building isn't on the
- // critical path).
- bool Changed = true;
- while (Changed) {
- Changed = false;
- for (const auto &R : G.table().Rules)
- // We only need to consider the first element because symbols are
- // non-nullable.
- Changed |= ExpandFirstSet(R.Target, R.seq().front());
- }
- return FirstSets;
-}
-
-std::vector<llvm::DenseSet<SymbolID>> followSets(const Grammar &G) {
- auto FirstSets = firstSets(G);
- std::vector<llvm::DenseSet<SymbolID>> FollowSets(
- G.table().Nonterminals.size());
- // Expand the follow set of a nonterminal symbol Y by adding all from the
- // given symbol set.
- auto ExpandFollowSet = [&FollowSets](SymbolID Y,
- const llvm::DenseSet<SymbolID> &ToAdd) {
- assert(isNonterminal(Y));
- bool Changed = false;
- for (SymbolID F : ToAdd)
- Changed |= FollowSets[Y].insert(F).second;
- return Changed;
- };
- // Follow sets is computed based on the following 3 rules, the computation
- // is completed at a fixed point where there is no more new symbols can be
- // added to any of the follow sets.
- //
- // Rule 1: add endmarker to the FOLLOW(S), where S is the start symbol of the
- // augmented grammar, in our case it is '_'.
- FollowSets[G.underscore()].insert(tokenSymbol(tok::eof));
- bool Changed = true;
- while (Changed) {
- Changed = false;
- for (const auto &R : G.table().Rules) {
- // Rule 2: for a rule X := ... Y Z, we add all symbols from FIRST(Z) to
- // FOLLOW(Y).
- for (size_t I = 0; I + 1 < R.seq().size(); ++I) {
- if (isToken(R.seq()[I]))
- continue;
- // We only need to consider the next symbol because symbols are
- // non-nullable.
- SymbolID Next = R.seq()[I + 1];
- if (isToken(Next))
- // First set for a terminal is itself.
- Changed |= ExpandFollowSet(R.seq()[I], {Next});
- else
- Changed |= ExpandFollowSet(R.seq()[I], FirstSets[Next]);
- }
- // Rule 3: for a rule X := ... Z, we add all symbols from FOLLOW(X) to
- // FOLLOW(Z).
- SymbolID Z = R.seq().back();
- if (isNonterminal(Z))
- Changed |= ExpandFollowSet(Z, FollowSets[R.Target]);
- }
- }
- return FollowSets;
-}
-
static llvm::ArrayRef<std::string> getTerminalNames() {
static const auto &TerminalNames = []() {
auto TerminalNames = new std::string[NumTerminals];
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -25,7 +25,7 @@
}
const LRTable &getLRTable() {
- static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar()));
+ static LRTable *Table = new LRTable(LRTable::buildLR0(getGrammar()));
return *Table;
}
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -46,67 +46,35 @@
auto &GSS = Params.GSStack;
// Lists of active shift, reduce actions.
- std::vector<ParseStep> PendingShift, PendingReduce;
- auto AddSteps = [&](const GSS::Node *Head, SymbolID NextTok) {
- for (const auto &Action : Params.Table.getActions(Head->State, NextTok)) {
- switch (Action.kind()) {
- case LRTable::Action::Shift:
- PendingShift.push_back({Head, Action});
- break;
- case LRTable::Action::Reduce:
- PendingReduce.push_back({Head, Action});
- break;
- default:
- llvm_unreachable("unexpected action kind!");
- }
- }
- };
StateID StartState = Params.Table.getStartState(StartSymbol);
- std::vector<const GSS::Node *> NewHeads = {
+ std::vector<const GSS::Node *> Heads = {
GSS.addNode(/*State=*/StartState,
/*ForestNode=*/nullptr, {})};
+ std::vector<const GSS::Node *> NextHeads;
auto MaybeGC = [&, Roots(std::vector<const GSS::Node *>{}), I(0u)]() mutable {
- assert(PendingShift.empty() && PendingReduce.empty() &&
- "Running GC at the wrong time!");
-
+ assert(NextHeads.empty() && "Running GC at the wrong time!");
if (++I != 20) // Run periodically to balance CPU and memory usage.
return;
I = 0;
// We need to copy the list: Roots is consumed by the GC.
- Roots = NewHeads;
+ Roots = Heads;
GSS.gc(std::move(Roots));
};
for (const ForestNode &Terminal : Terminals) {
LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next token {0} (id={1})\n",
G.symbolName(Terminal.symbol()),
Terminal.symbol()));
- for (const auto *Head : NewHeads)
- AddSteps(Head, Terminal.symbol());
- NewHeads.clear();
- glrReduce(PendingReduce, Params,
- [&](const GSS::Node * NewHead) {
- // A reduce will enable more steps.
- AddSteps(NewHead, Terminal.symbol());
- });
-
- glrShift(PendingShift, Terminal, Params,
- [&](const GSS::Node *NewHead) { NewHeads.push_back(NewHead); });
+ glrShift(Heads, Terminal, Params, NextHeads);
+ std::swap(Heads, NextHeads);
+ glrReduce(Heads, Params);
+ NextHeads.clear();
MaybeGC();
}
- LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next is eof\n"));
- for (const auto *Heads : NewHeads)
- AddSteps(Heads, tokenSymbol(tok::eof));
+ LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Reached eof\n"));
StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
// Collect new heads created from the final reduce.
- std::vector<const GSS::Node*> Heads;
- glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) {
- Heads.push_back(NewHead);
- // A reduce will enable more steps.
- AddSteps(NewHead, tokenSymbol(tok::eof));
- });
-
const ForestNode *Result = nullptr;
for (const auto *Head : Heads) {
if (Head->State == AcceptState) {
@@ -138,42 +106,40 @@
// After the shift action, the GSS is:
// 0---1---2---4
// â---3---â
-void glrShift(std::vector<ParseStep> &PendingShift, const ForestNode &NewTok,
- const ParseParams &Params, NewHeadCallback NewHeadCB) {
+void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads,
+ const ForestNode &NewTok, const ParseParams &Params,
+ std::vector<const GSS::Node *> &NewHeads) {
assert(NewTok.kind() == ForestNode::Terminal);
- assert(llvm::all_of(PendingShift,
- [](const ParseStep &Step) {
- return Step.Action.kind() == LRTable::Action::Shift;
- }) &&
- "Pending shift actions must be shift actions");
LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" Shift {0} ({1} active heads):\n",
Params.G.symbolName(NewTok.symbol()),
- PendingShift.size()));
+ OldHeads.size()));
// We group pending shifts by their target state so we can merge them.
- llvm::stable_sort(PendingShift, [](const ParseStep &L, const ParseStep &R) {
- return L.Action.getShiftState() < R.Action.getShiftState();
- });
- auto Rest = llvm::makeArrayRef(PendingShift);
+ llvm::SmallVector<std::pair<StateID, const GSS::Node *>, 8> Shifts;
+ for (const auto* H : OldHeads)
+ if (auto S = Params.Table.getShiftState(H->State, NewTok.symbol()))
+ Shifts.push_back({*S, H});
+ llvm::stable_sort(Shifts, llvm::less_first{});
+
+ auto Rest = llvm::makeArrayRef(Shifts);
llvm::SmallVector<const GSS::Node *> Parents;
while (!Rest.empty()) {
// Collect the batch of PendingShift that have compatible shift states.
// Their heads become TempParents, the parents of the new GSS node.
- StateID NextState = Rest.front().Action.getShiftState();
+ StateID NextState = Rest.front().first;
Parents.clear();
for (const auto &Base : Rest) {
- if (Base.Action.getShiftState() != NextState)
+ if (Base.first != NextState)
break;
- Parents.push_back(Base.Head);
+ Parents.push_back(Base.second);
}
Rest = Rest.drop_front(Parents.size());
LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" --> S{0} ({1} heads)\n",
NextState, Parents.size()));
- NewHeadCB(Params.GSStack.addNode(NextState, &NewTok, Parents));
+ NewHeads.push_back(Params.GSStack.addNode(NextState, &NewTok, Parents));
}
- PendingShift.clear();
}
namespace {
@@ -231,8 +197,8 @@
// After reducing 3 by `pointer := class-name STAR` and
// 2 by`enum-name := class-name STAR`:
// 0--5(pointer) // 5 is goto(0, pointer)
-void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,
- NewHeadCallback NewHeadCB) {
+void glrReduce(std::vector<const GSS::Node *> &Heads,
+ const ParseParams &Params) {
// There are two interacting complications:
// 1. Performing one reduce can unlock new reduces on the newly-created head.
// 2a. The ambiguous ForestNodes must be complete (have all sequence nodes).
@@ -291,6 +257,36 @@
KeyedQueue<Family, PushSpec> Sequences;
Sequence TempSequence;
+
+ // We treat Heads as a queue of Pop operations still to be performed.
+ // PoppedHeads is our position within it.
+ unsigned PoppedHeads = 0;
+ // In general the sequencing is complicated: each pop can yield multiple
+ // pending pushes that might run in a different order than we found them.
+ // However in trivial cases (only pop that yields only one push) we can
+ // bypass all these fancy queues and pop+push directly. This is very common.
+ auto PopAndPushTrivial = [&]() -> bool {
+ if (!Sequences.empty() || Heads.size() != PoppedHeads + 1)
+ return false;
+ const GSS::Node *Head = Heads.back();
+ auto Rules = Params.Table.getReduceRules(Head->State);
+ if (Rules.size() != 1)
+ return false;
+ const auto &Rule = Params.G.lookupRule(Rules.front());
+ const GSS::Node *Base = Head;
+ TempSequence.resize_for_overwrite(Rule.Size);
+ for (unsigned I = 0; I < Rule.Size; ++I) {
+ if (Base->parents().size() != 1)
+ return false;
+ TempSequence[Rule.Size - 1 - I] = Base->Payload;
+ Base = Base->parents().front();
+ }
+ const ForestNode *Parsed =
+ &Params.Forest.createSequence(Rule.Target, Rules.front(), TempSequence);
+ StateID NextState = Params.Table.getGoToState(Base->State, Rule.Target);
+ Heads.push_back(Params.GSStack.addNode(NextState, Parsed, {Base}));
+ return true;
+ };
// Pop walks up the parent chain(s) for a reduction from Head by to Rule.
// Once we reach the end, record the bases and sequences.
auto Pop = [&](const GSS::Node *Head, RuleID RID) {
@@ -312,9 +308,12 @@
DFS(Head, 0, DFS);
};
auto PopPending = [&] {
- for (const ParseStep &Pending : PendingReduce)
- Pop(Pending.Head, Pending.Action.getReduceRule());
- PendingReduce.clear();
+ for (; PoppedHeads < Heads.size(); ++PoppedHeads) {
+ if (PopAndPushTrivial())
+ continue;
+ for (RuleID R : Params.Table.getReduceRules(Heads[PoppedHeads]->State))
+ Pop(Heads[PoppedHeads], R);
+ }
};
std::vector<std::pair</*Goto*/ StateID, const GSS::Node *>> FamilyBases;
@@ -379,9 +378,7 @@
}
BasesLeft = BasesLeft.drop_front(Parents.size());
- // Invoking the callback for new heads, a real GLR parser may add new
- // reduces to the PendingReduce queue!
- NewHeadCB(Params.GSStack.addNode(NextState, Parsed, Parents));
+ Heads.push_back(Params.GSStack.addNode(NextState, Parsed, Parents));
}
PopPending();
}
Index: clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
@@ -38,9 +38,39 @@
#include "clang-pseudo/Grammar.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Capacity.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <vector>
+namespace clang {
+namespace pseudo {
+struct StateSymbol {
+ using StateID = uint16_t; // XXX
+ StateID State;
+ SymbolID Symbol;
+};
+} // namespace pseudo
+} // namespace clang
+namespace llvm {
+template <> struct llvm::DenseMapInfo<clang::pseudo::StateSymbol> {
+ using StateSymbol = clang::pseudo::StateSymbol;
+ static inline StateSymbol getEmptyKey() {
+ return StateSymbol{StateSymbol::StateID(-1), 0};
+ }
+ static inline StateSymbol getTombstoneKey() {
+ return StateSymbol{StateSymbol::StateID(-1), 1};
+ }
+ static unsigned getHashValue(const StateSymbol &Val) {
+ return (Val.State * 2754435761U) ^ Val.Symbol; // Knuth hash
+ }
+ static bool isEqual(const StateSymbol &LHS, const StateSymbol &RHS) {
+ return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol;
+ }
+};
+} // namespace llvm
namespace clang {
namespace pseudo {
@@ -60,79 +90,26 @@
using StateID = uint16_t;
static constexpr unsigned StateBits = 13;
- // Action represents the terminal and nonterminal actions, it combines the
- // entry of the ACTION and GOTO tables from the LR literature.
- class Action {
- public:
- enum Kind : uint8_t {
- Sentinel = 0,
- // Terminal actions, corresponding to entries of ACTION table.
-
- // Shift to state n: move forward with the lookahead, and push state n
- // onto the state stack.
- // A shift is a forward transition, and the value n is the next state that
- // the parser is to enter.
- Shift,
- // Reduce by a rule: pop the state stack.
- Reduce,
-
- // NOTE: there are no typical accept actions in the LRtable, accept
- // actions are handled specifically in the parser -- if the parser
- // reaches to a target state (which is goto(StartState, StartSymbol)) at
- // the EOF token after a reduce, this indicates the input has been parsed
- // as the StartSymbol successfully.
-
- // Nonterminal actions, corresponding to entry of GOTO table.
-
- // Go to state n: push state n onto the state stack.
- // Similar to Shift, but it is a nonterminal forward transition.
- GoTo,
- };
-
- static Action goTo(StateID S) { return Action(GoTo, S); }
- static Action shift(StateID S) { return Action(Shift, S); }
- static Action reduce(RuleID RID) { return Action(Reduce, RID); }
- static Action sentinel() { return Action(Sentinel, 0); }
-
- StateID getShiftState() const {
- assert(kind() == Shift);
- return Value;
- }
- StateID getGoToState() const {
- assert(kind() == GoTo);
- return Value;
- }
- RuleID getReduceRule() const {
- assert(kind() == Reduce);
- return Value;
- }
- Kind kind() const { return static_cast<Kind>(K); }
-
- bool operator==(const Action &L) const { return opaque() == L.opaque(); }
- uint16_t opaque() const { return K << ValueBits | Value; };
-
- private:
- Action(Kind K1, unsigned Value) : K(K1), Value(Value) {}
- static constexpr unsigned ValueBits = StateBits;
- static constexpr unsigned KindBits = 3;
- static_assert(ValueBits >= RuleBits, "Value must be able to store RuleID");
- static_assert(KindBits + ValueBits <= 16,
- "Must be able to store kind and value efficiently");
- uint16_t K : KindBits;
- // Either StateID or RuleID, depending on the Kind.
- uint16_t Value : ValueBits;
- };
-
- // Returns all available actions for the given state on a terminal.
- // Expected to be called by LR parsers.
- llvm::ArrayRef<Action> getActions(StateID State, SymbolID Terminal) const;
- // Returns the state after we reduce a nonterminal.
- // Expected to be called by LR parsers.
- StateID getGoToState(StateID State, SymbolID Nonterminal) const;
-
- // Looks up available actions.
- // Returns empty if no available actions in the table.
- llvm::ArrayRef<Action> find(StateID State, SymbolID Symbol) const;
+ // Return the list of reductions applicable from a given state.
+ // These correspond to items with the dot at the end.
+ llvm::ArrayRef<RuleID> getReduceRules(StateID State) const {
+ return Reduce.find(State);
+ }
+ // Return the state to transition to after shifting a token Terminal.
+ // Returns None if this terminal is not legal here.
+ llvm::Optional<StateID> getShiftState(StateID From, SymbolID Terminal) const {
+ auto It = Shift.find(StateSymbol{From, Terminal});
+ if (It == Shift.end())
+ return llvm::None;
+ return It->second;
+ }
+ // Return the state to transition to after reducing a symbol Nonterminal.
+ // REQUIRES: this nonterminal is legal here.
+ StateID getGoToState(StateID From, SymbolID Nonterminal) const {
+ auto It = Goto.find(StateSymbol{From, Nonterminal});
+ assert(It != Goto.end());
+ return It->second;
+ }
// Returns the state from which the LR parser should start to parse the input
// tokens as the given StartSymbol.
@@ -146,47 +123,75 @@
StateID getStartState(SymbolID StartSymbol) const;
size_t bytes() const {
- return sizeof(*this) + Actions.capacity() * sizeof(Action) +
- Symbols.capacity() * sizeof(SymbolID) +
- StateOffset.capacity() * sizeof(uint32_t);
+ return sizeof(*this) + llvm::capacity_in_bytes(Shift) +
+ llvm::capacity_in_bytes(Goto) + Reduce.bytes();
}
std::string dumpStatistics() const;
std::string dumpForTests(const Grammar &G) const;
- // Build a SLR(1) parsing table.
- static LRTable buildSLR(const Grammar &G);
+ static LRTable buildLR0(const Grammar &G);
+ struct Builder {
+ unsigned StateCount = 0;
+ std::vector<std::pair<StateID, RuleID>> Reduce;
+ llvm::DenseMap<StateSymbol, StateID> Shift;
+ llvm::DenseMap<StateSymbol, StateID> GoTo;
+ std::vector<std::pair<SymbolID, StateID>> StartStates;
- class Builder;
- // Represents an entry in the table, used for building the LRTable.
- struct Entry {
- StateID State;
- SymbolID Symbol;
- Action Act;
+ LRTable build() && { return LRTable(std::move(*this)); }
};
- // Build a specifid table for testing purposes.
- static LRTable buildForTests(const GrammarTable &, llvm::ArrayRef<Entry>);
private:
- // Conceptually the LR table is a multimap from (State, SymbolID) => Action.
- // Our physical representation is quite different for compactness.
-
- // Index is StateID, value is the offset into Symbols/Actions
- // where the entries for this state begin.
- // Give a state id, the corresponding half-open range of Symbols/Actions is
- // [StateOffset[id], StateOffset[id+1]).
- std::vector<uint32_t> StateOffset;
- // Parallel to Actions, the value is SymbolID (columns of the matrix).
- // Grouped by the StateID, and only subranges are sorted.
- std::vector<SymbolID> Symbols;
- // A flat list of available actions, sorted by (State, SymbolID).
- std::vector<Action> Actions;
+ // A multimap from K => V, where Ks form a dense range [0, n).
+ // A flat array stores the values:
+ // Values = [ values for k=0 | values for k=1 | values for k=2 | ... ]
+ // And another stores the index for each K:
+ // Keys = [ index for k=0, index for k=1, ... , Values.size()]
+ // Lookup[k] is Values[ Keys[k]...Keys[k+1] ]
+ template <typename K, typename V> struct OffsetTable {
+ std::vector<uint32_t> Keys;
+ std::vector<V> Values;
+
+ OffsetTable(std::vector<std::pair<K, V>> &&Pairs, K Limit) {
+ assert(llvm::all_of(Pairs, [&](auto &P) { return P.first < Limit; }));
+ llvm::stable_sort(Pairs, llvm::less_first{});
+ Keys.reserve(Limit + 1);
+ Values.reserve(Pairs.size());
+ unsigned I = 0;
+ for (K Key = 0; Key < Limit; ++Key) {
+ Keys.push_back(Values.size());
+ while (I < Pairs.size() && Pairs[I].first == Key)
+ Values.push_back(Pairs[I++].second);
+ }
+ Keys.push_back(Values.size());
+ assert(Values.size() == Pairs.size());
+ assert(Keys.size() == Limit + 1);
+ }
+
+ size_t bytes() const {
+ return sizeof(*this) + llvm::capacity_in_bytes(Keys) +
+ llvm::capacity_in_bytes(Values);
+ }
+ size_t size() const { return Values.size(); }
+ size_t keys() const { return Keys.size() - 1; }
+
+ llvm::ArrayRef<V> find(K Key) const {
+ return llvm::makeArrayRef(&Values[Keys[Key]], &Values[Keys[Key + 1]]);
+ }
+ };
+
+ llvm::DenseMap<StateSymbol, StateID> Shift;
+ OffsetTable<StateID, RuleID> Reduce;
+ llvm::DenseMap<StateSymbol, StateID> Goto;
// A sorted table, storing the start state for each target parsing symbol.
std::vector<std::pair<SymbolID, StateID>> StartStates;
+
+ LRTable(Builder B);
};
-llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);
} // namespace pseudo
} // namespace clang
+namespace llvm {} // namespace llvm
+
#endif // CLANG_PSEUDO_LRTABLE_H
Index: clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
@@ -174,12 +174,6 @@
// the augmented grammar.)
SymbolID Underscore;
};
-// For each nonterminal X, computes the set of terminals that begin strings
-// derived from X. (Known as FIRST sets in grammar-based parsers).
-std::vector<llvm::DenseSet<SymbolID>> firstSets(const Grammar &);
-// For each nonterminal X, computes the set of terminals that could immediately
-// follow X. (Known as FOLLOW sets in grammar-based parsers).
-std::vector<llvm::DenseSet<SymbolID>> followSets(const Grammar &);
// Storage for the underlying data of the Grammar.
// It can be constructed dynamically (from compiling BNF file) or statically
Index: clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -132,34 +132,23 @@
const ForestNode &glrParse(const TokenStream &Code, const ParseParams &Params,
SymbolID StartSymbol);
-// An active stack head can have multiple available actions (reduce/reduce
-// actions, reduce/shift actions).
-// A step is any one action applied to any one stack head.
-struct ParseStep {
- // A specific stack head.
- const GSS::Node *Head = nullptr;
- // An action associated with the head.
- LRTable::Action Action = LRTable::Action::sentinel();
-};
-// A callback is invoked whenever a new GSS head is created during the GLR
-// parsing process (glrShift, or glrReduce).
-using NewHeadCallback = std::function<void(const GSS::Node *)>;
// Apply all PendingShift actions on a given GSS state, newly-created heads are
// passed to the callback.
//
// When this function returns, PendingShift is empty.
//
// Exposed for testing only.
-void glrShift(std::vector<ParseStep> &PendingShift, const ForestNode &NextTok,
- const ParseParams &Params, NewHeadCallback NewHeadCB);
+void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads,
+ const ForestNode &NextTok, const ParseParams &Params,
+ std::vector<const GSS::Node *> &NewHeads);
// Applies PendingReduce actions, until no more reduce actions are available.
//
// When this function returns, PendingReduce is empty. Calls to NewHeadCB may
// add elements to PendingReduce
//
// Exposed for testing only.
-void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,
- NewHeadCallback NewHeadCB);
+void glrReduce(std::vector<const GSS::Node *> &Heads,
+ const ParseParams &Params);
} // namespace pseudo
} // namespace clang
Index: clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
===================================================================
--- clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
+++ clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
@@ -25,7 +25,7 @@
class Fuzzer {
clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
std::unique_ptr<Grammar> G;
- LRTable T;
+ llvm::Optional<LRTable> T;
bool Print;
public:
@@ -44,7 +44,7 @@
llvm::errs() << Diag << "\n";
std::exit(1);
}
- T = LRTable::buildSLR(*G);
+ T = LRTable::buildLR0(*G);
}
void operator()(llvm::StringRef Code) {
@@ -58,9 +58,9 @@
clang::pseudo::ForestArena Arena;
clang::pseudo::GSS GSS;
- auto &Root =
- glrParse(ParseableStream, clang::pseudo::ParseParams{*G, T, Arena, GSS},
- *G->findNonterminal("translation-unit"));
+ auto &Root = glrParse(ParseableStream,
+ clang::pseudo::ParseParams{*G, *T, Arena, GSS},
+ *G->findNonterminal("translation-unit"));
if (Print)
llvm::outs() << Root.dumpRecursive(*G);
}
Index: clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
===================================================================
--- clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
+++ clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
@@ -77,11 +77,11 @@
}
BENCHMARK(parseBNF);
-static void buildSLR(benchmark::State &State) {
+static void buildLR0(benchmark::State &State) {
for (auto _ : State)
- LRTable::buildSLR(*G);
+ LRTable::buildLR0(*G);
}
-BENCHMARK(buildSLR);
+BENCHMARK(buildLR0);
TokenStream lexAndPreprocess() {
clang::LangOptions LangOpts = genericLangOpts();
@@ -129,7 +129,7 @@
BENCHMARK(preprocess);
static void glrParse(benchmark::State &State) {
- LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
+ LRTable Table = clang::pseudo::LRTable::buildLR0(*G);
SymbolID StartSymbol = *G->findNonterminal("translation-unit");
TokenStream Stream = lexAndPreprocess();
for (auto _ : State) {
@@ -143,7 +143,7 @@
BENCHMARK(glrParse);
static void full(benchmark::State &State) {
- LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
+ LRTable Table = clang::pseudo::LRTable::buildLR0(*G);
SymbolID StartSymbol = *G->findNonterminal("translation-unit");
for (auto _ : State) {
TokenStream Stream = lexAndPreprocess();
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits