hokein created this revision.
hokein added a reviewer: sammccall.
Herald added a subscriber: mgorny.
Herald added a project: All.
hokein requested review of this revision.
Herald added a subscriber: alextsao1999.
Herald added a project: clang-tools-extra.

It compiles the C++ BNF grammar and generates the grammar symbols as an enum,
together with a prebuilt LRTable, for the pseudo-parser.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D125231

Files:
  clang-tools-extra/pseudo/CMakeLists.txt
  clang-tools-extra/pseudo/gen/CMakeLists.txt
  clang-tools-extra/pseudo/gen/Cxx.cmake
  clang-tools-extra/pseudo/gen/CxxGen.cpp
  clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
  clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
  clang-tools-extra/pseudo/lib/CMakeLists.txt

Index: clang-tools-extra/pseudo/lib/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -1,6 +1,11 @@
 set(LLVM_LINK_COMPONENTS Support)
 
-add_clang_library(clangPseudo
+include(${CMAKE_CURRENT_SOURCE_DIR}/../gen/Cxx.cmake)
+set(CXX_GRAMMAR ${CMAKE_CURRENT_LIST_DIR}/cxx.bnf)
+gen_cxx(${CXX_GRAMMAR} "Cxx")
+
+# Needed by LLVM's CMake checks because this file defines multiple targets.
+set(LLVM_OPTIONAL_SOURCES
   DirectiveTree.cpp
   Forest.cpp
   GLR.cpp
@@ -11,8 +16,34 @@
   LRTable.cpp
   LRTableBuild.cpp
   Token.cpp
+  )
+
+add_clang_library(clangPseudoBasic
+  Grammar.cpp
+  GrammarBNF.cpp
+  LRGraph.cpp
+  LRTable.cpp
+  LRTableBuild.cpp
+
+  LINK_LIBS
+  clangBasic
+  )
+
+add_clang_library(clangPseudo
+  DirectiveTree.cpp
+  Forest.cpp
+  GLR.cpp
+  Lex.cpp
+  Token.cpp
 
   LINK_LIBS
   clangBasic
   clangLex
+  clangPseudoBasic
   )
+
+add_clang_library(clangPseudoCXX
+  ${CLANG_PSEUDO_BINARY_DIR}/Cxx.cpp
+  LINK_LIBS
+  clangBasic
+)
Index: clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
@@ -165,7 +165,6 @@
   // Build a specifid table for testing purposes.
   static LRTable buildForTests(const GrammarTable &, llvm::ArrayRef<Entry>);
 
-private:
   // Conceptually the LR table is a multimap from (State, SymbolID) => Action.
   // Our physical representation is quite different for compactness.
 
Index: clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
@@ -153,8 +153,6 @@
 // It can be constructed dynamically (from compiling BNF file) or statically
 // (a compiled data-source).
 struct GrammarTable {
-  GrammarTable();
-
   struct Nonterminal {
     std::string Name;
     // Corresponding rules that construct the nonterminal, it is a [Start, End)
@@ -164,6 +162,11 @@
       RuleID End;
     } RuleRange;
   };
+  GrammarTable();
+  GrammarTable(std::vector<Rule> Rules, std::vector<Nonterminal> Nonterminals,
+               llvm::ArrayRef<std::string> Terminals)
+      : Rules(std::move(Rules)), Terminals(Terminals),
+        Nonterminals(std::move(Nonterminals)){};
 
   // RuleID is an index into this table of rule definitions.
   //
Index: clang-tools-extra/pseudo/gen/CxxGen.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/gen/CxxGen.cpp
@@ -0,0 +1,230 @@
+//===-- CxxGen.cpp - Compile BNF grammar and LR table ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/Grammar.h"
+#include "clang-pseudo/LRGraph.h"
+#include "clang-pseudo/LRTable.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+
+using clang::pseudo::Grammar;
+using llvm::cl::desc;
+using llvm::cl::init;
+using llvm::cl::opt;
+
+static opt<std::string>
+    Grammar("grammar", desc("Parse and check a BNF grammar file."), init(""));
+static opt<std::string>
+    Filename("filename", desc("Output file name (without file extension)"),
+             init("Cxx"));
+static opt<std::string> OutputDir("output-dir", desc("Output directory"),
+                                  init(""));
+
+// Returns the contents of the file at Path; on failure, reports it and exits.
+static std::string readOrDie(llvm::StringRef Path) {
+  auto BufferOrErr = llvm::MemoryBuffer::getFile(Path);
+  if (std::error_code EC = BufferOrErr.getError()) {
+    llvm::errs() << "Error: can't read grammar file '" << Path
+                 << "': " << EC.message() << "\n";
+    ::exit(1);
+  }
+  return (*BufferOrErr)->getBuffer().str();
+}
+
+// Generates the contents of the header file for grammar G.
+//
+// The header declares, inside namespace clang::pseudo::cxx:
+//  - an enum `Symbol` with one enumerator per nonterminal of the grammar,
+//    whose value is that nonterminal's SymbolID;
+//  - the accessors getGrammar() and getLRTable().
+// Filename is only used to form the include-guard macro.
+static std::string genHeaderCode(const clang::pseudo::Grammar &G,
+                                 llvm::StringRef Filename) {
+  std::vector<std::string> NonterminalEnums;
+  NonterminalEnums.reserve(G.table().Nonterminals.size());
+  for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
+       ++ID) {
+    std::string Name = G.symbolName(ID).str();
+    // '-' cannot appear in a C++ identifier: translation-unit ->
+    // translation_unit
+    std::replace(Name.begin(), Name.end(), '-', '_');
+    NonterminalEnums.push_back(llvm::formatv("  {0} = {1}", Name, ID));
+  }
+  std::string HeaderGuard =
+      llvm::formatv("GENERATED_CLANG_PSEUDO_{0}_H", Filename);
+  // Placeholders: {0} = include guard, {1} = the enumerators, one per line.
+  return llvm::formatv(R"cpp(
+#ifndef {0}
+#define {0}
+
+#include "clang-pseudo/Grammar.h"
+#include "llvm/Support/Compiler.h"
+
+namespace clang {
+namespace pseudo {
+class LRTable;
+namespace cxx {
+
+enum Symbol : SymbolID {
+{1}
+};
+
+const Grammar& getGrammar();
+const LRTable& getLRTable();
+
+} // namespace cxx
+} // namespace pseudo
+} // namespace clang
+
+#endif // {0})cpp",
+                       HeaderGuard, llvm::join(NonterminalEnums, ",\n"));
+}
+
+// Formats every element of C with llvm::formatv("{0}", ...) and joins the
+// resulting strings, placing Separator between consecutive elements.
+template <typename Container>
+std::string genericJoin(const Container &C, llvm::StringRef Separator) {
+  std::vector<std::string> Formatted;
+  for (auto It = std::begin(C), End = std::end(C); It != End; ++It)
+    Formatted.emplace_back(llvm::formatv("{0}", *It).str());
+  return llvm::join(Formatted.begin(), Formatted.end(), Separator);
+}
+
+// Generates the contents of the .cpp file for grammar G.
+//
+// The emitted code defines cxx::getGrammar() and cxx::getLRTable(). Each
+// function holds its result in a function-local static that is constructed
+// from literal initializer data: the grammar's rules, nonterminals and
+// terminals, and the fields of the SLR table built from G.
+// Filename (without extension) names the generated header to #include.
+static std::string genCppCode(const clang::pseudo::Grammar &G,
+                              llvm::StringRef Filename) {
+  // Renders each symbol as `/*name*/id` so the emitted numeric IDs stay
+  // readable.
+  auto ToNames = [&](llvm::ArrayRef<clang::pseudo::SymbolID> Syms) {
+    std::vector<std::string> Names;
+    for (auto SID : Syms)
+      Names.push_back(llvm::formatv("/*{0}*/{1}", G.symbolName(SID), SID));
+    return Names;
+  };
+  // One initializer per rule: { target symbol, { sequence symbols } }.
+  std::vector<std::string> Rules;
+  for (const auto &R : G.table().Rules) {
+    Rules.push_back(llvm::formatv("      { /*{0}*/{1}, /*Seq=*/{ {2} } }",
+                                  G.symbolName(R.Target), R.Target,
+                                  llvm::join(ToNames(R.seq()), ", ")));
+  }
+
+  // One initializer per nonterminal: { "name", { rule range start, end } }.
+  std::vector<std::string> Nonterminals;
+  for (const auto &NT : G.table().Nonterminals) {
+    Nonterminals.push_back(
+        llvm::formatv("      { \"{0}\", {/*Start*/{1}, /*End*/{2} } }", NT.Name,
+                      NT.RuleRange.Start, NT.RuleRange.End));
+  }
+  // Terminals are emitted as quoted string literals.
+  std::vector<std::string> Terminals;
+  for (const auto &T : G.table().Terminals) {
+    Terminals.push_back(llvm::formatv("      \"{0}\"", T));
+  }
+
+  // Build the SLR table now and emit the contents of its fields.
+  auto LRTable = clang::pseudo::LRTable::buildSLR(G);
+
+  std::string LRNontermOffset = genericJoin(LRTable.NontermOffset, ", ");
+  std::string LRTermOffsetCode = genericJoin(LRTable.TerminalOffset, ", ");
+  std::string LRStates = genericJoin(LRTable.States, ", ");
+  // Each action is emitted as an `Action::<kind>(<value>)` expression.
+  std::vector<std::string> LRActions;
+  for (const auto &Action : LRTable.Actions) {
+    switch (Action.kind()) {
+    case clang::pseudo::LRTable::Action::Shift:
+      LRActions.push_back(
+          llvm::formatv("Action::shift({0})", Action.getShiftState()));
+      break;
+    case clang::pseudo::LRTable::Action::Reduce:
+      LRActions.push_back(
+          llvm::formatv("Action::reduce({0})", Action.getReduceRule()));
+      break;
+    case clang::pseudo::LRTable::Action::Accept:
+      // FIXME: use a real RID here
+      LRActions.push_back(llvm::formatv("Action::accept(0)"));
+      break;
+    case clang::pseudo::LRTable::Action::GoTo:
+      LRActions.push_back(
+          llvm::formatv("Action::goTo({0})", Action.getGoToState()));
+      break;
+    default:
+      // No other action kind is expected here.
+      assert(false);
+      break;
+    }
+  }
+  std::vector<std::string> LRStartStates;
+  for (const auto &SA : LRTable.StartStates) {
+    LRStartStates.push_back(llvm::formatv(
+        "{ /*SymbolID*/{0}, /*StartState*/{1} }", SA.first, SA.second));
+  }
+  // Placeholders: {0} = header name, {1} = rules, {2} = nonterminals,
+  // {3} = terminals, {4} = NontermOffset, {5} = TerminalOffset, {6} = States,
+  // {7} = Actions, {8} = StartStates.
+  return llvm::formatv(
+      R"cpp(#include <memory>
+
+#include "{0}.h"
+#include "clang-pseudo/Grammar.h"
+#include "clang-pseudo/LRTable.h"
+
+namespace clang {
+namespace pseudo {
+
+namespace cxx {
+
+const Grammar& getGrammar() {
+  static GrammarTable* Table = new GrammarTable({
+    {  // Rules
+{1}
+    }, // Rules
+    {  // Nonterminals
+{2}
+    }, // Nonterminals
+    {  // Terminals
+{3}
+    }  // Terminals
+  });
+  static Grammar* G = new Grammar(std::unique_ptr<GrammarTable>(Table));
+  return *G;
+}
+
+const LRTable& getLRTable() {
+  using Action = LRTable::Action;
+  static LRTable* Table = new LRTable({
+    /*NontermOffset=*/{ {4} },
+    /*TermOffset=*/{ {5} },
+    /*States=*/{ {6} },
+    /*Actions=*/{ {7} },
+    /*StartStates=*/{ {8} },
+  });
+  return *Table;
+}
+
+} // namespace cxx
+} // namespace pseudo
+} // namespace clang
+)cpp",
+      Filename, llvm::join(Rules, ",\n"), llvm::join(Nonterminals, ", "),
+      llvm::join(Terminals, ", "), LRNontermOffset, LRTermOffsetCode, LRStates,
+      llvm::join(LRActions, ", "), llvm::join(LRStartStates, ", "));
+}
+
+// Writes Content to the file at Filepath.
+// If the file cannot be opened, reports the error on stderr and exits with 1.
+static void writeFile(llvm::StringRef Filepath, llvm::StringRef Content) {
+  std::error_code EC;
+  llvm::raw_fd_ostream FD(Filepath, EC);
+  if (EC) {
+    llvm::errs() << "Failed to open file: " << Filepath << ": "
+                 << EC.message() << "\n";
+    exit(1);
+  }
+  FD << Content;
+}
+
+// Entry point: compiles the BNF grammar given by --grammar and writes
+// <filename>.h and <filename>.cpp into --output-dir.
+// Returns 1 if no grammar was given or the grammar has diagnostics, else 0.
+int main(int argc, char *argv[]) {
+  llvm::cl::ParseCommandLineOptions(argc, argv, "");
+  if (!Grammar.getNumOccurrences()) {
+    llvm::errs() << "Grammar file must be provided!\n";
+    return 1;
+  }
+
+  std::string GrammarText = readOrDie(Grammar);
+  std::vector<std::string> Diags;
+  auto G = Grammar::parseBNF(GrammarText, Diags);
+
+  if (!Diags.empty()) {
+    // Terminate the last diagnostic with a newline as well.
+    llvm::errs() << llvm::join(Diags, "\n") << "\n";
+    return 1;
+  }
+
+  // An empty --output-dir (the default) means the current directory. Joining
+  // it with "/" unconditionally would yield "/<filename>.h", i.e. a path in
+  // the filesystem root.
+  std::string OutputBase = Filename.getValue();
+  if (!OutputDir.empty())
+    OutputBase = OutputDir.getValue() + "/" + OutputBase;
+  writeFile(OutputBase + ".h", genHeaderCode(*G, Filename));
+  writeFile(OutputBase + ".cpp", genCppCode(*G, Filename));
+  return 0;
+}
Index: clang-tools-extra/pseudo/gen/Cxx.cmake
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/gen/Cxx.cmake
@@ -0,0 +1,21 @@
+# Compiles the BNF grammar file, and produces a pair of files called
+# ${filename}.h and ${filename}.cpp in the ${CLANG_PSEUDO_BINARY_DIR}.
+#
+# Arguments:
+#   grammar_file - path to the BNF grammar to compile.
+#   filename     - base name (without extension) of the generated files.
+#
+# Example:
+#   gen_cxx(${CMAKE_CURRENT_LIST_DIR}/cxx.bnf "Cxx")
+function(gen_cxx grammar_file filename)
+  set(header_file "${CLANG_PSEUDO_BINARY_DIR}/${filename}.h")
+  set(cpp_file "${CLANG_PSEUDO_BINARY_DIR}/${filename}.cpp")
+
+  # Naming the executable target as the COMMAND lets CMake substitute the
+  # location of the built binary instead of relying on
+  # CMAKE_RUNTIME_OUTPUT_DIRECTORY being set and matching the real location.
+  # The grammar file is listed in DEPENDS so that editing it regenerates the
+  # outputs.
+  add_custom_command(OUTPUT "${header_file}" "${cpp_file}"
+    COMMAND pseudo-cxx-gen
+      --grammar "${grammar_file}"
+      --output-dir "${CLANG_PSEUDO_BINARY_DIR}"
+      --filename "${filename}"
+    COMMENT "Generating code for cxx grammar..."
+    DEPENDS pseudo-cxx-gen "${grammar_file}"
+    VERBATIM)
+
+  set_source_files_properties("${header_file}" "${cpp_file}" PROPERTIES
+    GENERATED 1)
+endfunction()
Index: clang-tools-extra/pseudo/gen/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/gen/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_LINK_COMPONENTS Support)
+
+# Build-time tool that generates the C++ grammar sources; it is invoked by the
+# gen_cxx() function defined in Cxx.cmake.
+add_clang_executable(pseudo-cxx-gen
+  CxxGen.cpp
+  )
+
+# NOTE(review): links clangPseudoBasic rather than clangPseudo — presumably so
+# the generator does not depend on code that consumes its output; confirm.
+target_link_libraries(pseudo-cxx-gen
+  PRIVATE
+  clangPseudoBasic
+  )
Index: clang-tools-extra/pseudo/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/CMakeLists.txt
+++ clang-tools-extra/pseudo/CMakeLists.txt
@@ -1,9 +1,12 @@
+set(CLANG_PSEUDO_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+
 include_directories(include)
 include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
 add_subdirectory(lib)
 add_subdirectory(tool)
 add_subdirectory(fuzzer)
 add_subdirectory(benchmarks)
+add_subdirectory(gen)
 if(CLANG_INCLUDE_TESTS)
   add_subdirectory(unittests)
   add_subdirectory(test)
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to