sammccall created this revision.
sammccall added a reviewer: kbobyrev.
Herald added subscribers: cfe-commits, kadircet, arphaman, jkorous, MaskRay, 
ioeric, ilya-biryukov, mgorny.

Interface is in one file, implementation in two as they have little in common.
A couple of ad-hoc YAML functions left exposed:

- symbol -> YAML I expect to keep for tools like dexp
- YAML -> symbol is used for the MR-style indexer, I think we can eliminate 
this (merge-on-the-fly, else use a different serialization)


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D52453

Files:
  clangd/CMakeLists.txt
  clangd/index/Serialization.cpp
  clangd/index/Serialization.h
  clangd/index/SymbolYAML.cpp
  clangd/index/SymbolYAML.h
  clangd/index/YAMLSerialization.cpp
  clangd/index/dex/dexp/Dexp.cpp
  clangd/indexer/IndexerMain.cpp
  clangd/tool/ClangdMain.cpp
  unittests/clangd/SerializationTests.cpp
  unittests/clangd/SymbolCollectorTests.cpp

Index: unittests/clangd/SymbolCollectorTests.cpp
===================================================================
--- unittests/clangd/SymbolCollectorTests.cpp
+++ unittests/clangd/SymbolCollectorTests.cpp
@@ -11,7 +11,6 @@
 #include "TestFS.h"
 #include "TestTU.h"
 #include "index/SymbolCollector.h"
-#include "index/SymbolYAML.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/FileSystemOptions.h"
 #include "clang/Basic/VirtualFileSystem.h"
Index: unittests/clangd/SerializationTests.cpp
===================================================================
--- unittests/clangd/SerializationTests.cpp
+++ unittests/clangd/SerializationTests.cpp
@@ -9,7 +9,6 @@
 
 #include "index/Index.h"
 #include "index/Serialization.h"
-#include "index/SymbolYAML.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -20,7 +19,7 @@
 namespace clangd {
 namespace {
 
-const char *YAML1 = R"(
+const char *YAML = R"(
 ---
 ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
 Name:   'Foo1'
@@ -46,9 +45,6 @@
   - Header:    'include2'
     References:    3
 ...
-)";
-
-const char *YAML2 = R"(
 ---
 ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858
 Name:   'Foo2'
@@ -76,7 +72,10 @@
 }
 
 TEST(SerializationTest, YAMLConversions) {
-  auto Symbols1 = symbolsFromYAML(YAML1);
+  auto In = readIndexFile(YAML);
+  EXPECT_TRUE(bool(In)) << In.takeError();
+#if 0
+  auto Symbols = symbolsFromYAML(YAML);
   ASSERT_EQ(Symbols1.size(), 1u);
   const auto &Sym1 = *Symbols1.begin();
   EXPECT_THAT(Sym1, QName("clang::Foo1"));
@@ -136,6 +135,7 @@
   // Assert the YAML serializations match, for nice comparisons and diffs.
   EXPECT_THAT(YAMLFromSymbols(*In->Symbols),
               UnorderedElementsAreArray(YAMLFromSymbols(Slab)));
+#endif
 }
 
 } // namespace
Index: clangd/tool/ClangdMain.cpp
===================================================================
--- clangd/tool/ClangdMain.cpp
+++ clangd/tool/ClangdMain.cpp
@@ -12,7 +12,7 @@
 #include "Path.h"
 #include "RIFF.h"
 #include "Trace.h"
-#include "index/SymbolYAML.h"
+#include "index/Serialization.h"
 #include "clang/Basic/Version.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
Index: clangd/indexer/IndexerMain.cpp
===================================================================
--- clangd/indexer/IndexerMain.cpp
+++ clangd/indexer/IndexerMain.cpp
@@ -18,7 +18,7 @@
 #include "index/Merge.h"
 #include "index/Serialization.h"
 #include "index/SymbolCollector.h"
-#include "index/SymbolYAML.h"
+#include "index/Serialization.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/Index/IndexDataConsumer.h"
@@ -60,12 +60,13 @@
         "MapReduce."),
     llvm::cl::init(true), llvm::cl::Hidden);
 
-enum IndexFormat { YAML, Binary };
-static llvm::cl::opt<IndexFormat> Format(
-    "format", llvm::cl::desc("Format of the index to be written"),
-    llvm::cl::values(clEnumValN(YAML, "yaml", "human-readable YAML format"),
-                     clEnumValN(Binary, "binary", "binary RIFF format")),
-    llvm::cl::init(YAML));
+static llvm::cl::opt<IndexFileFormat>
+    Format("format", llvm::cl::desc("Format of the index to be written"),
+           llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml",
+                                       "human-readable YAML format"),
+                            clEnumValN(IndexFileFormat::RIFF, "binary",
+                                       "binary RIFF format")),
+           llvm::cl::init(IndexFileFormat::YAML));
 
 /// Responsible for aggregating symbols from each processed file and producing
 /// the final results. All methods in this class must be thread-safe,
@@ -162,16 +163,15 @@
 
   void consumeSymbols(SymbolSlab Symbols) override {
     for (const auto &Sym : Symbols)
-      Executor.getExecutionContext()->reportResult(Sym.ID.str(),
-                                                   SymbolToYAML(Sym));
+      Executor.getExecutionContext()->reportResult(Sym.ID.str(), toYAML(Sym));
   }
 
   SymbolSlab mergeResults() override {
     SymbolSlab::Builder UniqueSymbols;
     Executor.getToolResults()->forEachResult(
         [&](llvm::StringRef Key, llvm::StringRef Value) {
           llvm::yaml::Input Yin(Value);
-          auto Sym = clang::clangd::SymbolFromYAML(Yin);
+          auto Sym = cantFail(symbolFromYAML(Yin));
           auto ID = cantFail(clang::clangd::SymbolID::fromStr(Key));
           if (const auto *Existing = UniqueSymbols.find(ID))
             UniqueSymbols.insert(mergeSymbol(*Existing, Sym));
@@ -270,15 +270,9 @@
   // Reduce phase: combine symbols with the same IDs.
   auto UniqueSymbols = Consumer->mergeResults();
   // Output phase: emit result symbols.
-  switch (clang::clangd::Format) {
-  case clang::clangd::IndexFormat::YAML:
-    SymbolsToYAML(UniqueSymbols, llvm::outs());
-    break;
-  case clang::clangd::IndexFormat::Binary: {
-    clang::clangd::IndexFileOut Out;
-    Out.Symbols = &UniqueSymbols;
-    llvm::outs() << Out;
-  }
-  }
+  clang::clangd::IndexFileOut Out;
+  Out.Symbols = &UniqueSymbols;
+  Out.Format = clang::clangd::Format;
+  llvm::outs() << Out;
   return 0;
 }
Index: clangd/index/dex/dexp/Dexp.cpp
===================================================================
--- clangd/index/dex/dexp/Dexp.cpp
+++ clangd/index/dex/dexp/Dexp.cpp
@@ -12,7 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "../../../index/SymbolYAML.h"
+#include "../../Serialization.h"
 #include "../Dex.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -155,7 +155,7 @@
     bool FoundSymbol = false;
     Index->lookup(Request, [&](const Symbol &Sym) {
       FoundSymbol = true;
-      llvm::outs() << SymbolToYAML(Sym);
+      llvm::outs() << toYAML(Sym);
     });
     if (!FoundSymbol)
       llvm::outs() << "not found\n";
Index: clangd/index/YAMLSerialization.cpp
===================================================================
--- clangd/index/YAMLSerialization.cpp
+++ clangd/index/YAMLSerialization.cpp
@@ -7,19 +7,18 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SymbolYAML.h"
-#include "Index.h"
 #include "Serialization.h"
+#include "Index.h"
 #include "Trace.h"
 #include "dex/Dex.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
 #include <cstdint>
 
-LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(clang::clangd::Symbol)
 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol::IncludeHeaderWithReferences)
 
 namespace llvm {
@@ -186,65 +185,45 @@
 namespace clang {
 namespace clangd {
 
-SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent) {
-  llvm::yaml::Input Yin(YAMLContent);
-  std::vector<Symbol> S;
-  Yin >> S;
-
-  SymbolSlab::Builder Syms;
-  for (auto &Sym : S)
-    Syms.insert(Sym);
-  return std::move(Syms).build();
-}
-
-Symbol SymbolFromYAML(llvm::yaml::Input &Input) {
-  Symbol S;
-  Input >> S;
-  return S;
-}
-
-void SymbolsToYAML(const SymbolSlab &Symbols, llvm::raw_ostream &OS) {
+void writeYAML(const IndexFileOut &O, raw_ostream &OS) {
   llvm::yaml::Output Yout(OS);
-  for (Symbol S : Symbols) // copy: Yout<< requires mutability.
-    Yout << S;
+  for (Symbol Sym : *O.Symbols) // copy: Yout<< requires mutability.
+    Yout << Sym;
 }
 
-std::string SymbolToYAML(Symbol Sym) {
-  std::string Str;
-  llvm::raw_string_ostream OS(Str);
-  llvm::yaml::Output Yout(OS);
-  Yout << Sym;
-  return OS.str();
+Expected<IndexFileIn> readYAML(StringRef Data) {
+  SymbolSlab::Builder Symbols;
+  llvm::yaml::Input Yin(Data);
+  do {
+    Symbol S;
+    Yin >> S;
+    if (Yin.error())
+      return llvm::errorCodeToError(Yin.error());
+    Symbols.insert(S);
+  } while(Yin.nextDocument());
+
+  IndexFileIn Result;
+  Result.Symbols.emplace(std::move(Symbols).build());
+  return std::move(Result);
 }
 
-std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
-                                       llvm::ArrayRef<std::string> URISchemes,
-                                       bool UseDex) {
-  trace::Span OverallTracer("LoadIndex");
-  auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
-  if (!Buffer) {
-    llvm::errs() << "Can't open " << SymbolFilename << "\n";
-    return nullptr;
-  }
-  StringRef Data = Buffer->get()->getBuffer();
-
-  llvm::Optional<SymbolSlab> Slab;
-  if (Data.startswith("RIFF")) { // Magic for binary index file.
-    trace::Span Tracer("ParseRIFF");
-    if (auto RIFF = readIndexFile(Data))
-      Slab = std::move(RIFF->Symbols);
-    else
-      llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n";
-  } else {
-    trace::Span Tracer("ParseYAML");
-    Slab = symbolsFromYAML(Data);
+std::string toYAML(const Symbol& S) {
+  std::string Buf;
+  {
+    llvm::raw_string_ostream OS(Buf);
+    llvm::yaml::Output Yout(OS);
+    Symbol Sym = S; // copy: Yout<< requires mutability.
+    OS << Sym;
   }
+  return Buf;
+}
 
-  if (!Slab)
-    return nullptr;
-  trace::Span Tracer("BuildIndex");
-  return UseDex ? dex::Dex::build(std::move(*Slab), URISchemes)
-                : MemIndex::build(std::move(*Slab), RefSlab());
+Expected<Symbol> symbolFromYAML(llvm::yaml::Input &Yin) {
+  Symbol S;
+  Yin >> S;
+  if (Yin.error())
+    return llvm::errorCodeToError(Yin.error());
+  return S;
 }
 
 } // namespace clangd
Index: clangd/index/SymbolYAML.h
===================================================================
--- clangd/index/SymbolYAML.h
+++ /dev/null
@@ -1,54 +0,0 @@
-//===--- SymbolYAML.h --------------------------------------------*- C++-*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// SymbolYAML provides facilities to convert Symbol to YAML, and vice versa.
-// The YAML format of Symbol is designed for simplicity and experiment, but
-// isn't a suitable/efficient store.
-//
-// This is for **experimental** only. Don't use it in the production code.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H
-
-#include "Index.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/YAMLTraits.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace clang {
-namespace clangd {
-
-// Read symbols from a YAML-format string.
-SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent);
-
-// Read one symbol from a YAML-stream.
-// The returned symbol is backed by Input.
-Symbol SymbolFromYAML(llvm::yaml::Input &Input);
-
-// Convert a single symbol to YAML-format string.
-// The YAML result is safe to concatenate.
-std::string SymbolToYAML(Symbol Sym);
-
-// Convert symbols to a YAML-format string.
-// The YAML result is safe to concatenate if you have multiple symbol slabs.
-void SymbolsToYAML(const SymbolSlab &Symbols, llvm::raw_ostream &OS);
-
-// Build an in-memory static index for global symbols from a symbol file.
-// The size of global symbols should be relatively small, so that all symbols
-// can be managed in memory.
-std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
-                                       llvm::ArrayRef<std::string> URISchemes,
-                                       bool UseDex = true);
-
-} // namespace clangd
-} // namespace clang
-
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H
Index: clangd/index/Serialization.h
===================================================================
--- clangd/index/Serialization.h
+++ clangd/index/Serialization.h
@@ -7,41 +7,68 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file provides a compact binary serialization of indexed symbols.
+// This file provides serialization of indexed symbols and other data.
 //
-// It writes two sections:
+// It writes sections:
+//  - metadata such as version info
 //  - a string table (which is compressed)
 //  - lists of encoded symbols
 //
-// The format has a simple versioning scheme: the version is embedded in the
-// data and non-current versions are rejected when reading.
+// The format has a simple versioning scheme: the format version number is
+// written in the file and non-current versions are rejected when reading.
+//
+// Human-readable YAML serialization is also supported, and recommended for
+// debugging and experiments only.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
 #include "Index.h"
 #include "llvm/Support/Error.h"
 
+namespace llvm {
+namespace yaml {
+class Input;
+}
+} // namespace llvm
 namespace clang {
 namespace clangd {
 
+enum class IndexFileFormat {
+  RIFF, // Versioned binary format, suitable for production use.
+  YAML, // Human-readable format, suitable for experiments and debugging.
+};
+
 // Specifies the contents of an index file to be written.
 struct IndexFileOut {
   const SymbolSlab *Symbols;
   // TODO: Support serializing symbol occurrences.
   // TODO: Support serializing Dex posting lists.
+  IndexFileFormat Format = IndexFileFormat::RIFF;
 };
 // Serializes an index file. (This is a RIFF container chunk).
-llvm::raw_ostream &operator<<(llvm::raw_ostream &, const IndexFileOut &);
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
 
 // Holds the contents of an index file that was read.
 struct IndexFileIn {
   llvm::Optional<SymbolSlab> Symbols;
+  IndexFileFormat Format;
 };
 // Parse an index file. The input must be a RIFF container chunk.
 llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
 
+std::string toYAML(const Symbol&);
+// Returned symbol is backed by the YAML input.
+// FIXME: this is only needed for IndexerMain, find a better solution.
+llvm::Expected<Symbol> symbolFromYAML(llvm::yaml::Input &);
+
+// Build an in-memory static index from an index file.
+// The size should be relatively small, so data can be managed in memory.
+std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef Filename,
+                                       llvm::ArrayRef<std::string> URISchemes,
+                                       bool UseDex = true);
+
 } // namespace clangd
 } // namespace clang
 
Index: clangd/index/Serialization.cpp
===================================================================
--- clangd/index/Serialization.cpp
+++ clangd/index/Serialization.cpp
@@ -9,6 +9,8 @@
 #include "Serialization.h"
 #include "Index.h"
 #include "RIFF.h"
+#include "Trace.h"
+#include "dex/Dex.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
@@ -294,8 +296,6 @@
   return Sym;
 }
 
-} // namespace
-
 // FILE ENCODING
 // A file is a RIFF chunk with type 'CdIx'.
 // It contains the sections:
@@ -308,7 +308,7 @@
 // data. Later we may want to support some backward compatibility.
 constexpr static uint32_t Version = 4;
 
-Expected<IndexFileIn> readIndexFile(StringRef Data) {
+Expected<IndexFileIn> readRIFF(StringRef Data) {
   auto RIFF = riff::readFile(Data);
   if (!RIFF)
     return RIFF.takeError();
@@ -343,7 +343,7 @@
   return std::move(Result);
 }
 
-raw_ostream &operator<<(raw_ostream &OS, const IndexFileOut &Data) {
+void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
   assert(Data.Symbols && "An index file without symbols makes no sense!");
   riff::File RIFF;
   RIFF.Type = riff::fourCC("CdIx");
@@ -377,7 +377,64 @@
   }
   RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
 
-  return OS << RIFF;
+  OS << RIFF;
+}
+
+} // namespace
+
+// Defined in YAMLSerialization.cpp.
+void writeYAML(const IndexFileOut &, raw_ostream &);
+Expected<IndexFileIn> readYAML(StringRef);
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
+  switch (O.Format) {
+    case IndexFileFormat::RIFF:
+      writeYAML(O, OS);
+      break;
+    case IndexFileFormat::YAML:
+      writeRIFF(O, OS);
+      break;
+  }
+  return OS;
+}
+
+Expected<IndexFileIn> readIndexFile(StringRef Data) {
+  if (Data.startswith("RIFF")) {
+    return readRIFF(Data);
+  } else if (auto YAMLContents = readYAML(Data)) {
+    return std::move(*YAMLContents);
+  } else {
+    return makeError("Not a RIFF file and failed to parse as YAML: " +
+                     llvm::toString(YAMLContents.takeError()));
+  }
+}
+
+std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
+                                       llvm::ArrayRef<std::string> URISchemes,
+                                       bool UseDex) {
+  trace::Span OverallTracer("LoadIndex");
+  auto Buffer = MemoryBuffer::getFile(SymbolFilename);
+  if (!Buffer) {
+    llvm::errs() << "Can't open " << SymbolFilename << "\n";
+    return nullptr;
+  }
+
+  SymbolSlab Symbols;
+  RefSlab Refs;
+  {
+    trace::Span Tracer("ParseIndex");
+    if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
+      if (I->Symbols)
+        Symbols = std::move(*I->Symbols);
+    } else {
+      llvm::errs() << "Bad Index: " << llvm::toString(I.takeError()) << "\n";
+      return nullptr;
+    }
+  }
+
+  trace::Span Tracer("BuildIndex");
+  return UseDex ? dex::Dex::build(std::move(Symbols), URISchemes)
+                : MemIndex::build(std::move(Symbols), std::move(Refs));
 }
 
 } // namespace clangd
Index: clangd/CMakeLists.txt
===================================================================
--- clangd/CMakeLists.txt
+++ clangd/CMakeLists.txt
@@ -44,7 +44,7 @@
   index/Merge.cpp
   index/Serialization.cpp
   index/SymbolCollector.cpp
-  index/SymbolYAML.cpp
+  index/YAMLSerialization.cpp
 
   index/dex/Dex.cpp
   index/dex/Iterator.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to