sammccall created this revision.
sammccall added a reviewer: kbobyrev.
Herald added subscribers: cfe-commits, kadircet, arphaman, jkorous, MaskRay,
ioeric, ilya-biryukov, mgorny.
Interface is in one file, implementation in two as they have little in common.
A couple of ad-hoc YAML functions left exposed:
- symbol -> YAML I expect to keep for tools like dexp
- YAML -> symbol is used for the MR-style indexer, I think we can eliminate
this (merge-on-the-fly, else use a different serialization)
Repository:
rCTE Clang Tools Extra
https://reviews.llvm.org/D52453
Files:
clangd/CMakeLists.txt
clangd/index/Serialization.cpp
clangd/index/Serialization.h
clangd/index/SymbolYAML.cpp
clangd/index/SymbolYAML.h
clangd/index/YAMLSerialization.cpp
clangd/index/dex/dexp/Dexp.cpp
clangd/indexer/IndexerMain.cpp
clangd/tool/ClangdMain.cpp
unittests/clangd/SerializationTests.cpp
unittests/clangd/SymbolCollectorTests.cpp
Index: unittests/clangd/SymbolCollectorTests.cpp
===================================================================
--- unittests/clangd/SymbolCollectorTests.cpp
+++ unittests/clangd/SymbolCollectorTests.cpp
@@ -11,7 +11,6 @@
#include "TestFS.h"
#include "TestTU.h"
#include "index/SymbolCollector.h"
-#include "index/SymbolYAML.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/VirtualFileSystem.h"
Index: unittests/clangd/SerializationTests.cpp
===================================================================
--- unittests/clangd/SerializationTests.cpp
+++ unittests/clangd/SerializationTests.cpp
@@ -9,7 +9,6 @@
#include "index/Index.h"
#include "index/Serialization.h"
-#include "index/SymbolYAML.h"
#include "llvm/Support/ScopedPrinter.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -20,7 +19,7 @@
namespace clangd {
namespace {
-const char *YAML1 = R"(
+const char *YAML = R"(
---
ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
Name: 'Foo1'
@@ -46,9 +45,6 @@
- Header: 'include2'
References: 3
...
-)";
-
-const char *YAML2 = R"(
---
ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858
Name: 'Foo2'
@@ -76,7 +72,10 @@
}
TEST(SerializationTest, YAMLConversions) {
- auto Symbols1 = symbolsFromYAML(YAML1);
+ auto In = readIndexFile(YAML);
+ EXPECT_TRUE(bool(In)) << In.takeError();
+#if 0
+ auto Symbols = symbolsFromYAML(YAML);
ASSERT_EQ(Symbols1.size(), 1u);
const auto &Sym1 = *Symbols1.begin();
EXPECT_THAT(Sym1, QName("clang::Foo1"));
@@ -136,6 +135,7 @@
// Assert the YAML serializations match, for nice comparisons and diffs.
EXPECT_THAT(YAMLFromSymbols(*In->Symbols),
UnorderedElementsAreArray(YAMLFromSymbols(Slab)));
+#endif
}
} // namespace
Index: clangd/tool/ClangdMain.cpp
===================================================================
--- clangd/tool/ClangdMain.cpp
+++ clangd/tool/ClangdMain.cpp
@@ -12,7 +12,7 @@
#include "Path.h"
#include "RIFF.h"
#include "Trace.h"
-#include "index/SymbolYAML.h"
+#include "index/Serialization.h"
#include "clang/Basic/Version.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
Index: clangd/indexer/IndexerMain.cpp
===================================================================
--- clangd/indexer/IndexerMain.cpp
+++ clangd/indexer/IndexerMain.cpp
@@ -18,7 +18,7 @@
#include "index/Merge.h"
#include "index/Serialization.h"
#include "index/SymbolCollector.h"
-#include "index/SymbolYAML.h"
+#include "index/Serialization.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Index/IndexDataConsumer.h"
@@ -60,12 +60,13 @@
"MapReduce."),
llvm::cl::init(true), llvm::cl::Hidden);
-enum IndexFormat { YAML, Binary };
-static llvm::cl::opt<IndexFormat> Format(
- "format", llvm::cl::desc("Format of the index to be written"),
- llvm::cl::values(clEnumValN(YAML, "yaml", "human-readable YAML format"),
- clEnumValN(Binary, "binary", "binary RIFF format")),
- llvm::cl::init(YAML));
+static llvm::cl::opt<IndexFileFormat>
+ Format("format", llvm::cl::desc("Format of the index to be written"),
+ llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml",
+ "human-readable YAML format"),
+ clEnumValN(IndexFileFormat::RIFF, "binary",
+ "binary RIFF format")),
+ llvm::cl::init(IndexFileFormat::YAML));
/// Responsible for aggregating symbols from each processed file and producing
/// the final results. All methods in this class must be thread-safe,
@@ -162,16 +163,15 @@
void consumeSymbols(SymbolSlab Symbols) override {
for (const auto &Sym : Symbols)
- Executor.getExecutionContext()->reportResult(Sym.ID.str(),
- SymbolToYAML(Sym));
+ Executor.getExecutionContext()->reportResult(Sym.ID.str(), toYAML(Sym));
}
SymbolSlab mergeResults() override {
SymbolSlab::Builder UniqueSymbols;
Executor.getToolResults()->forEachResult(
[&](llvm::StringRef Key, llvm::StringRef Value) {
llvm::yaml::Input Yin(Value);
- auto Sym = clang::clangd::SymbolFromYAML(Yin);
+ auto Sym = cantFail(symbolFromYAML(Yin));
auto ID = cantFail(clang::clangd::SymbolID::fromStr(Key));
if (const auto *Existing = UniqueSymbols.find(ID))
UniqueSymbols.insert(mergeSymbol(*Existing, Sym));
@@ -270,15 +270,9 @@
// Reduce phase: combine symbols with the same IDs.
auto UniqueSymbols = Consumer->mergeResults();
// Output phase: emit result symbols.
- switch (clang::clangd::Format) {
- case clang::clangd::IndexFormat::YAML:
- SymbolsToYAML(UniqueSymbols, llvm::outs());
- break;
- case clang::clangd::IndexFormat::Binary: {
- clang::clangd::IndexFileOut Out;
- Out.Symbols = &UniqueSymbols;
- llvm::outs() << Out;
- }
- }
+ clang::clangd::IndexFileOut Out;
+ Out.Symbols = &UniqueSymbols;
+ Out.Format = clang::clangd::Format;
+ llvm::outs() << Out;
return 0;
}
Index: clangd/index/dex/dexp/Dexp.cpp
===================================================================
--- clangd/index/dex/dexp/Dexp.cpp
+++ clangd/index/dex/dexp/Dexp.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "../../../index/SymbolYAML.h"
+#include "../../Serialization.h"
#include "../Dex.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -155,7 +155,7 @@
bool FoundSymbol = false;
Index->lookup(Request, [&](const Symbol &Sym) {
FoundSymbol = true;
- llvm::outs() << SymbolToYAML(Sym);
+ llvm::outs() << toYAML(Sym);
});
if (!FoundSymbol)
llvm::outs() << "not found\n";
Index: clangd/index/YAMLSerialization.cpp
===================================================================
--- clangd/index/YAMLSerialization.cpp
+++ clangd/index/YAMLSerialization.cpp
@@ -7,19 +7,18 @@
//
//===----------------------------------------------------------------------===//
-#include "SymbolYAML.h"
-#include "Index.h"
#include "Serialization.h"
+#include "Index.h"
#include "Trace.h"
#include "dex/Dex.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
#include <cstdint>
-LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(clang::clangd::Symbol)
LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol::IncludeHeaderWithReferences)
namespace llvm {
@@ -186,65 +185,45 @@
namespace clang {
namespace clangd {
-SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent) {
- llvm::yaml::Input Yin(YAMLContent);
- std::vector<Symbol> S;
- Yin >> S;
-
- SymbolSlab::Builder Syms;
- for (auto &Sym : S)
- Syms.insert(Sym);
- return std::move(Syms).build();
-}
-
-Symbol SymbolFromYAML(llvm::yaml::Input &Input) {
- Symbol S;
- Input >> S;
- return S;
-}
-
-void SymbolsToYAML(const SymbolSlab &Symbols, llvm::raw_ostream &OS) {
+void writeYAML(const IndexFileOut &O, raw_ostream &OS) {
llvm::yaml::Output Yout(OS);
- for (Symbol S : Symbols) // copy: Yout<< requires mutability.
- Yout << S;
+ for (Symbol Sym : *O.Symbols) // copy: Yout<< requires mutability.
+ Yout << Sym;
}
-std::string SymbolToYAML(Symbol Sym) {
- std::string Str;
- llvm::raw_string_ostream OS(Str);
- llvm::yaml::Output Yout(OS);
- Yout << Sym;
- return OS.str();
+Expected<IndexFileIn> readYAML(StringRef Data) {
+ SymbolSlab::Builder Symbols;
+ llvm::yaml::Input Yin(Data);
+ do {
+ Symbol S;
+ Yin >> S;
+ if (Yin.error())
+ return llvm::errorCodeToError(Yin.error());
+ Symbols.insert(S);
+ } while(Yin.nextDocument());
+
+ IndexFileIn Result;
+ Result.Symbols.emplace(std::move(Symbols).build());
+ return std::move(Result);
}
-std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
- llvm::ArrayRef<std::string> URISchemes,
- bool UseDex) {
- trace::Span OverallTracer("LoadIndex");
- auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
- if (!Buffer) {
- llvm::errs() << "Can't open " << SymbolFilename << "\n";
- return nullptr;
- }
- StringRef Data = Buffer->get()->getBuffer();
-
- llvm::Optional<SymbolSlab> Slab;
- if (Data.startswith("RIFF")) { // Magic for binary index file.
- trace::Span Tracer("ParseRIFF");
- if (auto RIFF = readIndexFile(Data))
- Slab = std::move(RIFF->Symbols);
- else
- llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n";
- } else {
- trace::Span Tracer("ParseYAML");
- Slab = symbolsFromYAML(Data);
+std::string toYAML(const Symbol& S) {
+ std::string Buf;
+ {
+ llvm::raw_string_ostream OS(Buf);
+ llvm::yaml::Output Yout(OS);
+ Symbol Sym = S; // copy: Yout<< requires mutability.
+ OS << Sym;
}
+ return Buf;
+}
- if (!Slab)
- return nullptr;
- trace::Span Tracer("BuildIndex");
- return UseDex ? dex::Dex::build(std::move(*Slab), URISchemes)
- : MemIndex::build(std::move(*Slab), RefSlab());
+Expected<Symbol> symbolFromYAML(llvm::yaml::Input &Yin) {
+ Symbol S;
+ Yin >> S;
+ if (Yin.error())
+ return llvm::errorCodeToError(Yin.error());
+ return S;
}
} // namespace clangd
Index: clangd/index/SymbolYAML.h
===================================================================
--- clangd/index/SymbolYAML.h
+++ /dev/null
@@ -1,54 +0,0 @@
-//===--- SymbolYAML.h --------------------------------------------*- C++-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// SymbolYAML provides facilities to convert Symbol to YAML, and vice versa.
-// The YAML format of Symbol is designed for simplicity and experiment, but
-// isn't a suitable/efficient store.
-//
-// This is for **experimental** only. Don't use it in the production code.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H
-
-#include "Index.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/YAMLTraits.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace clang {
-namespace clangd {
-
-// Read symbols from a YAML-format string.
-SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent);
-
-// Read one symbol from a YAML-stream.
-// The returned symbol is backed by Input.
-Symbol SymbolFromYAML(llvm::yaml::Input &Input);
-
-// Convert a single symbol to YAML-format string.
-// The YAML result is safe to concatenate.
-std::string SymbolToYAML(Symbol Sym);
-
-// Convert symbols to a YAML-format string.
-// The YAML result is safe to concatenate if you have multiple symbol slabs.
-void SymbolsToYAML(const SymbolSlab &Symbols, llvm::raw_ostream &OS);
-
-// Build an in-memory static index for global symbols from a symbol file.
-// The size of global symbols should be relatively small, so that all symbols
-// can be managed in memory.
-std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
- llvm::ArrayRef<std::string> URISchemes,
- bool UseDex = true);
-
-} // namespace clangd
-} // namespace clang
-
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H
Index: clangd/index/Serialization.h
===================================================================
--- clangd/index/Serialization.h
+++ clangd/index/Serialization.h
@@ -7,41 +7,68 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides a compact binary serialization of indexed symbols.
+// This file provides serialization of indexed symbols and other data.
//
-// It writes two sections:
+// It writes sections:
+// - metadata such as version info
// - a string table (which is compressed)
// - lists of encoded symbols
//
-// The format has a simple versioning scheme: the version is embedded in the
-// data and non-current versions are rejected when reading.
+// The format has a simple versioning scheme: the format version number is
+// written in the file and non-current versions are rejected when reading.
+//
+// Human-readable YAML serialization is also supported, and recommended for
+// debugging and experiments only.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
#include "Index.h"
#include "llvm/Support/Error.h"
+namespace llvm {
+namespace yaml {
+class Input;
+}
+} // namespace llvm
namespace clang {
namespace clangd {
+enum class IndexFileFormat {
+ RIFF, // Versioned binary format, suitable for production use.
+ YAML, // Human-readable format, suitable for experiments and debugging.
+};
+
// Specifies the contents of an index file to be written.
struct IndexFileOut {
const SymbolSlab *Symbols;
// TODO: Support serializing symbol occurrences.
// TODO: Support serializing Dex posting lists.
+ IndexFileFormat Format = IndexFileFormat::RIFF;
};
// Serializes an index file. (This is a RIFF container chunk).
-llvm::raw_ostream &operator<<(llvm::raw_ostream &, const IndexFileOut &);
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
// Holds the contents of an index file that was read.
struct IndexFileIn {
llvm::Optional<SymbolSlab> Symbols;
+ IndexFileFormat Format;
};
// Parse an index file. The input must be a RIFF container chunk.
llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
+std::string toYAML(const Symbol&);
+// Returned symbol is backed by the YAML input.
+// FIXME: this is only needed for IndexerMain, find a better solution.
+llvm::Expected<Symbol> symbolFromYAML(llvm::yaml::Input &);
+
+// Build an in-memory static index from an index file.
+// The size should be relatively small, so data can be managed in memory.
+std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef Filename,
+ llvm::ArrayRef<std::string> URISchemes,
+ bool UseDex = true);
+
} // namespace clangd
} // namespace clang
Index: clangd/index/Serialization.cpp
===================================================================
--- clangd/index/Serialization.cpp
+++ clangd/index/Serialization.cpp
@@ -9,6 +9,8 @@
#include "Serialization.h"
#include "Index.h"
#include "RIFF.h"
+#include "Trace.h"
+#include "dex/Dex.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
@@ -294,8 +296,6 @@
return Sym;
}
-} // namespace
-
// FILE ENCODING
// A file is a RIFF chunk with type 'CdIx'.
// It contains the sections:
@@ -308,7 +308,7 @@
// data. Later we may want to support some backward compatibility.
constexpr static uint32_t Version = 4;
-Expected<IndexFileIn> readIndexFile(StringRef Data) {
+Expected<IndexFileIn> readRIFF(StringRef Data) {
auto RIFF = riff::readFile(Data);
if (!RIFF)
return RIFF.takeError();
@@ -343,7 +343,7 @@
return std::move(Result);
}
-raw_ostream &operator<<(raw_ostream &OS, const IndexFileOut &Data) {
+void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
assert(Data.Symbols && "An index file without symbols makes no sense!");
riff::File RIFF;
RIFF.Type = riff::fourCC("CdIx");
@@ -377,7 +377,64 @@
}
RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
- return OS << RIFF;
+ OS << RIFF;
+}
+
+} // namespace
+
+// Defined in YAMLSerialization.cpp.
+void writeYAML(const IndexFileOut &, raw_ostream &);
+Expected<IndexFileIn> readYAML(StringRef);
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
+ switch (O.Format) {
+ case IndexFileFormat::RIFF:
+ writeYAML(O, OS);
+ break;
+ case IndexFileFormat::YAML:
+ writeRIFF(O, OS);
+ break;
+ }
+ return OS;
+}
+
+Expected<IndexFileIn> readIndexFile(StringRef Data) {
+ if (Data.startswith("RIFF")) {
+ return readRIFF(Data);
+ } else if (auto YAMLContents = readYAML(Data)) {
+ return std::move(*YAMLContents);
+ } else {
+ return makeError("Not a RIFF file and failed to parse as YAML: " +
+ llvm::toString(YAMLContents.takeError()));
+ }
+}
+
+std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
+ llvm::ArrayRef<std::string> URISchemes,
+ bool UseDex) {
+ trace::Span OverallTracer("LoadIndex");
+ auto Buffer = MemoryBuffer::getFile(SymbolFilename);
+ if (!Buffer) {
+ llvm::errs() << "Can't open " << SymbolFilename << "\n";
+ return nullptr;
+ }
+
+ SymbolSlab Symbols;
+ RefSlab Refs;
+ {
+ trace::Span Tracer("ParseIndex");
+ if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
+ if (I->Symbols)
+ Symbols = std::move(*I->Symbols);
+ } else {
+ llvm::errs() << "Bad Index: " << llvm::toString(I.takeError()) << "\n";
+ return nullptr;
+ }
+ }
+
+ trace::Span Tracer("BuildIndex");
+ return UseDex ? dex::Dex::build(std::move(Symbols), URISchemes)
+ : MemIndex::build(std::move(Symbols), std::move(Refs));
}
} // namespace clangd
Index: clangd/CMakeLists.txt
===================================================================
--- clangd/CMakeLists.txt
+++ clangd/CMakeLists.txt
@@ -44,7 +44,7 @@
index/Merge.cpp
index/Serialization.cpp
index/SymbolCollector.cpp
- index/SymbolYAML.cpp
+ index/YAMLSerialization.cpp
index/dex/Dex.cpp
index/dex/Iterator.cpp
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits