llvmbot wrote: @llvm/pr-subscribers-clang
<details> <summary>Changes</summary> Create a clang tool to merge all the JSON symbol graphs emitted by the --emit-symbol-graph or -extract-api options into one unified JSON symbol graph file. Differential Revision: https://reviews.llvm.org/D158646 -- Patch is 127.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/65894.diff 16 Files Affected: - (modified) clang-tools-extra/CMakeLists.txt (+1) - (added) clang-tools-extra/clang-symbolgraph-merger/CMakeLists.txt (+3) - (added) clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraph.h (+48) - (added) clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraphMerger.h (+45) - (added) clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraphVisitor.h (+68) - (added) clang-tools-extra/clang-symbolgraph-merger/lib/CMakeLists.txt (+14) - (added) clang-tools-extra/clang-symbolgraph-merger/lib/SymbolGraph.cpp (+243) - (added) clang-tools-extra/clang-symbolgraph-merger/lib/SymbolGraphMerger.cpp (+290) - (added) clang-tools-extra/clang-symbolgraph-merger/tool/CMakeLists.txt (+13) - (added) clang-tools-extra/clang-symbolgraph-merger/tool/SymbolGraphMergerMain.cpp (+125) - (modified) clang/include/clang/ExtractAPI/API.h (+212-151) - (modified) clang/include/clang/ExtractAPI/AvailabilityInfo.h (+6) - (modified) clang/include/clang/ExtractAPI/ExtractAPIVisitor.h (+43-135) - (modified) clang/lib/ExtractAPI/API.cpp (+46-44) - (modified) clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp (+5-5) - (modified) clang/tools/libclang/CXExtractAPI.cpp (+6-3) <pre> diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt index 6a3f741721ee6c7..a4052e0894076ef 100644 --- a/clang-tools-extra/CMakeLists.txt +++ b/clang-tools-extra/CMakeLists.txt @@ -13,6 +13,7 @@ if(CLANG_INCLUDE_TESTS) endif() endif() +add_subdirectory(clang-symbolgraph-merger) add_subdirectory(clang-apply-replacements) 
add_subdirectory(clang-reorder-fields) add_subdirectory(modularize) diff --git a/clang-tools-extra/clang-symbolgraph-merger/CMakeLists.txt b/clang-tools-extra/clang-symbolgraph-merger/CMakeLists.txt new file mode 100644 index 000000000000000..a071a8a11693337 --- /dev/null +++ b/clang-tools-extra/clang-symbolgraph-merger/CMakeLists.txt @@ -0,0 +1,3 @@ +include_directories(include) +add_subdirectory(lib) +add_subdirectory(tool) diff --git a/clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraph.h b/clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraph.h new file mode 100755 index 000000000000000..a613f833ffad73b --- /dev/null +++ b/clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraph.h @@ -0,0 +1,48 @@ +#ifndef SYMBOLGRAPH_H +#define SYMBOLGRAPH_H + +#include "clang/Basic/LangStandard.h" +#include "clang/ExtractAPI/API.h" +#include "clang/ExtractAPI/AvailabilityInfo.h" +#include "clang/ExtractAPI/DeclarationFragments.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" +#include <memory> +#include <vector> + +namespace sgmerger { + +// see https://github.com/apple/swift-docc-symbolkit/blob/main/openapi.yaml +struct SymbolGraph { + + struct Symbol { + Symbol(const llvm::json::Object &SymbolObj); + + llvm::json::Object SymbolObj; + std::string AccessLevel; + clang::extractapi::APIRecord::RecordKind Kind; + clang::extractapi::DeclarationFragments DeclFragments; + clang::extractapi::FunctionSignature FunctionSign; + std::string Name; + std::string USR; + clang::extractapi::AvailabilitySet Availabilities; + clang::extractapi::DocComment Comments; + clang::extractapi::RecordLocation Location; + clang::extractapi::DeclarationFragments SubHeadings; + + // underlying type in case of Typedef + clang::extractapi::SymbolReference UnderLyingType; + }; + + SymbolGraph(const llvm::StringRef JSON); + llvm::json::Object 
SymbolGraphObject; + llvm::json::Object Metadata; + llvm::json::Object Module; + std::vector<Symbol> Symbols; + llvm::json::Array Relationships; +}; + +} // namespace sgmerger + +#endif /* SYMBOLGRAPH_H */ diff --git a/clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraphMerger.h b/clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraphMerger.h new file mode 100755 index 000000000000000..179cadafd877825 --- /dev/null +++ b/clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraphMerger.h @@ -0,0 +1,45 @@ +#ifndef SYMBOLGRAPHMERGER_H +#define SYMBOLGRAPHMERGER_H + +#include "clang-symbolgraph-merger/SymbolGraph.h" +#include "clang-symbolgraph-merger/SymbolGraphVisitor.h" +#include "clang/Basic/LangStandard.h" +#include "clang/ExtractAPI/API.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/TargetParser/Triple.h" +#include <memory> + +namespace sgmerger { + +using SymbolMap = llvm::DenseMap<llvm::StringRef, const SymbolGraph::Symbol *>; + +class SymbolGraphMerger : public SymbolGraphVisitor<SymbolGraphMerger> { +public: + SymbolGraphMerger(const clang::SmallVector<SymbolGraph> &SymbolGraphs, + const std::string &ProductName = "") + : ProductName(ProductName), Lang(clang::Language::Unknown), + SymbolGraphs(SymbolGraphs) {} + bool merge(); + bool visitMetadata(const llvm::json::Object &Metadata); + bool visitModule(const llvm::json::Object &Module); + bool visitSymbol(const SymbolGraph::Symbol &Symbol); + bool visitRelationship(const llvm::json::Object &Relationship); + +private: + std::string Generator; + + // stuff required to construct the APISet + std::string ProductName; + llvm::Triple Target; + clang::Language Lang; + + SymbolMap PendingSymbols; + SymbolMap VisitedSymbols; + + const clang::SmallVector<SymbolGraph> &SymbolGraphs; +}; + +} // namespace sgmerger + +#endif /* SYMBOLGRAPHMERGER_H */ diff --git 
a/clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraphVisitor.h b/clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraphVisitor.h new file mode 100755 index 000000000000000..6e2042784147a62 --- /dev/null +++ b/clang-tools-extra/clang-symbolgraph-merger/include/clang-symbolgraph-merger/SymbolGraphVisitor.h @@ -0,0 +1,68 @@ +#ifndef SYMBOLGRAPHVISITOR_H +#define SYMBOLGRAPHVISITOR_H + +#include "clang-symbolgraph-merger/SymbolGraph.h" +#include "clang/ExtractAPI/API.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" +#include <memory> + +namespace sgmerger { + +// Visits a symbol graph object and records the extracted info to API +template <typename Derived> class SymbolGraphVisitor { +public: + bool traverseSymbolGraph(const SymbolGraph &SG) { + bool Success = true; + Success = (getDerived()->visitMetadata(SG.Metadata) && + getDerived()->visitModule(SG.Module) && + getDerived()->traverseSymbols(SG.Symbols) && + getDerived()->traverseRelationships(SG.Relationships)); + + return Success; + } + + bool traverseSymbols(const std::vector<SymbolGraph::Symbol> &Symbols) { + bool Success = true; + for (const auto &Symbol : Symbols) + Success = getDerived()->visitSymbol(Symbol); + return Success; + } + + bool traverseRelationships(const llvm::json::Array &Relationships) { + bool Success = true; + for (const auto &RelValue : Relationships) { + if (const auto *RelObj = RelValue.getAsObject()) + Success = getDerived()->visitRelationship(*RelObj); + } + return Success; + } + + bool visitMetadata(const llvm::json::Object &Metadata); + bool visitModule(const llvm::json::Object &Module); + bool visitSymbol(const SymbolGraph::Symbol &Symbol); + bool visitRelationship(const llvm::json::Object &Relationship); + + std::unique_ptr<clang::extractapi::APISet> getAPISet() { + return std::move(API); + } + +protected: + std::unique_ptr<clang::extractapi::APISet> API; + +public: + SymbolGraphVisitor(const 
SymbolGraphVisitor &) = delete; + SymbolGraphVisitor(SymbolGraphVisitor &&) = delete; + SymbolGraphVisitor &operator=(const SymbolGraphVisitor &) = delete; + SymbolGraphVisitor &operator=(SymbolGraphVisitor &&) = delete; + +protected: + SymbolGraphVisitor() : API(nullptr) {} + ~SymbolGraphVisitor() = default; + + Derived *getDerived() { return static_cast<Derived *>(this); }; +}; + +} // namespace sgmerger + +#endif /* SYMBOLGRAPHVISITOR_H */ diff --git a/clang-tools-extra/clang-symbolgraph-merger/lib/CMakeLists.txt b/clang-tools-extra/clang-symbolgraph-merger/lib/CMakeLists.txt new file mode 100755 index 000000000000000..5f0bcc65c4762e2 --- /dev/null +++ b/clang-tools-extra/clang-symbolgraph-merger/lib/CMakeLists.txt @@ -0,0 +1,14 @@ +set(LLVM_LINK_COMPONENTS Support) + +add_clang_library(clangSymbolGraphMerger + SymbolGraphMerger.cpp + SymbolGraph.cpp + ) + +clang_target_link_libraries(clangSymbolGraphMerger + PRIVATE + clangBasic + clangToolingCore + clangToolingInclusions + clangExtractAPI +) diff --git a/clang-tools-extra/clang-symbolgraph-merger/lib/SymbolGraph.cpp b/clang-tools-extra/clang-symbolgraph-merger/lib/SymbolGraph.cpp new file mode 100755 index 000000000000000..030a9bda99db08e --- /dev/null +++ b/clang-tools-extra/clang-symbolgraph-merger/lib/SymbolGraph.cpp @@ -0,0 +1,243 @@ +#include "clang-symbolgraph-merger/SymbolGraph.h" +#include "clang/ExtractAPI/API.h" +#include "clang/ExtractAPI/AvailabilityInfo.h" +#include "clang/ExtractAPI/DeclarationFragments.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/VersionTuple.h" +#include <cassert> +#include <cstring> +#include <memory> +#include <vector> + +using namespace sgmerger; +using namespace llvm; +using namespace llvm::json; +using namespace clang::extractapi; + +namespace { + +APIRecord::RecordKind getSymbolKind(const Object &Kind) { + + if (auto Identifier = Kind.getString("identifier")) { + // Remove language prefix + auto Id = 
Identifier->split('.').second; + if (Id.equals("func")) + return APIRecord::RK_GlobalFunction; + if (Id.equals("var")) + return APIRecord::RK_GlobalVariable; + if (Id.equals("enum.case")) + return APIRecord::RK_EnumConstant; + if (Id.equals("enum")) + return APIRecord::RK_Enum; + if (Id.equals("property")) + return APIRecord::RK_StructField; + if (Id.equals("struct")) + return APIRecord::RK_Struct; + if (Id.equals("ivar")) + return APIRecord::RK_ObjCIvar; + if (Id.equals("method")) + return APIRecord::RK_ObjCInstanceMethod; + if (Id.equals("type.method")) + return APIRecord::RK_ObjCClassMethod; + if (Id.equals("property")) + return APIRecord::RK_ObjCInstanceProperty; + if (Id.equals("type.property")) + return APIRecord::RK_ObjCClassProperty; + if (Id.equals("class")) + return APIRecord::RK_ObjCInterface; + if (Id.equals("protocol")) + return APIRecord::RK_ObjCProtocol; + if (Id.equals("macro")) + return APIRecord::RK_MacroDefinition; + if (Id.equals("typealias")) + return APIRecord::RK_Typedef; + } + return APIRecord::RK_Unknown; +} + +VersionTuple parseVersionTupleFromJSON(const Object *VTObj) { + auto Major = VTObj->getInteger("major").value_or(0); + auto Minor = VTObj->getInteger("minor").value_or(0); + auto Patch = VTObj->getInteger("patch").value_or(0); + return VersionTuple(Major, Minor, Patch); +} + +RecordLocation parseSourcePositionFromJSON(const Object *PosObj, + std::string Filename = "") { + assert(PosObj); + unsigned Line = PosObj->getInteger("line").value_or(0); + unsigned Col = PosObj->getInteger("character").value_or(0); + return RecordLocation(Line, Col, Filename); +} + +RecordLocation parseRecordLocationFromJSON(const Object *LocObj) { + assert(LocObj); + + std::string Filename(LocObj->getString("uri").value_or("")); + // extract file name from URI + std::string URIScheme = "file://"; + if (Filename.find(URIScheme) == 0) + Filename.erase(0, URIScheme.length()); + + const auto *PosObj = LocObj->getObject("position"); + + return 
parseSourcePositionFromJSON(PosObj, Filename); +} + +DocComment parseCommentsFromJSON(const Object *CommentsObj) { + assert(CommentsObj); + const auto *LinesArray = CommentsObj->getArray("lines"); + DocComment Comments; + if (LinesArray) { + for (auto &LineValue : *LinesArray) { + const auto *LineObj = LineValue.getAsObject(); + auto Text = LineObj->getString("text").value_or(""); + + // parse range + const auto *BeginLocObj = LineObj->getObject("start"); + RecordLocation BeginLoc = parseSourcePositionFromJSON(BeginLocObj); + const auto *EndLocObj = LineObj->getObject("end"); + RecordLocation EndLoc = parseSourcePositionFromJSON(EndLocObj); + Comments.push_back(CommentLine(Text, BeginLoc, EndLoc)); + } + } + return Comments; +} + +AvailabilitySet parseAvailabilitiesFromJSON(const Array *AvailablityArray) { + if (AvailablityArray) { + SmallVector<AvailabilityInfo, 4> AList; + for (auto &AvailablityValue : *AvailablityArray) { + const auto *AvailablityObj = AvailablityValue.getAsObject(); + auto Domain = AvailablityObj->getString("domain").value_or(""); + auto IntroducedVersion = parseVersionTupleFromJSON( + AvailablityObj->getObject("introducedVersion")); + auto ObsoletedVersion = parseVersionTupleFromJSON( + AvailablityObj->getObject("obsoletedVersion")); + auto DeprecatedVersion = parseVersionTupleFromJSON( + AvailablityObj->getObject("deprecatedVersion")); + AList.emplace_back(AvailabilityInfo(Domain, IntroducedVersion, + DeprecatedVersion, ObsoletedVersion, + false)); + } + return AvailabilitySet(AList); + } + return nullptr; +} + +DeclarationFragments parseDeclFragmentsFromJSON(const Array *FragmentsArray) { + DeclarationFragments Fragments; + if (FragmentsArray) { + for (auto &FragmentValue : *FragmentsArray) { + Object FragmentObj = *(FragmentValue.getAsObject()); + auto Spelling = FragmentObj.getString("spelling").value_or(""); + auto FragmentKind = DeclarationFragments::parseFragmentKindFromString( + FragmentObj.getString("kind").value_or("")); + StringRef 
PreciseIdentifier = + FragmentObj.getString("preciseIdentifier").value_or(""); + Fragments.append(Spelling, FragmentKind, PreciseIdentifier); + } + } + return Fragments; +} + +FunctionSignature parseFunctionSignaturesFromJSON(const Object *SignaturesObj) { + FunctionSignature ParsedSignatures; + if (SignaturesObj) { + // parse return type + const auto *RT = SignaturesObj->getArray("returns"); + ParsedSignatures.setReturnType(parseDeclFragmentsFromJSON(RT)); + + // parse function parameters + if (const auto *ParamArray = SignaturesObj->getArray("parameters")) { + for (auto &Param : *ParamArray) { + auto ParamObj = *(Param.getAsObject()); + auto Name = ParamObj.getString("name").value_or(""); + auto Fragments = parseDeclFragmentsFromJSON( + ParamObj.getArray("declarationFragments")); + ParsedSignatures.addParameter(Name, Fragments); + } + } + } + return ParsedSignatures; +} + +std::vector<SymbolGraph::Symbol> +parseSymbolsFromJSON(const Array *SymbolsArray) { + std::vector<SymbolGraph::Symbol> SymbolsVector; + if (SymbolsArray) { + for (const auto &S : *SymbolsArray) + if (const auto *Symbol = S.getAsObject()) + SymbolsVector.push_back(SymbolGraph::Symbol(*Symbol)); + } + return SymbolsVector; +} + +} // namespace + +SymbolGraph::Symbol::Symbol(const Object &SymbolObject) + : SymbolObj(SymbolObject) { + + AccessLevel = SymbolObj.getString("accessLevel").value_or("unknown"); + Kind = getSymbolKind(*(SymbolObject.getObject("kind"))); + + // parse Doc comments + if (const auto *CommentsArray = SymbolObject.getObject("docComment")) + Comments = parseCommentsFromJSON(CommentsArray); + + // parse Availabilityinfo + if (const auto *AvailabilityArray = SymbolObj.getArray("availability")) + Availabilities = parseAvailabilitiesFromJSON(AvailabilityArray); + + // parse declaration fragments + if (const auto *FragmentsArray = SymbolObj.getArray("declarationFragments")) + DeclFragments = parseDeclFragmentsFromJSON(FragmentsArray); + + // parse function signatures if any + if 
(const auto *FunctionSignObj = SymbolObj.getObject("functionSignature")) + FunctionSign = parseFunctionSignaturesFromJSON(FunctionSignObj); + + // parse identifier + if (const auto *IDObj = SymbolObj.getObject("identifier")) + USR = IDObj->getString("precise").value_or(""); + + // parse Location + if (const auto *LocObj = SymbolObject.getObject("location")) + Location = parseRecordLocationFromJSON(LocObj); + + // parse name and subheadings. + if (const auto *NamesObj = SymbolObj.getObject("names")) { + Name = NamesObj->getString("title").value_or(""); + if (const auto *SubHObj = NamesObj->getArray("subHeading")) + SubHeadings = parseDeclFragmentsFromJSON(SubHObj); + } + + // parse underlying type in case of Typedef + auto UType = SymbolObject.getString("type"); + if (UType.has_value()) { + auto UTypeUSR = UType.value(); + // FIXME: this is a hacky way for Underlying type to be + // serialized into the final graph. Get someway to extract the + // actual name of the underlying type from USR + UnderLyingType = SymbolReference(" ", UTypeUSR); + } +} + +SymbolGraph::SymbolGraph(const llvm::StringRef JSON) { + Expected<llvm::json::Value> SGValue = llvm::json::parse(JSON); + if (SGValue) { + assert(SGValue && SGValue->kind() == llvm::json::Value::Object); + if (const auto *SGObject = SGValue->getAsObject()) { + SymbolGraphObject = *SGObject; + if (const auto *MetadataObj = SGObject->getObject("metadata")) + Metadata = *MetadataObj; + if (const auto *ModuleObj = SGObject->getObject("module")) + Module = *ModuleObj; + if (const auto *RelArray = SGObject->getArray("relationships")) + Relationships = *RelArray; + + Symbols = parseSymbolsFromJSON(SGObject->getArray("symbols")); + } + } +} diff --git a/clang-tools-extra/clang-symbolgraph-merger/lib/SymbolGraphMerger.cpp b/clang-tools-extra/clang-symbolgraph-merger/lib/SymbolGraphMerger.cpp new file mode 100755 index 000000000000000..71facea3e6ba8bc --- /dev/null +++ 
b/clang-tools-extra/clang-symbolgraph-merger/lib/SymbolGraphMerger.cpp @@ -0,0 +1,290 @@ +#include "clang-symbolgraph-merger/SymbolGraphMerger.h" +#include "clang/AST/DeclObjC.h" +#include "clang/ExtractAPI/API.h" +#include "clang/ExtractAPI/AvailabilityInfo.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include <memory> + +using namespace llvm; +using namespace llvm::json; +using namespace clang; +using namespace clang::extractapi; +using namespace sgmerger; + +namespace { +ObjCInstanceVariableRecord::AccessControl +getAccessFromString(const StringRef AccessLevel) { + if (AccessLevel.equals("Private")) + return ObjCInstanceVariableRecord::AccessControl::Private; + if (AccessLevel.equals("Protected")) + return ObjCInstanceVariableRecord::AccessControl::Protected; + if (AccessLevel.equals("Public")) + return ObjCInstanceVariableRecord::AccessControl::Public; + if (AccessLevel.equals("Package")) + return ObjCInstanceVariableRecord::AccessControl::Package; + return ObjCInstanceVariableRecord::AccessControl::None; +} + +Language getLanguageFromString(const StringRef LangName) { + if (LangName.equals("c")) + return Language::C; + if (LangName.equals("objective-c")) + return Language::ObjC; + if (LangName.equals("C++")) + return Language::CXX; + + return Language::Unknown; +} + +template <typename Lambda> +bool addWithContainerRecord(APIRecord::RecordKind Kind, APIRecord *TargetRecord, + Lambda Inserter) { + switch (Kind) { + case APIRecord::RK_ObjCInterface: { + if (ObjCInterfaceRecord *Container = + dyn_cast_or_null<ObjCInterfaceRecord>(TargetRecord)) + Inserter(Container); + } break; + case APIRecord::RK_ObjCProtocol: { + if (ObjCProtocolRecord *Container = + dyn_cast_or_null<ObjCProtocolRecord>(TargetRecord)) + Inserter(Container); + } break; + case APIRecord::RK_ObjCCategory: { + if (ObjCCategoryRecord *Container = + dyn_cast_or_null<ObjCCategoryRecord>(TargetRecord)) + Inserter(Container); + } break; + default: + retur... 
<truncated> </pre> </details> https://github.com/llvm/llvm-project/pull/65894 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits