================ @@ -49,6 +54,185 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry) namespace { +/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized declarations +/// to aid debugging and bug minimization. It implements ASTConsumer and +/// ASTDeserializationListener, so that an object of +/// DeserializedDeclsLineRangePrinter registers as its own listener. The +/// ASTDeserializationListener interface provides the DeclRead callback that we +/// use to collect the deserialized Decls. Note that printing or otherwise +/// processing them as this point is dangerous, since that could trigger +/// additional deserialization and crash compilation. Therefore, we process the +/// collected Decls in HandleTranslationUnit method of ASTConsumer. This is a +/// safe point, since we know that by this point all the Decls needed by the +/// compiler frontend have been deserialized. In case our processing causes +/// further deserialization, DeclRead from the listener might be called again. +/// However, at that point we don't accept any more Decls for processing. +class DeserializedDeclsLineRangePrinter : public ASTConsumer, + ASTDeserializationListener { +public: + explicit DeserializedDeclsLineRangePrinter( + SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS) + : ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {} + + ASTDeserializationListener *GetASTDeserializationListener() override { + return this; + } + + void DeclRead(GlobalDeclID ID, const Decl *D) override { + if (!IsCollectingDecls) + return; + if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) || + isa<NamespaceDecl>(D)) { + // These decls cover a lot of nested declarations that might not be used, + // reducing the granularity and making the output less useful. + return; + } + if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) { + // We choose to work at namespace level to reduce complexity and the + // number of cases we care about. 
+ return; + } + PendingDecls.push_back(D); + } + + struct Position { + unsigned Line; + unsigned Column; + + bool operator<(const Position &other) const { + if (Line < other.Line) + return true; + if (Line > other.Line) + return false; + return Column < other.Column; + } + + static Position GetBeginSpelling(const SourceManager &SM, + const CharSourceRange &R) { + SourceLocation Begin = R.getBegin(); + return {SM.getSpellingLineNumber(Begin), + SM.getSpellingColumnNumber(Begin)}; + } + + static Position GetEndSpelling(const SourceManager &SM, + const CharSourceRange &Range, + const LangOptions &LangOpts) { + // For token ranges, compute end location for end character of the range. + // The end location of returned range is exclusive. + CharSourceRange R = Lexer::getAsCharRange(Range, SM, LangOpts); + SourceLocation End = R.getEnd(); + // Relex the token past the end location of the last token in the source + // range. If it's a semicolon, advance the location by one token. + Token PossiblySemi; + Lexer::getRawToken(End, PossiblySemi, SM, LangOpts, true); + if (PossiblySemi.is(tok::semi)) + End = End.getLocWithOffset(1); + return {SM.getSpellingLineNumber(End), SM.getSpellingColumnNumber(End)}; + } + }; + + struct RequiredRanges { + StringRef Filename; + std::vector<std::pair<Position, Position>> FromTo; + }; + void HandleTranslationUnit(ASTContext &Context) override { + assert(IsCollectingDecls && "HandleTranslationUnit called twice?"); + IsCollectingDecls = false; + + // Merge ranges in each of the files. 
+ struct FileData { + std::vector<std::pair<Position, Position>> FromTo; + OptionalFileEntryRef Ref; + }; + llvm::DenseMap<const FileEntry *, FileData> FileToLines; + for (const Decl *D : PendingDecls) { + CharSourceRange R = SM.getExpansionRange(D->getSourceRange()); + if (!R.isValid()) + continue; + + auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin())); + if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) { + // Such cases are rare and difficult to handle. + continue; + } + + auto &Data = FileToLines[F]; + if (!Data.Ref) + Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin())); + Data.FromTo.push_back( + {Position::GetBeginSpelling(SM, R), + Position::GetEndSpelling(SM, R, D->getLangOpts())}); + } + + // To simplify output, merge consecutive and intersecting ranges. + std::vector<RequiredRanges> Result; + for (auto &[F, Data] : FileToLines) { + auto &FromTo = Data.FromTo; + assert(!FromTo.empty()); + + if (!Data.Ref) + continue; + + llvm::sort(FromTo); + + std::vector<std::pair<Position, Position>> MergedRanges; + MergedRanges.push_back(FromTo.front()); + for (auto It = FromTo.begin() + 1; It < FromTo.end(); ++It) { + if (MergedRanges.back().second < It->first) { + MergedRanges.push_back(*It); + continue; + } + if (MergedRanges.back().second < It->second) + MergedRanges.back().second = It->second; + } + Result.push_back({Data.Ref->getName(), MergedRanges}); + } + printJson(Result); + } + +private: + std::vector<const Decl *> PendingDecls; + bool IsCollectingDecls = true; + const SourceManager &SM; + std::unique_ptr<llvm::raw_ostream> OS; + + void printJson(llvm::ArrayRef<RequiredRanges> Result) { + *OS << "{\n"; ---------------- VitaNuo wrote:
I've run some tests, and the maximum I'm getting is 4.4 MB of JSON for a very large compilation. I think this should be palatable. For a smaller compilation, it's often <1 MB. https://github.com/llvm/llvm-project/pull/133910 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits