usaxena95 created this revision. usaxena95 added reviewers: kadircet, sammccall. usaxena95 requested review of this revision. Herald added subscribers: cfe-commits, ilya-biryukov. Herald added a project: clang.
`expandedTokens(SourceRange)` used to do a binary search to get the expanded tokens belonging to a source range. Each binary search uses `isBeforeInTranslationUnit` to order two source locations. This is inherently very slow. By profiling clangd we found that users like clangd::SelectionTree spend 95% of their time in `isBeforeInTranslationUnit`. It is also worth noting that users of `expandedTokens(SourceRange)` mostly query this function with ranges provided by the AST. The ranges provided by the AST are token ranges (starting at the beginning of a token and ending at the beginning of another token). Therefore we can avoid the binary search in the majority of cases by maintaining an index of the expanded tokens keyed by their SourceLocations. We still do a binary search for ranges which are not token ranges, but such instances are quite rare. Performance: `~/build/bin/clangd --check=clang/lib/Serialization/ASTReader.cpp` Before: Took 2:10s to complete. Now: Took 1:13s to complete. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D99086 Files: clang/include/clang/Tooling/Syntax/Tokens.h clang/lib/Tooling/Syntax/Tokens.cpp Index: clang/lib/Tooling/Syntax/Tokens.cpp =================================================================== --- clang/lib/Tooling/Syntax/Tokens.cpp +++ clang/lib/Tooling/Syntax/Tokens.cpp @@ -184,6 +184,18 @@ } llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const { + // Quick lookup if `R` is a token range. + // This is a huge win since majority of the users use ranges provided by an + // AST. Ranges in AST are token ranges from expanded token stream. + const auto Begin = ExpandedTokIndex.find(R.getBegin()); + const auto End = ExpandedTokIndex.find(R.getEnd()); + if (Begin != ExpandedTokIndex.end() && End != ExpandedTokIndex.end()) { + // Add 1 to End to make a half-open range. + return {ExpandedTokens.data() + Begin->getSecond(), + ExpandedTokens.data() + End->getSecond() + 1}; + } + // Slow case. 
Use `isBeforeInTranslationUnit` to binary search for the + // required range. return getTokensCovering(expandedTokens(), R, *SourceMgr); } @@ -638,6 +650,11 @@ assert(!Result.ExpandedTokens.empty()); assert(Result.ExpandedTokens.back().kind() == tok::eof); + // Index ExpandedTokens for faster lookups by SourceLocation. + unsigned ExpandedIndex = 0; + for (const Token &Tok : Result.ExpandedTokens) + Result.ExpandedTokIndex[Tok.location()] = ExpandedIndex++; + // Tokenize every file that contributed tokens to the expanded stream. buildSpelledTokens(); Index: clang/include/clang/Tooling/Syntax/Tokens.h =================================================================== --- clang/include/clang/Tooling/Syntax/Tokens.h +++ clang/include/clang/Tooling/Syntax/Tokens.h @@ -34,6 +34,7 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Token.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" @@ -366,6 +367,9 @@ /// same stream as 'clang -E' (excluding the preprocessor directives like /// #file, etc.). std::vector<syntax::Token> ExpandedTokens; + // Index of ExpandedTokens for faster lookups by SourceLocation. This is + // useful while finding expanded tokens in a 'token range'. + llvm::DenseMap<SourceLocation, unsigned> ExpandedTokIndex; llvm::DenseMap<FileID, MarkedFile> Files; // The value is never null, pointer instead of reference to avoid disabling // implicit assignment operator.
Index: clang/lib/Tooling/Syntax/Tokens.cpp =================================================================== --- clang/lib/Tooling/Syntax/Tokens.cpp +++ clang/lib/Tooling/Syntax/Tokens.cpp @@ -184,6 +184,18 @@ } llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const { + // Quick lookup if `R` is a token range. + // This is a huge win since majority of the users use ranges provided by an + // AST. Ranges in AST are token ranges from expanded token stream. + const auto Begin = ExpandedTokIndex.find(R.getBegin()); + const auto End = ExpandedTokIndex.find(R.getEnd()); + if (Begin != ExpandedTokIndex.end() && End != ExpandedTokIndex.end()) { + // Add 1 to End to make a half-open range. + return {ExpandedTokens.data() + Begin->getSecond(), + ExpandedTokens.data() + End->getSecond() + 1}; + } + // Slow case. Use `isBeforeInTranslationUnit` to binary search for the + // required range. return getTokensCovering(expandedTokens(), R, *SourceMgr); } @@ -638,6 +650,11 @@ assert(!Result.ExpandedTokens.empty()); assert(Result.ExpandedTokens.back().kind() == tok::eof); + // Index ExpandedTokens for faster lookups by SourceLocation. + unsigned ExpandedIndex = 0; + for (const Token &Tok : Result.ExpandedTokens) + Result.ExpandedTokIndex[Tok.location()] = ExpandedIndex++; + // Tokenize every file that contributed tokens to the expanded stream. buildSpelledTokens(); Index: clang/include/clang/Tooling/Syntax/Tokens.h =================================================================== --- clang/include/clang/Tooling/Syntax/Tokens.h +++ clang/include/clang/Tooling/Syntax/Tokens.h @@ -34,6 +34,7 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Token.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" @@ -366,6 +367,9 @@ /// same stream as 'clang -E' (excluding the preprocessor directives like /// #file, etc.). 
std::vector<syntax::Token> ExpandedTokens; + // Index of ExpandedTokens for faster lookups by SourceLocation. This is + // useful while finding expanded tokens in a 'token range'. + llvm::DenseMap<SourceLocation, unsigned> ExpandedTokIndex; llvm::DenseMap<FileID, MarkedFile> Files; // The value is never null, pointer instead of reference to avoid disabling // implicit assignment operator.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits