hokein created this revision. hokein added a reviewer: sammccall. Herald added a project: All. hokein requested review of this revision. Herald added a project: clang.
The magic number 50 was removed in D134942 <https://reviews.llvm.org/D134942>, as a behavior change for performance reasons. While that change reduces the number of SLocEntries, it increases SourceLocation address space usage, which is critical for compiling large TUs. This fixes a regression caused by D134942 <https://reviews.llvm.org/D134942> -- clang failed to compile one of our internal files, complaining that the file is too large to process because clang runs out of source location space (we spend 40% more address space!). Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D136539 Files: clang/lib/Lex/TokenLexer.cpp Index: clang/lib/Lex/TokenLexer.cpp =================================================================== --- clang/lib/Lex/TokenLexer.cpp +++ clang/lib/Lex/TokenLexer.cpp @@ -993,14 +993,25 @@ llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens); llvm::MutableArrayRef<Token> Partition; + // The maximum distance between two consecutive tokens in a partition. + // This is am important trick to avoid using too much SourceLocation address + // space! + const static int MaxDistance = 50; // Partition the tokens by their FileID. // This is a hot function, and calling getFileID can be expensive, the // implementation is optimized by reducing the number of getFileID. if (BeginLoc.isFileID()) { // Consecutive tokens not written in macros must be from the same file. // (Neither #include nor eof can occur inside a macro argument.) 
+ SourceLocation LastLoc = BeginLoc; Partition = All.take_while([&](const Token &T) { - return T.getLocation().isFileID(); + if (T.getLocation().isFileID()) { + unsigned distance = + T.getLocation().getRawEncoding() - LastLoc.getRawEncoding(); + LastLoc = T.getLocation(); + return distance <= MaxDistance; + } + return false; }); } else { // Call getFileID once to calculate the bounds, and use the cheaper @@ -1008,8 +1019,15 @@ FileID BeginFID = SM.getFileID(BeginLoc); SourceLocation Limit = SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID)); + SourceLocation LastLoc = BeginLoc; Partition = All.take_while([&](const Token &T) { - return T.getLocation() >= BeginLoc && T.getLocation() < Limit; + if (T.getLocation() >= BeginLoc && T.getLocation() < Limit) { + unsigned distance = + T.getLocation().getRawEncoding() - LastLoc.getRawEncoding(); + LastLoc = T.getLocation(); + return distance <= MaxDistance; + } + return false; }); } assert(!Partition.empty());
Index: clang/lib/Lex/TokenLexer.cpp =================================================================== --- clang/lib/Lex/TokenLexer.cpp +++ clang/lib/Lex/TokenLexer.cpp @@ -993,14 +993,25 @@ llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens); llvm::MutableArrayRef<Token> Partition; + // The maximum distance between two consecutive tokens in a partition. + // This is am important trick to avoid using too much SourceLocation address + // space! + const static int MaxDistance = 50; // Partition the tokens by their FileID. // This is a hot function, and calling getFileID can be expensive, the // implementation is optimized by reducing the number of getFileID. if (BeginLoc.isFileID()) { // Consecutive tokens not written in macros must be from the same file. // (Neither #include nor eof can occur inside a macro argument.) + SourceLocation LastLoc = BeginLoc; Partition = All.take_while([&](const Token &T) { - return T.getLocation().isFileID(); + if (T.getLocation().isFileID()) { + unsigned distance = + T.getLocation().getRawEncoding() - LastLoc.getRawEncoding(); + LastLoc = T.getLocation(); + return distance <= MaxDistance; + } + return false; }); } else { // Call getFileID once to calculate the bounds, and use the cheaper @@ -1008,8 +1019,15 @@ FileID BeginFID = SM.getFileID(BeginLoc); SourceLocation Limit = SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID)); + SourceLocation LastLoc = BeginLoc; Partition = All.take_while([&](const Token &T) { - return T.getLocation() >= BeginLoc && T.getLocation() < Limit; + if (T.getLocation() >= BeginLoc && T.getLocation() < Limit) { + unsigned distance = + T.getLocation().getRawEncoding() - LastLoc.getRawEncoding(); + LastLoc = T.getLocation(); + return distance <= MaxDistance; + } + return false; }); } assert(!Partition.empty());
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits