https://github.com/robozati updated https://github.com/llvm/llvm-project/pull/69849
>From 239302d66dea74333e27288acbceffdf3f2d3b6b Mon Sep 17 00:00:00 2001 From: Gabriel Pezati <gabrielpez...@gmail.com> Date: Sat, 21 Oct 2023 19:28:57 -0300 Subject: [PATCH] Fix UB on unclosed parentheses, brackets or braces in Tokens --- clang/lib/Tooling/Syntax/Tokens.cpp | 25 ++++++++++++++----- clang/unittests/Tooling/Syntax/TokensTest.cpp | 11 ++++++++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index 2f28b9cf286a638..97f794e09cb5590 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -286,9 +286,13 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { }); // Our token could only be produced by the previous mapping. if (It == File.Mappings.begin()) { - // No previous mapping, no need to modify offsets. - return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], - /*Mapping=*/nullptr}; + const auto offset = ExpandedIndex - File.BeginExpanded; + // Bounds check so parsing an unclosed parenthesis, brackets or braces do + // not result in UB. + if (offset >= File.SpelledTokens.size()) { + return {nullptr, nullptr}; + } + return {&File.SpelledTokens[offset], /*Mapping=*/nullptr}; } --It; // 'It' now points to last mapping that started before our token. @@ -298,9 +302,12 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { // Not part of the mapping, use the index from previous mapping to compute the // corresponding spelled token. - return { - &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)], - /*Mapping=*/nullptr}; + const auto offset = It->EndSpelled + (ExpandedIndex - It->EndExpanded); + // This index can also result in UB when parsing unclosed tokens. + if (offset >= File.SpelledTokens.size()) { + return {nullptr, nullptr}; + } + return {&File.SpelledTokens[offset], /*Mapping=*/nullptr}; } const TokenBuffer::Mapping * @@ -410,6 +417,12 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First); auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last); + // If there was an unclosed token, FirstSpelled or LastSpelled is null and the + // operation shouldn't continue. + if (FirstSpelled == nullptr || LastSpelled == nullptr) { + return std::nullopt; + }; + FileID FID = SourceMgr->getFileID(FirstSpelled->location()); // FIXME: Handle multi-file changes by trying to map onto a common root. if (FID != SourceMgr->getFileID(LastSpelled->location())) diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp index 0c08318a637c0b6..e1b053ce45b841b 100644 --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -585,6 +585,17 @@ TEST_F(TokenCollectorTest, DelayedParsing) { EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); } +TEST_F(TokenCollectorTest, UnclosedToken) { + llvm::StringLiteral Code = R"cpp( + int main() { + // this should not result in a segfault or UB. + )cpp"; + std::string ExpectedTokens = + "expanded tokens:\n int main ( ) {\nfile './input.cpp'\n spelled " + "tokens:\n int main ( ) {\n no mappings.\n"; + EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); +} + TEST_F(TokenCollectorTest, MultiFile) { addFile("./foo.h", R"cpp( #define ADD(X, Y) X+Y _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits