idlecode updated this revision to Diff 102110. idlecode added a comment. Added tests for `isNewLineEscaped`, which helped fix some corner cases.
https://reviews.llvm.org/D30748 Files: lib/Lex/Lexer.cpp unittests/Lex/LexerTest.cpp
Index: unittests/Lex/LexerTest.cpp =================================================================== --- unittests/Lex/LexerTest.cpp +++ unittests/Lex/LexerTest.cpp @@ -25,6 +25,8 @@ using namespace clang; +bool isNewLineEscaped(const char *BufferStart, const char *Str); + namespace { // The test fixture. @@ -365,4 +367,53 @@ EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U); } +TEST_F(LexerTest, IsNewLineEscapedValid) { + std::vector<std::pair<bool, std::string>> TestLines = { + {true, "\\\r"}, {true, "\\\n"}, {true, "\\\r\n"}, + {true, "\\\n\r"}, {true, "\\ \t\v\f\r"}, {true, "\\ \t\v\f\r\n"}, + {false, "\\\r\r"}, {false, "\\\r\r\n"}, {false, "\\\n\n"}, + {false, "\r"}, {false, "\n"}, {false, "\r\n"}, + {false, "\n\r"}, {false, "\r\r"}, {false, "\n\n"}}; + + int i = 1; + for (const std::pair<bool, std::string> &Pattern : TestLines) { + bool IsEscaped = Pattern.first; + const std::string &Line = Pattern.second; + EXPECT_EQ(IsEscaped, + isNewLineEscaped(Line.c_str(), Line.c_str() + Line.length() - 1)) + << "Pattern #" << i << " not recognized as escaped new line\n"; + ++i; + } +} + +TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { + // Each line should have the same length for + // further offset calculation to be more straightforward. 
+ const unsigned IdentifierLength = 8; + std::string TextToLex = "rabarbar\n" + "foo\\\nbar\n" + "foo\\\rbar\n" + "fo\\\r\nbar\n" + "foo\\\n\rba\n"; + std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; + std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens); + + for (const Token &Tok : LexedTokens) { + std::pair<FileID, unsigned> OriginalLocation = + SourceMgr.getDecomposedLoc(Tok.getLocation()); + for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { + SourceLocation LookupLocation = + Tok.getLocation().getLocWithOffset(Offset); + + std::pair<FileID, unsigned> FoundLocation = + SourceMgr.getDecomposedExpansionLoc( + Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts)); + + // Check that location returned by the GetBeginningOfToken + // is the same as original token location reported by Lexer. + EXPECT_EQ(FoundLocation.second, OriginalLocation.second); + } + } +} + } // anonymous namespace Index: lib/Lex/Lexer.cpp =================================================================== --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -456,25 +456,45 @@ return false; } +/// \brief Check if new line pointed by Str is escaped. +bool isNewLineEscaped(const char *BufferStart, const char *Str) { + assert(isVerticalWhitespace(Str[0])); + if (Str - 1 < BufferStart) + return false; + + if ((Str[0] == '\n' && Str[-1] == '\r') || + (Str[0] == '\r' && Str[-1] == '\n')) { + if (Str - 2 < BufferStart) + return false; + --Str; + } + --Str; + + // Rewind to first non-space character: + while (isHorizontalWhitespace(*Str) && Str > BufferStart) + --Str; + + return *Str == '\\'; +} + /// Returns the pointer that points to the beginning of line that contains /// the given offset, or null if the offset if invalid. 
static const char *findBeginningOfLine(StringRef Buffer, unsigned Offset) { const char *BufStart = Buffer.data(); if (Offset >= Buffer.size()) return nullptr; - const char *StrData = BufStart + Offset; - if (StrData[0] == '\n' || StrData[0] == '\r') - return StrData; + const char *LexStart = BufStart + Offset; + for (; LexStart != BufStart; --LexStart) { + if (!isVerticalWhitespace(LexStart[0])) + continue; - const char *LexStart = StrData; - while (LexStart != BufStart) { - if (LexStart[0] == '\n' || LexStart[0] == '\r') { - ++LexStart; - break; - } + if (isNewLineEscaped(BufStart, LexStart)) + continue; - --LexStart; + // LexStart should point at first character of logical line. + ++LexStart; + break; } return LexStart; } @@ -486,7 +506,7 @@ std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); if (LocInfo.first.isInvalid()) return Loc; - + bool Invalid = false; StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); if (Invalid) @@ -498,52 +518,52 @@ const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second); if (!LexStart || LexStart == StrData) return Loc; - + // Create a lexer starting at the beginning of this token. SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart, Buffer.end()); TheLexer.SetCommentRetentionState(true); - + // Lex tokens until we find the token that contains the source location. Token TheTok; do { TheLexer.LexFromRawLexer(TheTok); - + if (TheLexer.getBufferLocation() > StrData) { // Lexing this token has taken the lexer past the source location we're // looking for. If the current token encompasses our source location, // return the beginning of that token. if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData) return TheTok.getLocation(); - + // We ended up skipping over the source location entirely, which means // that it points into whitespace. We're done here. 
break; } } while (TheTok.getKind() != tok::eof); - + // We've passed our source location; just return the original source location. return Loc; } SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { - if (Loc.isFileID()) - return getBeginningOfFileToken(Loc, SM, LangOpts); - - if (!SM.isMacroArgExpansion(Loc)) - return Loc; - - SourceLocation FileLoc = SM.getSpellingLoc(Loc); - SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); - std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); - std::pair<FileID, unsigned> BeginFileLocInfo - = SM.getDecomposedLoc(BeginFileLoc); - assert(FileLocInfo.first == BeginFileLocInfo.first && - FileLocInfo.second >= BeginFileLocInfo.second); - return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); + if (Loc.isFileID()) + return getBeginningOfFileToken(Loc, SM, LangOpts); + + if (!SM.isMacroArgExpansion(Loc)) + return Loc; + + SourceLocation FileLoc = SM.getSpellingLoc(Loc); + SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); + std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); + std::pair<FileID, unsigned> BeginFileLocInfo + = SM.getDecomposedLoc(BeginFileLoc); + assert(FileLocInfo.first == BeginFileLocInfo.first && + FileLocInfo.second >= BeginFileLocInfo.second); + return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); } namespace {
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits