Author: Aaron Puchert Date: 2022-01-14T22:46:07+01:00 New Revision: 9f0fa6544012ed8f7b6b3d72fce6535bf4430e40
URL: https://github.com/llvm/llvm-project/commit/9f0fa6544012ed8f7b6b3d72fce6535bf4430e40 DIFF: https://github.com/llvm/llvm-project/commit/9f0fa6544012ed8f7b6b3d72fce6535bf4430e40.diff LOG: Comment parsing: Don't recognize commands in single-line double quotation This is consistent with the behavior of Doxygen, and allows users to write strings with C escapes or document input/output formats containing special characters (@ or \) without escaping them, which might be confusing. For example, if a function wants to document its expected input format as "user@host" it doesn't have to write user\@host instead, which would look right in the documentation but confusing in the code. Now users can just use double quotes (which they might do anyway). This fixes a lot of false positives of -Wdocumentation-unknown-command, but it could also fix issues with -Wdocumentation if the text triggers an actual command. Reviewed By: gribozavr2 Differential Revision: https://reviews.llvm.org/D116190 Added: Modified: clang/include/clang/AST/CommentLexer.h clang/lib/AST/CommentLexer.cpp clang/test/Sema/warn-documentation-unknown-command.cpp clang/test/Sema/warn-documentation.cpp Removed: ################################################################################ diff --git a/clang/include/clang/AST/CommentLexer.h b/clang/include/clang/AST/CommentLexer.h index 94f778501e758..9aa1681cb2c5c 100644 --- a/clang/include/clang/AST/CommentLexer.h +++ b/clang/include/clang/AST/CommentLexer.h @@ -320,6 +320,9 @@ class Lexer { /// Eat string matching regexp \code \s*\* \endcode. void skipLineStartingDecorations(); + /// Skip over pure text. + const char *skipTextToken(); + /// Lex comment text, including commands if ParseCommands is set to true. void lexCommentText(Token &T); diff --git a/clang/lib/AST/CommentLexer.cpp b/clang/lib/AST/CommentLexer.cpp index 6e00c2aa7c280..61ce8979f13f5 100644 --- a/clang/lib/AST/CommentLexer.cpp +++ b/clang/lib/AST/CommentLexer.cpp @@ -270,6 +270,29 @@ void Lexer::formTokenWithChars(Token &Result, const char *TokEnd, BufferPtr = TokEnd; } +const char *Lexer::skipTextToken() { + const char *TokenPtr = BufferPtr; + assert(TokenPtr < CommentEnd); + StringRef TokStartSymbols = ParseCommands ? "\n\r\\@\"&<" : "\n\r"; + +again: + size_t End = + StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of(TokStartSymbols); + if (End == StringRef::npos) + return CommentEnd; + + // Doxygen doesn't recognize any commands in a one-line double quotation. + // If we don't find an ending quotation mark, we pretend it never began. + if (*(TokenPtr + End) == '\"') { + TokenPtr += End + 1; + End = StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of("\n\r\""); + if (End != StringRef::npos && *(TokenPtr + End) == '\"') + TokenPtr += End + 1; + goto again; + } + return TokenPtr + End; +} + void Lexer::lexCommentText(Token &T) { assert(CommentState == LCS_InsideBCPLComment || CommentState == LCS_InsideCComment); @@ -290,17 +313,8 @@ void Lexer::lexCommentText(Token &T) { skipLineStartingDecorations(); return; - default: { - StringRef TokStartSymbols = ParseCommands ? "\n\r\\@&<" : "\n\r"; - size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr) - .find_first_of(TokStartSymbols); - if (End != StringRef::npos) - TokenPtr += End; - else - TokenPtr = CommentEnd; - formTextToken(T, TokenPtr); - return; - } + default: + return formTextToken(T, skipTextToken()); } }; diff --git a/clang/test/Sema/warn-documentation-unknown-command.cpp b/clang/test/Sema/warn-documentation-unknown-command.cpp index 4328c9682f212..2cb261d627c56 100644 --- a/clang/test/Sema/warn-documentation-unknown-command.cpp +++ b/clang/test/Sema/warn-documentation-unknown-command.cpp @@ -9,6 +9,15 @@ int test_unknown_comand_1; /// \retur aaa int test_unknown_comand_2(); +/// We don't recognize commands in double quotes: "\n\t @unknown2". +int test_unknown_comand_3(); + +// expected-warning@+2 {{unknown command tag name}} +// expected-warning@+2 {{unknown command tag name}} +/// But it has to be a single line: "\unknown3 +/// @unknown4" (Doxygen treats multi-line quotes inconsistently.) +int test_unknown_comand_4(); + // RUN: c-index-test -test-load-source all -Wdocumentation-unknown-command %s > /dev/null 2> %t.err // RUN: FileCheck < %t.err -check-prefix=CHECK-RANGE %s // CHECK-RANGE: warn-documentation-unknown-command.cpp:5:9:{5:9-5:17}: warning: unknown command tag name diff --git a/clang/test/Sema/warn-documentation.cpp b/clang/test/Sema/warn-documentation.cpp index 7243e791bba60..353c94a47eb6f 100644 --- a/clang/test/Sema/warn-documentation.cpp +++ b/clang/test/Sema/warn-documentation.cpp @@ -125,6 +125,16 @@ int test_block_command5(int); /// \brief \c Aaa int test_block_command6(int); +// We don't recognize comments in double quotes. +/// "\brief \returns Aaa" +int test_block_command7(int); + +// But only if they're single-line. (Doxygen treats multi-line quotes inconsistently.) +// expected-warning@+1 {{empty paragraph passed to '\brief' command}} +/// "\brief +/// \returns Aaa" +int test_block_command8(int); + // expected-warning@+5 {{duplicated command '\brief'}} expected-note@+1 {{previous command '\brief' here}} /// \brief Aaa /// _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits