aaronpuchert created this revision.
aaronpuchert added a reviewer: gribozavr2.
aaronpuchert requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

This is consistent with the behavior of Doxygen, and allows users to
write strings with C escapes or document input/output formats containing
special characters (@ or \) without escaping them, as the escapes might be
confusing. For example, if a function wants to document its expected
input format as "user@host", it doesn't have to write user\@host instead,
which would look right in the documentation but be confusing in the code.
Now users can just use double quotes (which they might do anyway).

This fixes a lot of false positives of -Wdocumentation-unknown-command,
but it could also fix issues with -Wdocumentation if the text triggers
an actual command.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D116190

Files:
  clang/include/clang/AST/CommentLexer.h
  clang/lib/AST/CommentLexer.cpp
  clang/test/Sema/warn-documentation-unknown-command.cpp
  clang/test/Sema/warn-documentation.cpp

Index: clang/test/Sema/warn-documentation.cpp
===================================================================
--- clang/test/Sema/warn-documentation.cpp
+++ clang/test/Sema/warn-documentation.cpp
@@ -125,6 +125,16 @@
 /// \brief \c Aaa
 int test_block_command6(int);
 
+// We don't recognize commands in double quotes.
+/// "\brief \returns Aaa"
+int test_block_command7(int);
+
+// But only if they're single-line. (Doxygen treats multi-line quotes inconsistently.)
+// expected-warning@+1 {{empty paragraph passed to '\brief' command}}
+/// "\brief
+/// \returns Aaa"
+int test_block_command8(int);
+
 // expected-warning@+5 {{duplicated command '\brief'}} expected-note@+1 {{previous command '\brief' here}}
 /// \brief Aaa
 ///
Index: clang/test/Sema/warn-documentation-unknown-command.cpp
===================================================================
--- clang/test/Sema/warn-documentation-unknown-command.cpp
+++ clang/test/Sema/warn-documentation-unknown-command.cpp
@@ -9,6 +9,15 @@
 /// \retur aaa
 int test_unknown_comand_2();
 
+/// We don't recognize commands in double quotes: "\n\t @unknown2".
+int test_unknown_comand_3();
+
+// expected-warning@+2 {{unknown command tag name}}
+// expected-warning@+2 {{unknown command tag name}}
+/// But it has to be a single line: "\unknown3
+/// @unknown4" (Doxygen treats multi-line quotes inconsistently.)
+int test_unknown_comand_4();
+
 // RUN: c-index-test -test-load-source all -Wdocumentation-unknown-command %s > /dev/null 2> %t.err
 // RUN: FileCheck < %t.err -check-prefix=CHECK-RANGE %s
 // CHECK-RANGE: warn-documentation-unknown-command.cpp:5:9:{5:9-5:17}: warning: unknown command tag name
Index: clang/lib/AST/CommentLexer.cpp
===================================================================
--- clang/lib/AST/CommentLexer.cpp
+++ clang/lib/AST/CommentLexer.cpp
@@ -270,6 +270,29 @@
   BufferPtr = TokEnd;
 }
 
+const char *Lexer::skipTextToken() {
+  const char *TokenPtr = BufferPtr;
+  assert(TokenPtr < CommentEnd);
+  StringRef TokStartSymbols = ParseCommands ? "\n\r\\@\"&<" : "\n\r";
+
+again:
+  size_t End =
+      StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of(TokStartSymbols);
+  if (End == StringRef::npos)
+    return CommentEnd;
+
+  // Doxygen doesn't recognize any commands in a one-line double quotation.
+  // If we don't find an ending quotation mark, we pretend it never began.
+  if (*(TokenPtr + End) == '\"') {
+    TokenPtr += End + 1;
+    End = StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of("\n\r\"");
+    if (End != StringRef::npos && *(TokenPtr + End) == '\"')
+      TokenPtr += End + 1;
+    goto again;
+  }
+  return TokenPtr + End;
+}
+
 void Lexer::lexCommentText(Token &T) {
   assert(CommentState == LCS_InsideBCPLComment ||
          CommentState == LCS_InsideCComment);
@@ -290,17 +313,8 @@
             skipLineStartingDecorations();
           return;
 
-      default: {
-          StringRef TokStartSymbols = ParseCommands ? "\n\r\\@&<" : "\n\r";
-          size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr)
-                           .find_first_of(TokStartSymbols);
-          if (End != StringRef::npos)
-            TokenPtr += End;
-          else
-            TokenPtr = CommentEnd;
-          formTextToken(T, TokenPtr);
-          return;
-      }
+      default:
+        return formTextToken(T, skipTextToken());
     }
   };
 
Index: clang/include/clang/AST/CommentLexer.h
===================================================================
--- clang/include/clang/AST/CommentLexer.h
+++ clang/include/clang/AST/CommentLexer.h
@@ -320,6 +320,9 @@
   /// Eat string matching regexp \code \s*\* \endcode.
   void skipLineStartingDecorations();
 
+  /// Skip over pure text.
+  const char *skipTextToken();
+
   /// Lex comment text, including commands if ParseCommands is set to true.
   void lexCommentText(Token &T);
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to