https://github.com/hdoc updated https://github.com/llvm/llvm-project/pull/91100
>From 2b1845352b0ea27f027c94e406cd32ae8a1e94d1 Mon Sep 17 00:00:00 2001 From: hdoc <git...@hdoc.io> Date: Sat, 4 May 2024 18:50:16 -0700 Subject: [PATCH] Support for parsing headers in Doxygen \par commands --- .../include/clang/AST/CommentCommandTraits.h | 4 + clang/include/clang/AST/CommentCommands.td | 3 +- clang/include/clang/AST/CommentParser.h | 4 +- clang/lib/AST/CommentParser.cpp | 77 ++++++++++ clang/test/Index/comment-misc-tags.m | 8 +- clang/unittests/AST/CommentParser.cpp | 139 +++++++++++++++++- .../ClangCommentCommandInfoEmitter.cpp | 1 + 7 files changed, 228 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/AST/CommentCommandTraits.h b/clang/include/clang/AST/CommentCommandTraits.h index 0c3254d84eb000..78c484fff3aede 100644 --- a/clang/include/clang/AST/CommentCommandTraits.h +++ b/clang/include/clang/AST/CommentCommandTraits.h @@ -88,6 +88,10 @@ struct CommandInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsHeaderfileCommand : 1; + /// True if this is a \\par command. + LLVM_PREFERRED_TYPE(bool) + unsigned IsParCommand : 1; + /// True if we don't want to warn about this command being passed an empty /// paragraph. Meaningful only for block commands. 
LLVM_PREFERRED_TYPE(bool) diff --git a/clang/include/clang/AST/CommentCommands.td b/clang/include/clang/AST/CommentCommands.td index e839031752cdd8..5fd687b0d8991f 100644 --- a/clang/include/clang/AST/CommentCommands.td +++ b/clang/include/clang/AST/CommentCommands.td @@ -18,6 +18,7 @@ class Command<string name> { bit IsThrowsCommand = 0; bit IsDeprecatedCommand = 0; bit IsHeaderfileCommand = 0; + bit IsParCommand = 0; bit IsEmptyParagraphAllowed = 0; @@ -156,7 +157,7 @@ def Date : BlockCommand<"date">; def Invariant : BlockCommand<"invariant">; def Li : BlockCommand<"li">; def Note : BlockCommand<"note">; -def Par : BlockCommand<"par">; +def Par : BlockCommand<"par"> { let IsParCommand = 1; let NumArgs = 1; } def Post : BlockCommand<"post">; def Pre : BlockCommand<"pre">; def Remark : BlockCommand<"remark">; diff --git a/clang/include/clang/AST/CommentParser.h b/clang/include/clang/AST/CommentParser.h index e11e818b1af0a1..b5f1c6c19f0ce7 100644 --- a/clang/include/clang/AST/CommentParser.h +++ b/clang/include/clang/AST/CommentParser.h @@ -100,6 +100,9 @@ class Parser { ArrayRef<Comment::Argument> parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs); + ArrayRef<Comment::Argument> + parseParCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs); + BlockCommandComment *parseBlockCommand(); InlineCommandComment *parseInlineCommand(); @@ -118,4 +121,3 @@ class Parser { } // end namespace clang #endif - diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp index 8adfd85d0160c3..54760f5ba932eb 100644 --- a/clang/lib/AST/CommentParser.cpp +++ b/clang/lib/AST/CommentParser.cpp @@ -149,6 +149,63 @@ class TextTokenRetokenizer { addToken(); } + /// Check if this line starts with @par or \par + bool startsWithParCommand() { + unsigned Offset = 1; + + /// Skip all whitespace characters at the beginning. 
+ /// This needs to backtrack because Pos has already advanced past the + /// actual \par or @par command by the time this function is called. + while (isWhitespace(*(Pos.BufferPtr - Offset))) + Offset++; + + /// Check if the four characters ending just before that whitespace are \par or @par + llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4); + return LineStart.starts_with("\\par") || LineStart.starts_with("@par"); + } + + /// Extract a par command argument-header. + bool lexParHeading(Token &Tok) { + if (isEnd()) + return false; + + Position SavedPos = Pos; + + consumeWhitespace(); + SmallString<32> WordText; + const char *WordBegin = Pos.BufferPtr; + SourceLocation Loc = getSourceLocation(); + + if (!startsWithParCommand()) + return false; + + // Read until the end of this token, which is effectively the end of the + // line. This gets us the content of the par header, if there is one. + while (!isEnd()) { + WordText.push_back(peek()); + if (Pos.BufferPtr + 1 == Pos.BufferEnd) { + consumeChar(); + break; + } else { + consumeChar(); + } + } + + const unsigned Length = WordText.size(); + if (Length == 0) { + Pos = SavedPos; + return false; + } + + char *TextPtr = Allocator.Allocate<char>(Length + 1); + + memcpy(TextPtr, WordText.c_str(), Length + 1); + StringRef Text = StringRef(TextPtr, Length); + + formTokenWithChars(Tok, Loc, WordBegin, Length, Text); + return true; + } + /// Extract a word -- sequence of non-whitespace characters. 
bool lexWord(Token &Tok) { if (isEnd()) @@ -304,6 +361,23 @@ Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) { return llvm::ArrayRef(Args, ParsedArgs); } +ArrayRef<Comment::Argument> +Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer, + unsigned NumArgs) { + auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) + Comment::Argument[NumArgs]; + unsigned ParsedArgs = 0; + Token Arg; + + while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Arg)) { + Args[ParsedArgs] = Comment::Argument{ + SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; + ParsedArgs++; + } + + return llvm::ArrayRef(Args, ParsedArgs); +} + BlockCommandComment *Parser::parseBlockCommand() { assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); @@ -356,6 +430,9 @@ BlockCommandComment *Parser::parseBlockCommand() { parseParamCommandArgs(PC, Retokenizer); else if (TPC) parseTParamCommandArgs(TPC, Retokenizer); + else if (Info->IsParCommand) + S.actOnBlockCommandArgs(BC, + parseParCommandArgs(Retokenizer, Info->NumArgs)); else S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs)); diff --git a/clang/test/Index/comment-misc-tags.m b/clang/test/Index/comment-misc-tags.m index 47ee9d9aa392ab..6d018dbfcf193d 100644 --- a/clang/test/Index/comment-misc-tags.m +++ b/clang/test/Index/comment-misc-tags.m @@ -91,18 +91,16 @@ @interface IOCommandGate struct Test {int filler;}; -// CHECK: (CXComment_BlockCommand CommandName=[par] +// CHECK: (CXComment_BlockCommand CommandName=[par] Arg[0]=User defined paragraph: // CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ User defined paragraph:] HasTrailingNewline) // CHECK-NEXT: (CXComment_Text Text=[ Contents of the paragraph.]))) // CHECK: (CXComment_BlockCommand CommandName=[par] // CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ New paragraph under the same heading.]))) +// CHECK-NEXT: (CXComment_Text Text=[New 
paragraph under the same heading.]))) // CHECK: (CXComment_BlockCommand CommandName=[note] // CHECK-NEXT: (CXComment_Paragraph // CHECK-NEXT: (CXComment_Text Text=[ This note consists of two paragraphs.] HasTrailingNewline) // CHECK-NEXT: (CXComment_Text Text=[ This is the first paragraph.]))) // CHECK: (CXComment_BlockCommand CommandName=[par] // CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ And this is the second paragraph.]))) - +// CHECK-NEXT: (CXComment_Text Text=[And this is the second paragraph.]))) diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp index c3479672ae2a3c..d9150a848fcaf3 100644 --- a/clang/unittests/AST/CommentParser.cpp +++ b/clang/unittests/AST/CommentParser.cpp @@ -1427,8 +1427,145 @@ TEST_F(CommentParserTest, Deprecated) { } } +TEST_F(CommentParserTest, ParCommandHasArg1) { + const char *Sources[] = { + "/// @par Paragraph header:", "/// @par Paragraph header:\n", + "/// @par Paragraph header:\r\n", "/// @par Paragraph header:\n\r", + "/** @par Paragraph header:*/", + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC)); + ASSERT_TRUE(HasChildCount(PC, 0)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header:"); + } + } +} + +TEST_F(CommentParserTest, ParCommandHasArg2) { + const char *Sources[] = { + "/// @par Paragraph header: ", "/// @par Paragraph header: \n", + "/// @par Paragraph header: \r\n", "/// @par Paragraph header: \n\r", + "/** @par Paragraph header: */", + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + 
BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC)); + ASSERT_TRUE(HasChildCount(PC, 0)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header: "); + } + } +} + +TEST_F(CommentParserTest, ParCommandHasArg3) { + const char *Sources[] = { + ("/// @par Paragraph header:\n" + "/// Paragraph body"), + ("/// @par Paragraph header:\r\n" + "/// Paragraph body"), + ("/// @par Paragraph header:\n\r" + "/// Paragraph body"), + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + TextComment *TC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header:"); + ASSERT_TRUE(GetChildAt(PC, 0, TC)); + ASSERT_TRUE(TC->getText() == " Paragraph body"); + } + } +} + +TEST_F(CommentParserTest, ParCommandHasArg4) { + const char *Sources[] = { + ("/// @par Paragraph header:\n" + "/// Paragraph body1\n" + "/// Paragraph body2"), + ("/// @par Paragraph header:\r\n" + "/// Paragraph body1\n" + "/// Paragraph body2"), + ("/// @par Paragraph header:\n\r" + "/// Paragraph body1\n" + "/// Paragraph body2"), + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + TextComment *TC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC)); + ASSERT_TRUE(HasChildCount(PC, 2)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header:"); + ASSERT_TRUE(GetChildAt(PC, 0, TC)); + ASSERT_TRUE(TC->getText() == " Paragraph body1"); 
+ ASSERT_TRUE(GetChildAt(PC, 1, TC)); + ASSERT_TRUE(TC->getText() == " Paragraph body2"); + } + } +} + +TEST_F(CommentParserTest, ParCommandHasArg5) { + const char *Sources[] = { + ("/// @par \n" + "/// Paragraphs with no text before newline have no heading"), + ("/// @par \r\n" + "/// Paragraphs with no text before newline have no heading"), + ("/// @par \n\r" + "/// Paragraphs with no text before newline have no heading"), + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + TextComment *TC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(BCC->getNumArgs() == 0); + ASSERT_TRUE(GetChildAt(PC, 0, TC)); + ASSERT_TRUE(TC->getText() == + "Paragraphs with no text before newline have no heading"); + } + } +} + } // unnamed namespace } // end namespace comments } // end namespace clang - diff --git a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp index a113b02e19995d..07b26dc2f6b8be 100644 --- a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp +++ b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp @@ -44,6 +44,7 @@ void clang::EmitClangCommentCommandInfo(RecordKeeper &Records, << Tag.getValueAsBit("IsThrowsCommand") << ", " << Tag.getValueAsBit("IsDeprecatedCommand") << ", " << Tag.getValueAsBit("IsHeaderfileCommand") << ", " + << Tag.getValueAsBit("IsParCommand") << ", " << Tag.getValueAsBit("IsEmptyParagraphAllowed") << ", " << Tag.getValueAsBit("IsVerbatimBlockCommand") << ", " << Tag.getValueAsBit("IsVerbatimBlockEndCommand") << ", " _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits