Marc-André Lureau <[email protected]>, Marc-André Lureau <[email protected]> Message-ID: In-Reply-To: <llvm.org/llvm/llvm-project/pull/[email protected]>
llvmorg-github-actions[bot] wrote: <!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clangd Author: Marc-Andre Lureau (elmarco) <details> <summary>Changes</summary> Add support for Linux kernel-doc comment format in clangd hover. This includes parsing kernel-doc structured comments (brief, @param, Return/Returns, named sections like Context/Note/Warning/Locking), RST-style indented and fenced code blocks, and inline markup conversion for %CONSTANT, @param, &struct references, ``literals``, $ENVVAR, and bare function() references. Related: https://github.com/clangd/clangd/issues/2662 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --- Patch is 60.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/198529.diff 9 Files Affected: - (modified) clang-tools-extra/clangd/Config.h (+2) - (modified) clang-tools-extra/clangd/ConfigCompile.cpp (+1) - (modified) clang-tools-extra/clangd/ConfigFragment.h (+1) - (modified) clang-tools-extra/clangd/Hover.cpp (+60-18) - (modified) clang-tools-extra/clangd/Hover.h (+6) - (modified) clang-tools-extra/clangd/SymbolDocumentation.cpp (+522) - (modified) clang-tools-extra/clangd/SymbolDocumentation.h (+29) - (modified) clang-tools-extra/clangd/unittests/HoverTests.cpp (+129) - (modified) clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp (+1025) ``````````diff diff --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h index 56d7ac453deeb..28d09d394e743 100644 --- a/clang-tools-extra/clangd/Config.h +++ b/clang-tools-extra/clangd/Config.h @@ -216,6 +216,8 @@ struct Config { Markdown, /// Treat comments as doxygen. Doxygen, + /// Treat comments as kernel-doc. 
+ KernelDoc, }; struct { diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp index 2b41949d6d05c..4c0f3d99743e2 100644 --- a/clang-tools-extra/clangd/ConfigCompile.cpp +++ b/clang-tools-extra/clangd/ConfigCompile.cpp @@ -820,6 +820,7 @@ struct FragmentCompiler { .map("Plaintext", Config::CommentFormatPolicy::PlainText) .map("Markdown", Config::CommentFormatPolicy::Markdown) .map("Doxygen", Config::CommentFormatPolicy::Doxygen) + .map("KernelDoc", Config::CommentFormatPolicy::KernelDoc) .value()) Out.Apply.push_back([Val](const Params &, Config &C) { C.Documentation.CommentFormat = *Val; diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h index 7604fe4e24c97..90fb60f53d734 100644 --- a/clang-tools-extra/clangd/ConfigFragment.h +++ b/clang-tools-extra/clangd/ConfigFragment.h @@ -409,6 +409,7 @@ struct Fragment { /// - Plaintext: Treat comments as plain text. /// - Markdown: Treat comments as Markdown. /// - Doxygen: Treat comments as doxygen. + /// - KernelDoc: Treat comments as kernel-doc. 
std::optional<Located<std::string>> CommentFormat; }; DocumentationBlock Documentation; diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index fab77af3ebcea..14f90330b4e7a 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -1528,6 +1528,32 @@ void HoverInfo::sizeToMarkupParagraph(markup::Paragraph &P) const { P.appendText(", alignment " + formatSize(*Align)); } +void HoverInfo::appendCommonMetadata(markup::Document &Output) const { + Output.addRuler(); + + // Don't print Type after Parameters or ReturnType as this will just duplicate + // the information + if (Type && !ReturnType && !Parameters) + Output.addParagraph().appendText("Type: ").appendCode( + llvm::to_string(*Type)); + + if (Value) + valueToMarkupParagraph(Output.addParagraph()); + + if (Offset) + offsetToMarkupParagraph(Output.addParagraph()); + if (Size) + sizeToMarkupParagraph(Output.addParagraph()); + + if (CalleeArgInfo) + calleeArgInfoToMarkupParagraph(Output.addParagraph()); + + if (!UsedSymbolNames.empty()) { + Output.addRuler(); + usedSymbolNamesToMarkup(Output); + } +} + markup::Document HoverInfo::presentDoxygen() const { markup::Document Output; @@ -1650,33 +1676,46 @@ markup::Document HoverInfo::presentDoxygen() const { SymbolDoc.detailedDocToMarkup(Output); } - Output.addRuler(); + appendCommonMetadata(Output); - // Don't print Type after Parameters or ReturnType as this will just duplicate - // the information - if (Type && !ReturnType && !Parameters) - Output.addParagraph().appendText("Type: ").appendCode( - llvm::to_string(*Type)); + return Output; +} - if (Value) { - valueToMarkupParagraph(Output.addParagraph()); - } +markup::Document HoverInfo::presentKernelDoc() const { + markup::Document Output; - if (Offset) - offsetToMarkupParagraph(Output.addParagraph()); - if (Size) { - sizeToMarkupParagraph(Output.addParagraph()); + markup::Paragraph &Header = Output.addHeading(3); + if (!Definition.empty()) { + 
Output.addRuler(); + definitionScopeToMarkup(Output); + } else { + Header.appendCode(Name); } - if (CalleeArgInfo) { - calleeArgInfoToMarkupParagraph(Output.addParagraph()); + Output.addRuler(); + KernelDocInfo DocInfo = parseKernelDoc(Documentation); + renderKernelDocToMarkup(DocInfo, Output); + + if (Parameters && !Parameters->empty() && DocInfo.Params.empty()) { + Output.addHeading(3).appendText("Parameters"); + markup::BulletList &L = Output.addBulletList(); + for (const auto &Param : *Parameters) + L.addItem().addParagraph().appendCode(llvm::to_string(Param)); } - if (!UsedSymbolNames.empty()) { - Output.addRuler(); - usedSymbolNamesToMarkup(Output); + if (ReturnType && + ReturnType->AKA.value_or(ReturnType->Type) != "void") { + if (DocInfo.Returns.empty() && DocInfo.ReturnItems.empty()) { + Output.addHeading(3).appendText("Returns"); + Output.addParagraph().appendCode(llvm::to_string(*ReturnType)); + } } + appendCommonMetadata(Output); + + if (!Provider.empty()) + providerToMarkupParagraph(Output); + return Output; } @@ -1770,6 +1809,9 @@ std::string HoverInfo::present(MarkupKind Kind) const { return presentDefault().asMarkdown(); if (Cfg.Documentation.CommentFormat == Config::CommentFormatPolicy::Doxygen) return presentDoxygen().asMarkdown(); + if (Cfg.Documentation.CommentFormat == + Config::CommentFormatPolicy::KernelDoc) + return presentKernelDoc().asMarkdown(); if (Cfg.Documentation.CommentFormat == Config::CommentFormatPolicy::PlainText) // If the user prefers plain text, we use the present() method to generate diff --git a/clang-tools-extra/clangd/Hover.h b/clang-tools-extra/clangd/Hover.h index 614180a7b9846..8422c449e89f0 100644 --- a/clang-tools-extra/clangd/Hover.h +++ b/clang-tools-extra/clangd/Hover.h @@ -132,9 +132,15 @@ struct HoverInfo { void offsetToMarkupParagraph(markup::Paragraph &P) const; void sizeToMarkupParagraph(markup::Paragraph &P) const; + /// Append common metadata (type, value, offset, size, etc.) to the output. 
+ void appendCommonMetadata(markup::Document &Output) const; + /// Parse and render the hover information as Doxygen documentation. markup::Document presentDoxygen() const; + /// Parse and render the hover information as kernel-doc documentation. + markup::Document presentKernelDoc() const; + /// Render the hover information as a default documentation. markup::Document presentDefault() const; }; diff --git a/clang-tools-extra/clangd/SymbolDocumentation.cpp b/clang-tools-extra/clangd/SymbolDocumentation.cpp index a50d7a565b1bc..e0c5f5e9edacb 100644 --- a/clang-tools-extra/clangd/SymbolDocumentation.cpp +++ b/clang-tools-extra/clangd/SymbolDocumentation.cpp @@ -557,5 +557,527 @@ void SymbolDocCommentVisitor::retvalsToMarkup(markup::Document &Out) const { } } +namespace { + +void convertKernelDocInlineMarkup(llvm::StringRef Text, + markup::Paragraph &Para) { + unsigned I = 0; + unsigned Start = 0; + while (I < Text.size()) { + char C = Text[I]; + + // Double-backtick literal: ``text`` + if (C == '`' && I + 1 < Text.size() && Text[I + 1] == '`') { + auto Close = Text.find("``", I + 2); + if (Close != StringRef::npos) { + if (I > Start) + Para.appendText(Text.slice(Start, I)); + Para.appendCode(Text.slice(I + 2, Close)); + I = Close + 2; + Start = I; + continue; + } + } + + // &struct name, &enum name, &typedef name, &struct->member + if (C == '&') { + unsigned J = I + 1; + // Optional keyword: struct, enum, typedef, union + unsigned KeywordEnd = J; + while (KeywordEnd < Text.size() && + (llvm::isAlpha(Text[KeywordEnd]) || Text[KeywordEnd] == '_')) + ++KeywordEnd; + StringRef MaybeKeyword = Text.slice(J, KeywordEnd); + bool HasKeyword = (MaybeKeyword == "struct" || MaybeKeyword == "enum" || + MaybeKeyword == "typedef" || MaybeKeyword == "union"); + unsigned NameStart = HasKeyword ? 
KeywordEnd : J; + if (HasKeyword && NameStart < Text.size() && Text[NameStart] == ' ') + ++NameStart; + unsigned NameEnd = NameStart; + while (NameEnd < Text.size() && + (llvm::isAlnum(Text[NameEnd]) || Text[NameEnd] == '_')) + ++NameEnd; + // Allow ->member or .member suffix + if (NameEnd < Text.size() && + (Text[NameEnd] == '.' || + (NameEnd + 1 < Text.size() && Text[NameEnd] == '-' && + Text[NameEnd + 1] == '>'))) { + unsigned MemberStart = + Text[NameEnd] == '.' ? NameEnd + 1 : NameEnd + 2; + unsigned MemberEnd = MemberStart; + while (MemberEnd < Text.size() && + (llvm::isAlnum(Text[MemberEnd]) || Text[MemberEnd] == '_')) + ++MemberEnd; + if (MemberEnd > MemberStart) + NameEnd = MemberEnd; + } + if (NameEnd > NameStart) { + if (I > Start) + Para.appendText(Text.slice(Start, I)); + Para.appendCode(Text.slice(J, NameEnd)); + I = NameEnd; + Start = I; + continue; + } + } + + // %CONSTANT or %-ERRNO + if (C == '%') { + unsigned J = I + 1; + if (J < Text.size() && Text[J] == '-') + ++J; + while (J < Text.size() && (llvm::isAlnum(Text[J]) || Text[J] == '_')) + ++J; + if (J > I + 1) { + if (I > Start) + Para.appendText(Text.slice(Start, I)); + Para.appendCode(Text.slice(I + 1, J)); + I = J; + Start = J; + continue; + } + } + + // @parameter + if (C == '@') { + unsigned J = I + 1; + while (J < Text.size() && (llvm::isAlnum(Text[J]) || Text[J] == '_')) + ++J; + if (J > I + 1) { + if (I > Start) + Para.appendText(Text.slice(Start, I)); + Para.appendCode(Text.slice(I + 1, J)); + I = J; + Start = J; + continue; + } + } + + // $ENVVAR + if (C == '$') { + unsigned J = I + 1; + while (J < Text.size() && (llvm::isAlnum(Text[J]) || Text[J] == '_')) + ++J; + if (J > I + 1) { + if (I > Start) + Para.appendText(Text.slice(Start, I)); + Para.appendCode(Text.slice(I, J)); + I = J; + Start = J; + continue; + } + } + + // Bare function references: identifier() + if ((llvm::isAlpha(C) || C == '_') && + (I == 0 || (!llvm::isAlnum(Text[I - 1]) && Text[I - 1] != '_'))) { + unsigned J = I 
+ 1; + while (J < Text.size() && (llvm::isAlnum(Text[J]) || Text[J] == '_')) + ++J; + if (J + 1 < Text.size() && Text[J] == '(' && Text[J + 1] == ')') { + if (I > Start) + Para.appendText(Text.slice(Start, I)); + Para.appendCode(Text.slice(I, J + 2)); + I = J + 2; + Start = I; + continue; + } + } + + ++I; + } + if (Start < Text.size()) + Para.appendText(Text.slice(Start, Text.size())); +} + +} // namespace + +KernelDocInfo parseKernelDoc(llvm::StringRef Doc) { + KernelDocInfo Info; + + enum State { + Brief, + Params, + Returns, + Section, + Body, + FencedCodeBlock, + IndentedCodeBlock + } St = Brief; + std::string CurrentCodeBlock; + std::string CurrentCodeLang; + std::string CodeFence; + std::string CurrentParagraph; + + auto FlushParagraph = [&] { + StringRef Trimmed = StringRef(CurrentParagraph).trim(); + if (!Trimmed.empty()) { + // RST :: literal block marker: strip trailing :: + // "word::" → "word:", "word ::" → "word", "::" → nothing + if (Trimmed.ends_with("::")) { + StringRef WithoutDC = Trimmed.drop_back(2); + if (WithoutDC.ends_with(' ')) + WithoutDC = WithoutDC.rtrim(); + else if (!WithoutDC.empty()) + WithoutDC = Trimmed.drop_back(1); + if (!WithoutDC.empty()) + Info.Description.push_back( + {KernelDocDescriptionBlock::Paragraph, WithoutDC.str(), ""}); + } else { + Info.Description.push_back( + {KernelDocDescriptionBlock::Paragraph, Trimmed.str(), ""}); + } + } + CurrentParagraph.clear(); + }; + + // Detect named section headers: a capitalized word followed by ':' + // at the start of a line. Matches kernel-doc convention for Context:, + // Note:, Warning:, Locking:, etc. 
+ auto IsSectionHeader = [](StringRef T) -> bool { + if (T.empty() || !llvm::isUpper(T[0])) + return false; + auto ColonPos = T.find(':'); + if (ColonPos == StringRef::npos || ColonPos < 2) + return false; + // Reject RST literal block markers like "Example::" + if (ColonPos + 1 < T.size() && T[ColonPos + 1] == ':') + return false; + StringRef Name = T.slice(0, ColonPos); + return llvm::all_of(Name, + [](char C) { return llvm::isAlnum(C) || C == '_'; }); + }; + + auto FlushIndentedCodeBlock = [&] { + StringRef Code = StringRef(CurrentCodeBlock).rtrim('\n'); + if (!Code.empty()) { + // Strip common leading indentation from all non-empty lines. + size_t MinIndent = StringRef::npos; + StringRef L, R = Code; + while (!R.empty()) { + std::tie(L, R) = R.split('\n'); + if (!L.empty()) + MinIndent = std::min(MinIndent, L.size() - L.ltrim().size()); + } + std::string Stripped; + R = Code; + bool First = true; + while (!R.empty()) { + std::tie(L, R) = R.split('\n'); + if (!First) + Stripped += '\n'; + First = false; + if (L.size() >= MinIndent) + Stripped += L.drop_front(MinIndent).str(); + } + Info.Description.push_back( + {KernelDocDescriptionBlock::Code, std::move(Stripped), ""}); + } + CurrentCodeBlock.clear(); + }; + + StringRef Line, Rest; + for (std::tie(Line, Rest) = Doc.split('\n'); + !(Line.empty() && Rest.empty()); + std::tie(Line, Rest) = Rest.split('\n')) { + + StringRef Trimmed = Line.ltrim(); + + if (St == FencedCodeBlock) { + if (Trimmed.starts_with(CodeFence)) { + StringRef Code = StringRef(CurrentCodeBlock).rtrim('\n'); + if (!Code.empty()) + Info.Description.push_back( + {KernelDocDescriptionBlock::Code, Code.str(), CurrentCodeLang}); + St = Body; + continue; + } + CurrentCodeBlock += Line.str() + "\n"; + continue; + } + + // RST-style indented code block: indented text after a blank line + if (St == IndentedCodeBlock) { + if (!Trimmed.empty() && (Line[0] == ' ' || Line[0] == '\t')) { + CurrentCodeBlock += Line.str() + "\n"; + continue; + } + if 
(Trimmed.empty()) { + CurrentCodeBlock += "\n"; + continue; + } + // Non-indented, non-blank line ends the code block. + FlushIndentedCodeBlock(); + St = Body; + // Fall through to process this line normally. + } + + // Markdown fenced code block: ```lang or ~~~ + if (Trimmed.starts_with("```") || Trimmed.starts_with("~~~")) { + if (St == Body) + FlushParagraph(); + CodeFence = + Trimmed.take_while([](char C) { return C == '`' || C == '~'; }).str(); + CurrentCodeLang = Trimmed.drop_front(CodeFence.size()).ltrim().str(); + CurrentCodeBlock.clear(); + St = FencedCodeBlock; + continue; + } + + // Brief line: "function_name() - Brief description" or just first + // non-empty line. May span multiple lines until a @param, blank line, + // or a named section/tag is seen. + if (St == Brief) { + if (Trimmed.empty()) { + if (!Info.Brief.empty()) + St = Params; + continue; + } + // End brief on structured tags — fall through to their handlers. + if (!Info.Brief.empty() && + (Trimmed.starts_with("@") || IsSectionHeader(Trimmed))) { + St = Params; + } else { + if (Info.Brief.empty()) { + auto DashPos = Trimmed.find(" - "); + if (DashPos != StringRef::npos) { + Info.Brief = Trimmed.drop_front(DashPos + 3).str(); + } else if (Trimmed.starts_with("@")) { + // Inline member doc: /** @member: description */ + auto ColonPos = Trimmed.find(':'); + if (ColonPos != StringRef::npos) + Info.Brief = Trimmed.drop_front(ColonPos + 1).ltrim().str(); + else + Info.Brief = Trimmed.str(); + } else { + // Try "identifier():" or "identifier:" colon-style brief. 
+ bool FoundColonBrief = false; + unsigned J = 0; + while (J < Trimmed.size() && + (llvm::isAlnum(Trimmed[J]) || Trimmed[J] == '_')) + ++J; + if (J > 0 && J < Trimmed.size()) { + unsigned K = J; + if (K + 1 < Trimmed.size() && Trimmed[K] == '(' && + Trimmed[K + 1] == ')') + K += 2; + if (K < Trimmed.size() && Trimmed[K] == ' ') + ++K; + if (K < Trimmed.size() && Trimmed[K] == ':' && + (K + 1 >= Trimmed.size() || Trimmed[K + 1] != ':')) { + Info.Brief = Trimmed.drop_front(K + 1).ltrim().str(); + FoundColonBrief = true; + } + } + if (!FoundColonBrief) + Info.Brief = Trimmed.str(); + } + } else { + Info.Brief += " " + Trimmed.str(); + } + continue; + } + } + + // @return: / @returns: — treated as Return section per reference parser + if (Trimmed.starts_with_insensitive("@return:") || + Trimmed.starts_with_insensitive("@returns:")) { + if (St == Body) + FlushParagraph(); + St = Returns; + StringRef Tag = Trimmed.starts_with_insensitive("@returns:") + ? Trimmed.take_front(9) + : Trimmed.take_front(8); + Info.Returns = Trimmed.drop_front(Tag.size()).ltrim().str(); + continue; + } + + // @...: for variadic arguments + if (Trimmed.starts_with("@...:")) { + if (St == Body) + FlushParagraph(); + St = Params; + StringRef Desc = Trimmed.drop_front(5).ltrim(); + Info.Params.push_back({"...", Desc.str()}); + continue; + } + + // Parameter line: @name: description + if (Trimmed.starts_with("@")) { + auto ColonPos = Trimmed.find(':'); + if (ColonPos != StringRef::npos && ColonPos > 1) { + StringRef ParamName = Trimmed.slice(1, ColonPos); + bool IsParam = true; + for (unsigned K = 0; K < ParamName.size(); ++K) { + char C = ParamName[K]; + if (llvm::isAlnum(C) || C == '_' || C == '.') + continue; + if (C == '-' && K + 1 < ParamName.size() && ParamName[K + 1] == '>') { + ++K; // skip '>' + continue; + } + IsParam = false; + break; + } + if (IsParam) { + if (St == Body) + FlushParagraph(); + St = Params; + StringRef Desc = Trimmed.drop_front(ColonPos + 1).ltrim(); + 
Info.Params.push_back({ParamName.str(), Desc.str()}); + continue; + } + } + } + + // Return: or Returns: description (but not Return:: literal block marker) + if ((Trimmed.starts_with_insensitive("Return:") && + !Trimmed.starts_with_insensitive("Return::")) || + (Trimmed.starts_with_insensitive("Returns:") && + !Trimmed.starts_with_insensitive("Returns::"))) { + if (St == Body) + FlushParagraph(); + St = Returns; + StringRef Tag = Trimmed.starts_with_insensitive("Returns:") + ? Trimmed.take_front(8) + : Trimmed.take_front(7); + Info.Returns = Trimmed.drop_front(Tag.size()).ltrim().str(); + continue; + } + + // Description: is an optional explicit section header — strip the tag + // and treat the remainder as the start of body text. + if (Trimmed.starts_with_insensitive("Description:") && + !Trimmed.starts_with_insensitive("Description::")) { + if (St == Body) + FlushParagraph(); + St = Body; + StringRef Desc = Trimmed.drop_front(12).ltrim(); + if (!Desc.empty()) { + CurrentParagraph = Desc.str(); + } + continue; + } + + // Generic named section: "Word:" at start of line. + // Handles Context:, Note:, Warning:, Locking:, etc. + // When in a continuation state, only match non-indented lines as + // section headers — indented lines are continuation text. + bool I... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/198529 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
