ilya-biryukov updated this revision to Diff 143881.
ilya-biryukov added a comment.
Added forgotten bits of the change
Repository:
rC Clang
https://reviews.llvm.org/D46000
Files:
include/clang/AST/CommentLexer.h
include/clang/AST/RawCommentList.h
lib/AST/CommentLexer.cpp
lib/AST/RawCommentList.cpp
Index: lib/AST/RawCommentList.cpp
===================================================================
--- lib/AST/RawCommentList.cpp
+++ lib/AST/RawCommentList.cpp
@@ -335,3 +335,91 @@
BeforeThanCompare<RawComment>(SourceMgr));
std::swap(Comments, MergedComments);
}
+
+// Builds the comment text line by line from the tokens produced by the lexer:
+// leading whitespace is skipped up to the column where the first line's text
+// starts, and trailing newlines are removed from the result.
+std::string RawComment::getFormattedText(const ASTContext &Ctx) const {
+  auto &SourceMgr = Ctx.getSourceManager();
+  llvm::StringRef CommentText = getRawText(SourceMgr);
+  if (CommentText.empty())
+    return ""; // we couldn't retrieve the comment.
+
+  llvm::BumpPtrAllocator Allocator;
+  comments::Lexer L(Allocator, Ctx.getDiagnostics(),
+                    Ctx.getCommentCommandTraits(), getSourceRange().getBegin(),
+                    CommentText.begin(), CommentText.end(),
+                    /*ParseCommands=*/false);
+
+  // Trim whitespace at the start of \p S of length up to the value of \p
+  // MaxSkip.
+  auto SkipWs = [](llvm::StringRef S, unsigned MaxSkip) -> llvm::StringRef {
+    unsigned SkipLen = std::min(
+        MaxSkip, (unsigned)std::min(S.size(), S.find_first_not_of(" \t")));
+    return S.drop_front(SkipLen);
+  };
+
+  std::string Result;
+  unsigned IndentColumn = 0;
+
+  // Processes one line of the comment and adds it to the result.
+  // Handles skipping the indent at the start of the line.
+  // Returns false when eof is reached and true otherwise.
+  auto LexLine = [&](bool IsFirstLine) -> bool {
+    comments::Token Tok;
+    // Lex the first token on the line. We handle it separately, because we
+    // need to fix up its indentation.
+    L.lex(Tok);
+    if (Tok.is(comments::tok::eof))
+      return false;
+    if (Tok.is(comments::tok::newline)) {
+      Result += "\n";
+      return true;
+    }
+    llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
+    bool LocInvalid = false;
+    unsigned TokColumn =
+        SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
+    if (LocInvalid)
+      TokColumn = 0;
+    // Compute the length of whitespace we're allowed to skip.
+    unsigned MaxSkip;
+    if (IsFirstLine) {
+      // For the first line we skip all leading ws.
+      MaxSkip = std::numeric_limits<unsigned>::max();
+    } else {
+      // For the rest, skip up to the column of the first line's first non-ws.
+      MaxSkip = std::max((int)IndentColumn - (int)TokColumn, 0);
+    }
+    llvm::StringRef Trimmed = SkipWs(TokText, MaxSkip);
+    Result += Trimmed;
+    // Remember the amount of whitespace we skipped in the first line to remove
+    // indent up to that column in the following lines.
+    if (IsFirstLine)
+      IndentColumn = TokColumn + TokText.size() - Trimmed.size();
+    // Lex all tokens in the rest of the line.
+    for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
+      if (Tok.is(comments::tok::newline)) {
+        Result += "\n";
+        return true;
+      }
+      Result += L.getSpelling(Tok, SourceMgr);
+    }
+    // We've reached the end of the comment.
+    return false;
+  };
+
+  // An empty string has no last character to inspect, so check for that first.
+  auto DropTrailingNewLines = [](std::string &Str) {
+    while (!Str.empty() && Str.back() == '\n')
+      Str.pop_back();
+  };
+
+  // Process the first line separately to remember the indent for the rest.
+  if (LexLine(/*IsFirstLine=*/true)) {
+    while (LexLine(/*IsFirstLine=*/false))
+      ;
+  }
+  DropTrailingNewLines(Result);
+  return Result;
+}
Index: lib/AST/CommentLexer.cpp
===================================================================
--- lib/AST/CommentLexer.cpp
+++ lib/AST/CommentLexer.cpp
@@ -291,6 +291,14 @@
}
void Lexer::lexCommentText(Token &T) {
+ if (ParseCommands)
+ lexCommentTextWithCommands(T);
+ else
+ lexCommentTextWithoutCommands(T);
+}
+
+void Lexer::lexCommentTextWithCommands(Token &T) {
+ assert(ParseCommands);
assert(CommentState == LCS_InsideBCPLComment ||
CommentState == LCS_InsideCComment);
@@ -448,6 +456,39 @@
}
}
+// Lexes a newline token or a text token running up to the next newline.
+void Lexer::lexCommentTextWithoutCommands(Token &T) {
+  assert(!ParseCommands);
+  assert(CommentState == LCS_InsideBCPLComment ||
+         CommentState == LCS_InsideCComment);
+  assert(State == LS_Normal);
+
+  const char *TokenPtr = BufferPtr;
+  assert(TokenPtr < CommentEnd);
+  while (TokenPtr != CommentEnd) {
+    switch(*TokenPtr) {
+      case '\n':
+      case '\r':
+        TokenPtr = skipNewline(TokenPtr, CommentEnd);
+        formTokenWithChars(T, TokenPtr, tok::newline);
+
+        if (CommentState == LCS_InsideCComment)
+          skipLineStartingDecorations();
+        return;
+
+      default: {
+        // Only newlines end a text token here. Stopping at '\\', '@', '&' or
+        // '<' too would give a zero-length range when the text starts with one.
+        size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr).
+                         find_first_of("\n\r");
+        TokenPtr = End != StringRef::npos ? TokenPtr + End : CommentEnd;
+        formTextToken(T, TokenPtr);
+        return;
+      }
+    }
+  }
+}
+
void Lexer::setupAndLexVerbatimBlock(Token &T,
const char *TextBegin,
char Marker, const CommandInfo *Info) {
@@ -727,14 +768,13 @@
}
Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
- const CommandTraits &Traits,
- SourceLocation FileLoc,
- const char *BufferStart, const char *BufferEnd):
- Allocator(Allocator), Diags(Diags), Traits(Traits),
- BufferStart(BufferStart), BufferEnd(BufferEnd),
- FileLoc(FileLoc), BufferPtr(BufferStart),
- CommentState(LCS_BeforeComment), State(LS_Normal) {
-}
+ const CommandTraits &Traits, SourceLocation FileLoc,
+ const char *BufferStart, const char *BufferEnd,
+ bool ParseCommands)
+ : Allocator(Allocator), Diags(Diags), Traits(Traits),
+ BufferStart(BufferStart), BufferEnd(BufferEnd), FileLoc(FileLoc),
+ BufferPtr(BufferStart), CommentState(LCS_BeforeComment), State(LS_Normal),
+ ParseCommands(ParseCommands) {}
void Lexer::lex(Token &T) {
again:
Index: include/clang/AST/RawCommentList.h
===================================================================
--- include/clang/AST/RawCommentList.h
+++ include/clang/AST/RawCommentList.h
@@ -111,6 +111,29 @@
return extractBriefText(Context);
}
+ /// Returns sanitized comment text, suitable for presentation in editor UIs.
+ /// E.g. will transform:
+ /// // This is a long multiline comment.
+ /// // Parts of it might be indented.
+ /// /* The comments styles might be mixed. */
+ /// into
+ /// "This is a long multiline comment.\n"
+ /// " Parts of it might be indented.\n"
+ /// "The comments styles might be mixed."
+ /// Also removes leading indentation and sanitizes some common cases:
+ /// /* This is a first line.
+ /// * This is a second line. It is indented.
+ /// * This is a third line. */
+ /// and
+ /// /* This is a first line.
+ /// This is a second line. It is indented.
+ /// This is a third line. */
+ /// will both turn into:
+ /// "This is a first line.\n"
+ /// " This is a second line. It is indented.\n"
+ /// "This is a third line."
+ std::string getFormattedText(const ASTContext &Context) const;
+
/// Parse the comment, assuming it is attached to decl \c D.
comments::FullComment *parse(const ASTContext &Context,
const Preprocessor *PP, const Decl *D) const;
Index: include/clang/AST/CommentLexer.h
===================================================================
--- include/clang/AST/CommentLexer.h
+++ include/clang/AST/CommentLexer.h
@@ -281,6 +281,11 @@
/// command, including command marker.
SmallString<16> VerbatimBlockEndCommandName;
+ /// If true, the commands, html tags, etc will be parsed and reported as
+ /// separate tokens inside the comment body. If false, the comment text will
+ /// be parsed into text and newline tokens.
+ bool ParseCommands;
+
/// Given a character reference name (e.g., "lt"), return the character that
/// it stands for (e.g., "<").
StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
@@ -315,12 +320,19 @@
/// Eat string matching regexp \code \s*\* \endcode.
void skipLineStartingDecorations();
- /// Lex stuff inside comments. CommentEnd should be set correctly.
+ /// Calls lexCommentText(With|Without)Commands, depending on value of
+ /// ParseCommands.
void lexCommentText(Token &T);
- void setupAndLexVerbatimBlock(Token &T,
- const char *TextBegin,
- char Marker, const CommandInfo *Info);
+ /// Lex stuff inside comments. CommentEnd should be set correctly.
+ void lexCommentTextWithCommands(Token &T);
+
+ /// Lex only newlines and text inside comments. CommentEnd should be set
+ /// correctly.
+ void lexCommentTextWithoutCommands(Token &T);
+
+ void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker,
+ const CommandInfo *Info);
void lexVerbatimBlockFirstLine(Token &T);
@@ -343,14 +355,13 @@
public:
Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
- const CommandTraits &Traits,
- SourceLocation FileLoc,
- const char *BufferStart, const char *BufferEnd);
+ const CommandTraits &Traits, SourceLocation FileLoc,
+ const char *BufferStart, const char *BufferEnd,
+ bool ParseCommands = true);
void lex(Token &T);
- StringRef getSpelling(const Token &Tok,
- const SourceManager &SourceMgr,
+ StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr,
bool *Invalid = nullptr) const;
};
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits