Author: Hirofumi Nakamura Date: 2024-01-17T21:20:35+09:00 New Revision: e3702f6225fe4438ccffcac3f612e902c7b3db22
URL: https://github.com/llvm/llvm-project/commit/e3702f6225fe4438ccffcac3f612e902c7b3db22 DIFF: https://github.com/llvm/llvm-project/commit/e3702f6225fe4438ccffcac3f612e902c7b3db22.diff LOG: [clang-format] TableGen multi line string support. (#78032) Support the handling of TableGen's multiline string (code) literal. That has the form, [{ this is the string possibly with multi line... }] Added: Modified: clang/lib/Format/ContinuationIndenter.cpp clang/lib/Format/FormatToken.h clang/lib/Format/FormatTokenLexer.cpp clang/lib/Format/FormatTokenLexer.h clang/unittests/Format/TokenAnnotatorTest.cpp Removed: ################################################################################ diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 102504182c4505..e6eaaa9ab45706 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1591,6 +1591,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.StartOfStringLiteral = State.Column + 1; if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) { State.StartOfStringLiteral = State.Column + 1; + } else if (Current.is(TT_TableGenMultiLineString) && + State.StartOfStringLiteral == 0) { + State.StartOfStringLiteral = State.Column + 1; } else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) { State.StartOfStringLiteral = State.Column; } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index d5ef627f1348d3..dede89f2600150 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -148,6 +148,7 @@ namespace format { TYPE(StructLBrace) \ TYPE(StructRBrace) \ TYPE(StructuredBindingLSquare) \ + TYPE(TableGenMultiLineString) \ TYPE(TemplateCloser) \ TYPE(TemplateOpener) \ TYPE(TemplateString) \ diff --git a/clang/lib/Format/FormatTokenLexer.cpp 
b/clang/lib/Format/FormatTokenLexer.cpp
index a1fd6dd6effe6c..25ac9be57c81a9 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -93,6 +93,8 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
       // string literals are correctly identified.
       handleCSharpVerbatimAndInterpolatedStrings();
     }
+    if (Style.isTableGen())
+      handleTableGenMultilineString();
     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
       FirstInLineIndex = Tokens.size() - 1;
   } while (Tokens.back()->isNot(tok::eof));
@@ -272,6 +274,14 @@ void FormatTokenLexer::tryMergePreviousTokens() {
       return;
     }
   }
+  // TableGen's Multi line string starts with [{
+  if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace},
+                                           TT_TableGenMultiLineString)) {
+    // Set again with finalizing. This must never be annotated as other types.
+    Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
+    Tokens.back()->Tok.setKind(tok::string_literal);
+    return;
+  }
 }
 
 bool FormatTokenLexer::tryMergeNSStringLiteral() {
@@ -763,6 +773,45 @@ void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
   resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
 }
 
+// Extends the just-merged "[{" token so that it covers a whole TableGen
+// multi line string (code literal), up to and including the closing "}]",
+// and resets the lexer to continue right behind it. Does nothing when the
+// last token is not a TT_TableGenMultiLineString or when no "}]" follows.
+void FormatTokenLexer::handleTableGenMultilineString() {
+  FormatToken *MultiLineString = Tokens.back();
+  if (MultiLineString->isNot(TT_TableGenMultiLineString))
+    return;
+
+  // The lexer is positioned right behind the merged "[{", so the literal
+  // starts two characters before the current buffer offset.
+  auto OpenOffset = Lex->getCurrentBufferOffset() - 2 /* "[{" */;
+  // "}]" is the end of multi line string.
+  auto CloseOffset = Lex->getBuffer().find("}]", OpenOffset);
+  if (CloseOffset == StringRef::npos)
+    return;
+  // CloseOffset is an offset into the whole buffer, whereas substr() takes a
+  // length; subtract OpenOffset so that Text is exactly "[{ ... }]".
+  auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset - OpenOffset + 2);
+  MultiLineString->TokenText = Text;
+  resetLexer(SourceMgr.getFileOffset(
+      Lex->getSourceLocation(Lex->getBufferLocation() - 2 + Text.size())));
+  auto FirstLineText = Text;
+  auto FirstBreak = Text.find('\n');
+  // Set ColumnWidth and LastLineColumnWidth when it has multiple lines.
+  if (FirstBreak != StringRef::npos) {
+    MultiLineString->IsMultiline = true;
+    FirstLineText = Text.substr(0, FirstBreak + 1);
+    // LastLineColumnWidth holds the width of the last line.
+    auto LastBreak = Text.rfind('\n');
+    MultiLineString->LastLineColumnWidth = encoding::columnWidthWithTabs(
+        Text.substr(LastBreak + 1), MultiLineString->OriginalColumn,
+        Style.TabWidth, Encoding);
+  }
+  // ColumnWidth holds only the width of the first line.
+  MultiLineString->ColumnWidth = encoding::columnWidthWithTabs(
+      FirstLineText, MultiLineString->OriginalColumn, Style.TabWidth, Encoding);
+}
+
 void FormatTokenLexer::handleTemplateStrings() {
   FormatToken *BacktickToken = Tokens.back();
 
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index bb6a8ab69c1be1..1dec6bbc41514c 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -95,6 +95,9 @@ class FormatTokenLexer {
 
   void handleCSharpVerbatimAndInterpolatedStrings();
 
+  // Handles TableGen multiline strings. It has the form [{ ... }].
+  void handleTableGenMultilineString();
+
   void tryParsePythonComment();
 
   bool tryMerge_TMacro();
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 92f57a77cdaf01..117d8fe8f7dc12 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2193,6 +2193,22 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
   ASSERT_TRUE(Keywords.isTableGenDefinition(*Tokens[0]));
   ASSERT_TRUE(Tokens[0]->is(Keywords.kw_def));
   ASSERT_TRUE(Tokens[1]->is(TT_StartOfName));
+
+  // Code, the multiline string token.
+  Tokens = Annotate("[{ code is multiline string }]");
+  ASSERT_EQ(Tokens.size(), 2u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString);
+  EXPECT_FALSE(Tokens[0]->IsMultiline);
+  // Case with multiple lines.
+ Tokens = Annotate("[{ It can break\n" + " across lines and the line breaks\n" + " are retained in \n" + " the string. }]"); + ASSERT_EQ(Tokens.size(), 2u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString); + EXPECT_EQ(Tokens[0]->ColumnWidth, sizeof("[{ It can break\n") - 1); + EXPECT_TRUE(Tokens[0]->IsMultiline); + EXPECT_EQ(Tokens[0]->LastLineColumnWidth, sizeof(" the string. }]") - 1); } TEST_F(TokenAnnotatorTest, UnderstandConstructors) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits