https://github.com/owenca updated https://github.com/llvm/llvm-project/pull/141334
>From 470eca4b4d963bf5c1ba87fb2f22620eb717c848 Mon Sep 17 00:00:00 2001 From: Owen Pan <owenpi...@gmail.com> Date: Fri, 23 May 2025 23:21:12 -0700 Subject: [PATCH 1/2] [clang-format] Handle Java text blocks Fix #61954 --- clang/lib/Format/FormatTokenLexer.cpp | 45 ++++++++++++++++++++ clang/lib/Format/FormatTokenLexer.h | 2 + clang/unittests/Format/FormatTestJava.cpp | 52 +++++++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 864486a9b878d..31c3613c8b083 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -694,6 +694,49 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) { return true; } +void FormatTokenLexer::tryParseJavaTextBlock() { + if (FormatTok->TokenText != "\"\"") + return; + + const auto *Str = Lex->getBufferLocation(); + const auto *End = Lex->getBuffer().end(); + + if (Str == End || *Str != '\"') + return; + + // Skip the `"""` that begins a text block. + const auto *S = Str + 1; + + // From docs.oracle.com/en/java/javase/15/text-blocks/#text-block-syntax: + // A text block begins with three double-quote characters followed by a line + // terminator. + while (S < End && *S != '\n') { + if (!isblank(*S)) + return; + ++S; + } + + // Find the `"""` that ends the text block. + for (int Count = 0; Count < 3; ++S) { + if (S == End) + return; + + switch (*S) { + case '\\': + Count = -1; + break; + case '\"': + ++Count; + break; + default: + Count = 0; + } + } + + // Skip the text block. + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S))); +} + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. Changes the current token to a regex literal and updates @@ -1374,6 +1417,8 @@ FormatToken *FormatTokenLexer::getNextToken() { FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); ++Column; StateStack.push(LexerState::TOKEN_STASHED); + } else if (Style.isJava() && FormatTok->is(tok::string_literal)) { + tryParseJavaTextBlock(); } if (Style.isVerilog() && Tokens.size() > 0 && diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index 105847b126e20..026383db1fe6c 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -72,6 +72,8 @@ class FormatTokenLexer { bool canPrecedeRegexLiteral(FormatToken *Prev); + void tryParseJavaTextBlock(); + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. Changes the current token to a regex literal and updates diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp index e01c1d6d7e684..35ee257d015d3 100644 --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -791,6 +791,58 @@ TEST_F(FormatTestJava, AlignCaseArrows) { Style); } +TEST_F(FormatTestJava, TextBlock) { + verifyNoChange("String myStr = \"\"\"\n" + "hello\n" + "there\n" + "\"\"\";"); + + verifyNoChange("String tb = \"\"\"\n" + " the new\"\"\";"); + + verifyNoChange("System.out.println(\"\"\"\n" + " This is the first line\n" + " This is the second line\n" + " \"\"\");"); + + verifyNoChange("void writeHTML() {\n" + " String html = \"\"\" \n" + " <html>\n" + " <p>Hello World.</p>\n" + " </html>\n" + "\"\"\";\n" + " writeOutput(html);\n" + "}"); + + verifyNoChange("String colors = \"\"\"\t\n" + " red\n" + " green\n" + " blue\"\"\".indent(4);"); + + verifyNoChange("String code = \"\"\"\n" + " String source = \\\"\"\"\n" + " String message = \"Hello, World!\";\n" + " System.out.println(message);\n" + " \\\"\"\";\n" + " \"\"\";"); + + verifyNoChange( + "class Outer {\n" + " void printPoetry() {\n" + " String lilacs = \"\"\"\n" + "Passing the apple-tree blows of white and pink in the orchards\n" + "\"\"\";\n" + " System.out.println(lilacs);\n" + " }\n" + "}"); + + verifyNoChange("String name = \"\"\"\n" + " red\n" + " green\n" + " blue\\\n" + " \"\"\";"); +} + } // namespace } // namespace test } // namespace format >From e112f1587d0bc26478b3d3508afaf4cf92610a69 Mon Sep 17 00:00:00 2001 From: Owen Pan <owenpi...@gmail.com> Date: Sat, 24 May 2025 12:01:25 -0700 Subject: [PATCH 2/2] Also ignore invalid text blocks --- clang/lib/Format/FormatTokenLexer.cpp | 23 +++++------------------ clang/unittests/Format/FormatTestJava.cpp | 7 ++++++- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 31c3613c8b083..4cc4f5f22db0d 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -698,29 +698,16 @@ void FormatTokenLexer::tryParseJavaTextBlock() { if (FormatTok->TokenText != "\"\"") return; - const auto *Str = Lex->getBufferLocation(); + const auto *S = Lex->getBufferLocation(); const auto *End = Lex->getBuffer().end(); - if (Str == End || *Str != '\"') + if (S == End || *S != '\"') return; - // Skip the `"""` that begins a text block. - const auto *S = Str + 1; - - // From docs.oracle.com/en/java/javase/15/text-blocks/#text-block-syntax: - // A text block begins with three double-quote characters followed by a line - // terminator. - while (S < End && *S != '\n') { - if (!isblank(*S)) - return; - ++S; - } + ++S; // Skip the `"""` that begins a text block. // Find the `"""` that ends the text block. - for (int Count = 0; Count < 3; ++S) { - if (S == End) - return; - + for (int Count = 0; Count < 3 && S < End; ++S) { switch (*S) { case '\\': Count = -1; @@ -733,7 +720,7 @@ void FormatTokenLexer::tryParseJavaTextBlock() { } } - // Skip the text block. + // Ignore the possibly invalid text block. resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S))); } diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp index 35ee257d015d3..ca5aba043b932 100644 --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -836,11 +836,16 @@ TEST_F(FormatTestJava, TextBlock) { " }\n" "}"); - verifyNoChange("String name = \"\"\"\n" + verifyNoChange("String name = \"\"\"\r\n" " red\n" " green\n" " blue\\\n" " \"\"\";"); + + verifyFormat("String name = \"\"\"Pat Q. Smith\"\"\";"); + + verifyNoChange("String name = \"\"\"\n" + " Pat Q. Smith"); } } // namespace _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits