https://github.com/owenca created https://github.com/llvm/llvm-project/pull/141334
Fix #61954 >From 470eca4b4d963bf5c1ba87fb2f22620eb717c848 Mon Sep 17 00:00:00 2001 From: Owen Pan <owenpi...@gmail.com> Date: Fri, 23 May 2025 23:21:12 -0700 Subject: [PATCH] [clang-format] Handle Java text blocks Fix #61954 --- clang/lib/Format/FormatTokenLexer.cpp | 45 ++++++++++++++++++++ clang/lib/Format/FormatTokenLexer.h | 2 + clang/unittests/Format/FormatTestJava.cpp | 52 +++++++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 864486a9b878d..31c3613c8b083 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -694,6 +694,49 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) { return true; } +void FormatTokenLexer::tryParseJavaTextBlock() { + if (FormatTok->TokenText != "\"\"") + return; + + const auto *Str = Lex->getBufferLocation(); + const auto *End = Lex->getBuffer().end(); + + if (Str == End || *Str != '\"') + return; + + // Skip the `"""` that begins a text block. + const auto *S = Str + 1; + + // From docs.oracle.com/en/java/javase/15/text-blocks/#text-block-syntax: + // A text block begins with three double-quote characters followed by a line + // terminator. + while (S < End && *S != '\n') { + if (!isblank(*S)) + return; + ++S; + } + + // Find the `"""` that ends the text block. + for (int Count = 0; Count < 3; ++S) { + if (S == End) + return; + + switch (*S) { + case '\\': + Count = -1; + break; + case '\"': + ++Count; + break; + default: + Count = 0; + } + } + + // Skip the text block. + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S))); +} + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. Changes the current token to a regex literal and updates @@ -1374,6 +1417,8 @@ FormatToken *FormatTokenLexer::getNextToken() { FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); ++Column; StateStack.push(LexerState::TOKEN_STASHED); + } else if (Style.isJava() && FormatTok->is(tok::string_literal)) { + tryParseJavaTextBlock(); } if (Style.isVerilog() && Tokens.size() > 0 && diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index 105847b126e20..026383db1fe6c 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -72,6 +72,8 @@ class FormatTokenLexer { bool canPrecedeRegexLiteral(FormatToken *Prev); + void tryParseJavaTextBlock(); + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. Changes the current token to a regex literal and updates diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp index e01c1d6d7e684..35ee257d015d3 100644 --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -791,6 +791,58 @@ TEST_F(FormatTestJava, AlignCaseArrows) { Style); } +TEST_F(FormatTestJava, TextBlock) { + verifyNoChange("String myStr = \"\"\"\n" + "hello\n" + "there\n" + "\"\"\";"); + + verifyNoChange("String tb = \"\"\"\n" + " the new\"\"\";"); + + verifyNoChange("System.out.println(\"\"\"\n" + " This is the first line\n" + " This is the second line\n" + " \"\"\");"); + + verifyNoChange("void writeHTML() {\n" + " String html = \"\"\" \n" + " <html>\n" + " <p>Hello World.</p>\n" + " </html>\n" + "\"\"\";\n" + " writeOutput(html);\n" + "}"); + + verifyNoChange("String colors = \"\"\"\t\n" + " red\n" + " green\n" + " blue\"\"\".indent(4);"); + + verifyNoChange("String code = \"\"\"\n" + " String source = \\\"\"\"\n" + " String message = \"Hello, World!\";\n" + " System.out.println(message);\n" + " \\\"\"\";\n" + " \"\"\";"); + + verifyNoChange( + "class Outer {\n" + " void printPoetry() {\n" + " String lilacs = \"\"\"\n" + "Passing the apple-tree blows of white and pink in the orchards\n" + "\"\"\";\n" + " System.out.println(lilacs);\n" + " }\n" + "}"); + + verifyNoChange("String name = \"\"\"\n" + " red\n" + " green\n" + " blue\\\n" + " \"\"\";"); +} + } // namespace } // namespace test } // namespace format _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits