https://github.com/owenca updated https://github.com/llvm/llvm-project/pull/141334

>From 470eca4b4d963bf5c1ba87fb2f22620eb717c848 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpi...@gmail.com>
Date: Fri, 23 May 2025 23:21:12 -0700
Subject: [PATCH 1/2] [clang-format] Handle Java text blocks

Fix #61954
---
 clang/lib/Format/FormatTokenLexer.cpp     | 45 ++++++++++++++++++++
 clang/lib/Format/FormatTokenLexer.h       |  2 +
 clang/unittests/Format/FormatTestJava.cpp | 52 +++++++++++++++++++++++
 3 files changed, 99 insertions(+)

diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 864486a9b878d..31c3613c8b083 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -694,6 +694,49 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
   return true;
 }
 
+void FormatTokenLexer::tryParseJavaTextBlock() {
+  if (FormatTok->TokenText != "\"\"")
+    return;
+
+  const auto *Str = Lex->getBufferLocation();
+  const auto *End = Lex->getBuffer().end();
+
+  if (Str == End || *Str != '\"')
+    return;
+
+  // Skip the `"""` that begins a text block.
+  const auto *S = Str + 1;
+
+  // From docs.oracle.com/en/java/javase/15/text-blocks/#text-block-syntax:
+  // A text block begins with three double-quote characters followed by a line
+  // terminator.
+  while (S < End && *S != '\n') {
+    if (!isblank(*S))
+      return;
+    ++S;
+  }
+
+  // Find the `"""` that ends the text block.
+  for (int Count = 0; Count < 3; ++S) {
+    if (S == End)
+      return;
+
+    switch (*S) {
+    case '\\':
+      Count = -1;
+      break;
+    case '\"':
+      ++Count;
+      break;
+    default:
+      Count = 0;
+    }
+  }
+
+  // Skip the text block.
+  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S)));
+}
+
 // Tries to parse a JavaScript Regex literal starting at the current token,
 // if that begins with a slash and is in a location where JavaScript allows
 // regex literals. Changes the current token to a regex literal and updates
@@ -1374,6 +1417,8 @@ FormatToken *FormatTokenLexer::getNextToken() {
     FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
     ++Column;
     StateStack.push(LexerState::TOKEN_STASHED);
+  } else if (Style.isJava() && FormatTok->is(tok::string_literal)) {
+    tryParseJavaTextBlock();
   }
 
   if (Style.isVerilog() && Tokens.size() > 0 &&
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 105847b126e20..026383db1fe6c 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -72,6 +72,8 @@ class FormatTokenLexer {
 
   bool canPrecedeRegexLiteral(FormatToken *Prev);
 
+  void tryParseJavaTextBlock();
+
   // Tries to parse a JavaScript Regex literal starting at the current token,
   // if that begins with a slash and is in a location where JavaScript allows
   // regex literals. Changes the current token to a regex literal and updates
diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp
index e01c1d6d7e684..35ee257d015d3 100644
--- a/clang/unittests/Format/FormatTestJava.cpp
+++ b/clang/unittests/Format/FormatTestJava.cpp
@@ -791,6 +791,58 @@ TEST_F(FormatTestJava, AlignCaseArrows) {
                Style);
 }
 
+TEST_F(FormatTestJava, TextBlock) {
+  verifyNoChange("String myStr = \"\"\"\n"
+                 "hello\n"
+                 "there\n"
+                 "\"\"\";");
+
+  verifyNoChange("String tb = \"\"\"\n"
+                 "            the new\"\"\";");
+
+  verifyNoChange("System.out.println(\"\"\"\n"
+                 "    This is the first line\n"
+                 "    This is the second line\n"
+                 "    \"\"\");");
+
+  verifyNoChange("void writeHTML() {\n"
+                 "  String html = \"\"\" \n"
+                 "                <html>\n"
+                 "                    <p>Hello World.</p>\n"
+                 "                </html>\n"
+                 "\"\"\";\n"
+                 "  writeOutput(html);\n"
+                 "}");
+
+  verifyNoChange("String colors = \"\"\"\t\n"
+                 "    red\n"
+                 "    green\n"
+                 "    blue\"\"\".indent(4);");
+
+  verifyNoChange("String code = \"\"\"\n"
+                 "    String source = \\\"\"\"\n"
+                 "        String message = \"Hello, World!\";\n"
+                 "        System.out.println(message);\n"
+                 "        \\\"\"\";\n"
+                 "    \"\"\";");
+
+  verifyNoChange(
+      "class Outer {\n"
+      "  void printPoetry() {\n"
+      "    String lilacs = \"\"\"\n"
+      "Passing the apple-tree blows of white and pink in the orchards\n"
+      "\"\"\";\n"
+      "    System.out.println(lilacs);\n"
+      "  }\n"
+      "}");
+
+  verifyNoChange("String name = \"\"\"\n"
+                 "        red\n"
+                 "        green\n"
+                 "        blue\\\n"
+                 "    \"\"\";");
+}
+
 } // namespace
 } // namespace test
 } // namespace format

>From e112f1587d0bc26478b3d3508afaf4cf92610a69 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpi...@gmail.com>
Date: Sat, 24 May 2025 12:01:25 -0700
Subject: [PATCH 2/2] Also ignore invalid text blocks

---
 clang/lib/Format/FormatTokenLexer.cpp     | 23 +++++------------------
 clang/unittests/Format/FormatTestJava.cpp |  7 ++++++-
 2 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 31c3613c8b083..4cc4f5f22db0d 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -698,29 +698,16 @@ void FormatTokenLexer::tryParseJavaTextBlock() {
   if (FormatTok->TokenText != "\"\"")
     return;
 
-  const auto *Str = Lex->getBufferLocation();
+  const auto *S = Lex->getBufferLocation();
   const auto *End = Lex->getBuffer().end();
 
-  if (Str == End || *Str != '\"')
+  if (S == End || *S != '\"')
     return;
 
-  // Skip the `"""` that begins a text block.
-  const auto *S = Str + 1;
-
-  // From docs.oracle.com/en/java/javase/15/text-blocks/#text-block-syntax:
-  // A text block begins with three double-quote characters followed by a line
-  // terminator.
-  while (S < End && *S != '\n') {
-    if (!isblank(*S))
-      return;
-    ++S;
-  }
+  ++S; // Skip the `"""` that begins a text block.
 
   // Find the `"""` that ends the text block.
-  for (int Count = 0; Count < 3; ++S) {
-    if (S == End)
-      return;
-
+  for (int Count = 0; Count < 3 && S < End; ++S) {
     switch (*S) {
     case '\\':
       Count = -1;
@@ -733,7 +720,7 @@ void FormatTokenLexer::tryParseJavaTextBlock() {
     }
   }
 
-  // Skip the text block.
+  // Ignore the possibly invalid text block.
   resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S)));
 }
 
diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp
index 35ee257d015d3..ca5aba043b932 100644
--- a/clang/unittests/Format/FormatTestJava.cpp
+++ b/clang/unittests/Format/FormatTestJava.cpp
@@ -836,11 +836,16 @@ TEST_F(FormatTestJava, TextBlock) {
       "  }\n"
       "}");
 
-  verifyNoChange("String name = \"\"\"\n"
+  verifyNoChange("String name = \"\"\"\r\n"
                  "        red\n"
                  "        green\n"
                  "        blue\\\n"
                  "    \"\"\";");
+
+  verifyFormat("String name = \"\"\"Pat Q. Smith\"\"\";");
+
+  verifyNoChange("String name = \"\"\"\n"
+                 "              Pat Q. Smith");
 }
 
 } // namespace

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to