Author: sstwcw
Date: 2025-03-31T13:54:49Z
New Revision: ab7cee8a0ecf29fdb47c64c8d431a694d63390d2

URL: 
https://github.com/llvm/llvm-project/commit/ab7cee8a0ecf29fdb47c64c8d431a694d63390d2
DIFF: 
https://github.com/llvm/llvm-project/commit/ab7cee8a0ecf29fdb47c64c8d431a694d63390d2.diff

LOG: [clang-format] Handle C++ keywords in other languages better (#132941)

There is some code to make sure that C++ keywords that are identifiers
in the other languages are not treated as keywords.  Right now, the kind
is set to identifier, and the identifier info is cleared.  The latter is
probably so that the code for identifying C++ structures does not
recognize those structures by mistake when formatting a language that
does not have those structures.  But we did not find an instance where
all of the following hold: the language can have the sequence of
tokens, the code tries to parse the structure as if it is C++ using the
identifier info instead of the token kind, and it does so without
checking the language setting.  However,
there are places where the code checks whether the identifier info field
is null or not.  They are places where an identifier and a keyword are
treated the same way.  One example is the name of a function in
JavaScript.  This patch removes the lines that clear the identifier
info.  This way, a C++ keyword gets treated in the same way as an
identifier in those places.

JavaScript

New

```JavaScript
async function
union(
    myparamnameiswaytooloooong) {
}
```

Old

```JavaScript
async function
    union(
        myparamnameiswaytooloooong) {
}
```

Java

New

```Java
enum union { ABC, CDE }
```

Old

```Java
enum
union { ABC, CDE }
```

Added: 
    

Modified: 
    clang/lib/Format/FormatTokenLexer.cpp
    clang/unittests/Format/FormatTestJS.cpp
    clang/unittests/Format/FormatTestJava.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/FormatTokenLexer.cpp 
b/clang/lib/Format/FormatTokenLexer.cpp
index eed54a11684b5..014b10b206d90 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -1306,15 +1306,12 @@ FormatToken *FormatTokenLexer::getNextToken() {
         FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
                            tok::kw_operator)) {
       FormatTok->Tok.setKind(tok::identifier);
-      FormatTok->Tok.setIdentifierInfo(nullptr);
     } else if (Style.isJavaScript() &&
                FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
                                   tok::kw_operator)) {
       FormatTok->Tok.setKind(tok::identifier);
-      FormatTok->Tok.setIdentifierInfo(nullptr);
     } else if (Style.isTableGen() && !Keywords.isTableGenKeyword(*FormatTok)) {
       FormatTok->Tok.setKind(tok::identifier);
-      FormatTok->Tok.setIdentifierInfo(nullptr);
     }
   } else if (FormatTok->is(tok::greatergreater)) {
     FormatTok->Tok.setKind(tok::greater);

diff  --git a/clang/unittests/Format/FormatTestJS.cpp 
b/clang/unittests/Format/FormatTestJS.cpp
index 78c9f887a159b..3dae67fbcdfcb 100644
--- a/clang/unittests/Format/FormatTestJS.cpp
+++ b/clang/unittests/Format/FormatTestJS.cpp
@@ -828,12 +828,18 @@ TEST_F(FormatTestJS, AsyncFunctions) {
                "}  ");
   // clang-format must not insert breaks between async and function, otherwise
   // automatic semicolon insertion may trigger (in particular in a class body).
+  auto Style = getGoogleJSStyleWithColumns(10);
   verifyFormat("async function\n"
                "hello(\n"
                "    myparamnameiswaytooloooong) {\n"
                "}",
                "async function hello(myparamnameiswaytooloooong) {}",
-               getGoogleJSStyleWithColumns(10));
+               Style);
+  verifyFormat("async function\n"
+               "union(\n"
+               "    myparamnameiswaytooloooong) {\n"
+               "}",
+               Style);
   verifyFormat("class C {\n"
                "  async hello(\n"
                "      myparamnameiswaytooloooong) {\n"
@@ -841,7 +847,7 @@ TEST_F(FormatTestJS, AsyncFunctions) {
                "}",
                "class C {\n"
                "  async hello(myparamnameiswaytooloooong) {} }",
-               getGoogleJSStyleWithColumns(10));
+               Style);
   verifyFormat("async function* f() {\n"
                "  yield fetch(x);\n"
                "}");
@@ -1338,15 +1344,16 @@ TEST_F(FormatTestJS, 
WrapRespectsAutomaticSemicolonInsertion) {
   // The following statements must not wrap, as otherwise the program meaning
   // would change due to automatic semicolon insertion.
   // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.9.1.
-  verifyFormat("return aaaaa;", getGoogleJSStyleWithColumns(10));
-  verifyFormat("yield aaaaa;", getGoogleJSStyleWithColumns(10));
-  verifyFormat("return /* hello! */ aaaaa;", getGoogleJSStyleWithColumns(10));
-  verifyFormat("continue aaaaa;", getGoogleJSStyleWithColumns(10));
-  verifyFormat("continue /* hello! */ aaaaa;", 
getGoogleJSStyleWithColumns(10));
-  verifyFormat("break aaaaa;", getGoogleJSStyleWithColumns(10));
-  verifyFormat("throw aaaaa;", getGoogleJSStyleWithColumns(10));
-  verifyFormat("aaaaaaaaa++;", getGoogleJSStyleWithColumns(10));
-  verifyFormat("aaaaaaaaa--;", getGoogleJSStyleWithColumns(10));
+  auto Style =getGoogleJSStyleWithColumns(10);
+  verifyFormat("return aaaaa;", Style);
+  verifyFormat("yield aaaaa;", Style);
+  verifyFormat("return /* hello! */ aaaaa;", Style);
+  verifyFormat("continue aaaaa;", Style);
+  verifyFormat("continue /* hello! */ aaaaa;", Style);
+  verifyFormat("break aaaaa;", Style);
+  verifyFormat("throw aaaaa;", Style);
+  verifyFormat("aaaaaaaaa++;", Style);
+  verifyFormat("aaaaaaaaa--;", Style);
   verifyFormat("return [\n"
                "  aaa\n"
                "];",
@@ -1366,12 +1373,13 @@ TEST_F(FormatTestJS, 
WrapRespectsAutomaticSemicolonInsertion) {
   // Ideally the foo() bit should be indented relative to the async function().
   verifyFormat("async function\n"
                "foo() {}",
-               getGoogleJSStyleWithColumns(10));
-  verifyFormat("await theReckoning;", getGoogleJSStyleWithColumns(10));
-  verifyFormat("some['a']['b']", getGoogleJSStyleWithColumns(10));
+               Style);
+  verifyFormat("await theReckoning;", Style);
+  verifyFormat("some['a']['b']", Style);
+  verifyFormat("union['a']['b']", Style);
   verifyFormat("x = (a['a']\n"
                "      ['b']);",
-               getGoogleJSStyleWithColumns(10));
+               Style);
   verifyFormat("function f() {\n"
                "  return foo.bar(\n"
                "      (param): param is {\n"
@@ -2500,6 +2508,10 @@ TEST_F(FormatTestJS, NonNullAssertionOperator) {
 TEST_F(FormatTestJS, CppKeywords) {
   // Make sure we don't mess stuff up because of C++ keywords.
   verifyFormat("return operator && (aa);");
+  verifyFormat("enum operator {\n"
+               "  A = 1,\n"
+               "  B\n"
+               "}");
   // .. or QT ones.
   verifyFormat("const slots: Slot[];");
   // use the "!" assertion operator to validate that clang-format understands

diff  --git a/clang/unittests/Format/FormatTestJava.cpp 
b/clang/unittests/Format/FormatTestJava.cpp
index 33998bc7ff858..e01c1d6d7e684 100644
--- a/clang/unittests/Format/FormatTestJava.cpp
+++ b/clang/unittests/Format/FormatTestJava.cpp
@@ -158,6 +158,8 @@ TEST_F(FormatTestJava, AnonymousClasses) {
 
 TEST_F(FormatTestJava, EnumDeclarations) {
   verifyFormat("enum SomeThing { ABC, CDE }");
+  // A C++ keyword should not mess things up.
+  verifyFormat("enum union { ABC, CDE }");
   verifyFormat("enum SomeThing {\n"
                "  ABC,\n"
                "  CDE,\n"


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to