https://github.com/sstwcw updated https://github.com/llvm/llvm-project/pull/133722
>From b018450ed1a2901dd5b8cd5e146c1ce5d8613c86 Mon Sep 17 00:00:00 2001 From: sstwcw <su3e8a96kzl...@posteo.net> Date: Mon, 31 Mar 2025 13:50:02 +0000 Subject: [PATCH 1/3] [clang-format] Recognize TableGen paste operator on separate line Formatting this piece of code made the program crash. ``` class TypedVecListRegOperand<RegisterClass Reg, int lanes, string eltsize> : RegisterOperand<Reg, "printTypedVectorList<" # lanes # ", '" # eltsize # "'>">; ``` The line starting with the `#` was treated as a separate preprocessor directive line. Then the code dereferenced a null pointer when it tried to continue parsing the first line that did not end in a semicolon. Now the 2 problems are fixed. --- clang/lib/Format/TokenAnnotator.cpp | 2 ++ clang/lib/Format/UnwrappedLineParser.cpp | 11 +++++++++-- clang/unittests/Format/FormatTestTableGen.cpp | 6 ++++++ clang/unittests/Format/TokenAnnotatorTest.cpp | 17 +++++++++++++++++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index d87b3a6088bd8..278355aa58586 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -949,6 +949,8 @@ class AnnotatingParser { HashTok->setType(TT_Unknown); if (!parseTableGenValue(ParseNameMode)) return false; + if (!CurrentToken) + return true; } // In name mode, '{' is regarded as the end of the value. // See TGParser::ParseValue in TGParser.cpp diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index f7712bea01c2c..aa0c372d5e15f 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -4853,9 +4853,16 @@ void UnwrappedLineParser::readToken(int LevelDifference) { PreviousWasComment = FormatTok->is(tok::comment); while (!Line->InPPDirective && FormatTok->is(tok::hash) && - (!Style.isVerilog() || - Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && FirstNonCommentOnLine) { + // In Verilog, the backtick is used for macro invocations. In TableGen, + // the single hash is used for the paste operator. + const FormatToken *Next = Tokens->peekNextToken(); + assert(Next); // There is an EOF token at the end. + if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(*Next)) || + (Style.isTableGen() && + !Next->isOneOf(tok::pp_define, tok::pp_ifdef, tok::pp_ifndef))) { + break; + } distributeComments(Comments, FormatTok); Comments.clear(); // If there is an unfinished unwrapped line, we flush the preprocessor diff --git a/clang/unittests/Format/FormatTestTableGen.cpp b/clang/unittests/Format/FormatTestTableGen.cpp index 92377c31f2e91..b78f79f20704f 100644 --- a/clang/unittests/Format/FormatTestTableGen.cpp +++ b/clang/unittests/Format/FormatTestTableGen.cpp @@ -218,6 +218,12 @@ TEST_F(FormatTestTableGen, PasteOperator) { " string Z = [\"Traring\", \"Paste\", \"Traring\", \"Paste\",\n" " \"Traring\", \"Paste\"]#;\n" "}"); + verifyFormat("def x#x {}", "def x\n" + "#x {}"); + verifyFormat("def x#x {}", "def x\n" + "#\n" + "x {}"); + verifyFormat("def x#x"); } TEST_F(FormatTestTableGen, ClassDefinition) { diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index ac5e979aea071..fb8f5d30a669f 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2832,6 +2832,23 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) { Tokens = Annotate("!cond"); EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator); + // The paste operator should not be treated as a preprocessor directive even + // if it is on a separate line. + Tokens = Annotate("def x\n" + "#embed {}"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName); + EXPECT_TOKEN(Tokens[2], tok::hash, TT_Unknown); + EXPECT_EQ(Tokens[1]->Next, Tokens[2]); + Tokens = Annotate("def x\n" + "#define x\n" + "#embed {}"); + ASSERT_EQ(Tokens.size(), 10u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName); + EXPECT_TOKEN(Tokens[2], tok::hash, TT_Unknown); + EXPECT_TOKEN(Tokens[5], tok::hash, TT_Unknown); + EXPECT_EQ(Tokens[1]->Next, Tokens[5]); + auto AnnotateValue = [this, &Style](StringRef Code) { // Values are annotated only in specific context. auto Result = annotate(("def X { let V = " + Code + "; }").str(), Style); >From d3671a56acf741443dd2ba16fc46f488103abd36 Mon Sep 17 00:00:00 2001 From: sstwcw <su3e8a96kzl...@posteo.net> Date: Fri, 4 Apr 2025 14:24:58 +0000 Subject: [PATCH 2/3] Add other directives --- clang/lib/Format/UnwrappedLineParser.cpp | 3 ++- clang/unittests/Format/TokenAnnotatorTest.cpp | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index aa0c372d5e15f..d3cedd249214a 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -4860,7 +4860,8 @@ void UnwrappedLineParser::readToken(int LevelDifference) { assert(Next); // There is an EOF token at the end. if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(*Next)) || (Style.isTableGen() && - !Next->isOneOf(tok::pp_define, tok::pp_ifdef, tok::pp_ifndef))) { + !Next->isOneOf(tok::kw_else, tok::pp_define, tok::pp_ifdef, + tok::pp_ifndef, tok::pp_endif))) { break; } distributeComments(Comments, FormatTok); diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index fb8f5d30a669f..dbe3e05e6fb72 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2838,16 +2838,21 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) { "#embed {}"); ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName); - EXPECT_TOKEN(Tokens[2], tok::hash, TT_Unknown); EXPECT_EQ(Tokens[1]->Next, Tokens[2]); Tokens = Annotate("def x\n" "#define x\n" "#embed {}"); ASSERT_EQ(Tokens.size(), 10u) << Tokens; EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName); - EXPECT_TOKEN(Tokens[2], tok::hash, TT_Unknown); - EXPECT_TOKEN(Tokens[5], tok::hash, TT_Unknown); EXPECT_EQ(Tokens[1]->Next, Tokens[5]); + Tokens = Annotate("def x\n" + "#ifdef x\n" + "#else\n" + "#endif\n" + "#embed {}"); + ASSERT_EQ(Tokens.size(), 14u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName); + EXPECT_EQ(Tokens[1]->Next, Tokens[9]); auto AnnotateValue = [this, &Style](StringRef Code) { // Values are annotated only in specific context. >From 45c25b7c9bf8c95c9447dbae08562e99bd5642ee Mon Sep 17 00:00:00 2001 From: sstwcw <su3e8a96kzl...@posteo.net> Date: Fri, 4 Apr 2025 14:27:29 +0000 Subject: [PATCH 3/3] Comments --- clang/lib/Format/UnwrappedLineParser.cpp | 3 +-- clang/unittests/Format/FormatTestTableGen.cpp | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index d3cedd249214a..cab21fc4970fe 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -4856,8 +4856,7 @@ void UnwrappedLineParser::readToken(int LevelDifference) { FirstNonCommentOnLine) { // In Verilog, the backtick is used for macro invocations. In TableGen, // the single hash is used for the paste operator. - const FormatToken *Next = Tokens->peekNextToken(); - assert(Next); // There is an EOF token at the end. + const auto *Next = Tokens->peekNextToken(); if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(*Next)) || (Style.isTableGen() && !Next->isOneOf(tok::kw_else, tok::pp_define, tok::pp_ifdef, diff --git a/clang/unittests/Format/FormatTestTableGen.cpp b/clang/unittests/Format/FormatTestTableGen.cpp index b78f79f20704f..1c3d187de393c 100644 --- a/clang/unittests/Format/FormatTestTableGen.cpp +++ b/clang/unittests/Format/FormatTestTableGen.cpp @@ -218,6 +218,7 @@ TEST_F(FormatTestTableGen, PasteOperator) { " string Z = [\"Traring\", \"Paste\", \"Traring\", \"Paste\",\n" " \"Traring\", \"Paste\"]#;\n" "}"); + verifyFormat("def x#x {}", "def x\n" "#x {}"); verifyFormat("def x#x {}", "def x\n" _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits