llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-format

Author: None (sstwcw)

<details>
<summary>Changes</summary>

Formatting this piece of code made the program crash.

```
class TypedVecListRegOperand&lt;RegisterClass Reg, int lanes, string eltsize&gt;
    : RegisterOperand&lt;Reg, "printTypedVectorList&lt;" # lanes # ", '"
                                                   # eltsize # "'&gt;"&gt;;
```

The line starting with the `#` was treated as a separate preprocessor directive 
line.  Then the code dereferenced a null pointer when it tried to continue 
parsing the first line that did not end in a semicolon.

Now the 2 problems are fixed.

---
Full diff: https://github.com/llvm/llvm-project/pull/133722.diff


4 Files Affected:

- (modified) clang/lib/Format/TokenAnnotator.cpp (+2) 
- (modified) clang/lib/Format/UnwrappedLineParser.cpp (+9-2) 
- (modified) clang/unittests/Format/FormatTestTableGen.cpp (+6) 
- (modified) clang/unittests/Format/TokenAnnotatorTest.cpp (+17) 


``````````diff
diff --git a/clang/lib/Format/TokenAnnotator.cpp 
b/clang/lib/Format/TokenAnnotator.cpp
index d87b3a6088bd8..278355aa58586 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -949,6 +949,8 @@ class AnnotatingParser {
       HashTok->setType(TT_Unknown);
       if (!parseTableGenValue(ParseNameMode))
         return false;
+      if (!CurrentToken)
+        return true;
     }
     // In name mode, '{' is regarded as the end of the value.
     // See TGParser::ParseValue in TGParser.cpp
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp 
b/clang/lib/Format/UnwrappedLineParser.cpp
index f7712bea01c2c..aa0c372d5e15f 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -4853,9 +4853,16 @@ void UnwrappedLineParser::readToken(int LevelDifference) 
{
     PreviousWasComment = FormatTok->is(tok::comment);
 
     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
-           (!Style.isVerilog() ||
-            Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
            FirstNonCommentOnLine) {
+      // In Verilog, the backtick is used for macro invocations. In TableGen,
+      // the single hash is used for the paste operator.
+      const FormatToken *Next = Tokens->peekNextToken();
+      assert(Next); // There is an EOF token at the end.
+      if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(*Next)) ||
+          (Style.isTableGen() &&
+           !Next->isOneOf(tok::pp_define, tok::pp_ifdef, tok::pp_ifndef))) {
+        break;
+      }
       distributeComments(Comments, FormatTok);
       Comments.clear();
       // If there is an unfinished unwrapped line, we flush the preprocessor
diff --git a/clang/unittests/Format/FormatTestTableGen.cpp 
b/clang/unittests/Format/FormatTestTableGen.cpp
index 92377c31f2e91..b78f79f20704f 100644
--- a/clang/unittests/Format/FormatTestTableGen.cpp
+++ b/clang/unittests/Format/FormatTestTableGen.cpp
@@ -218,6 +218,12 @@ TEST_F(FormatTestTableGen, PasteOperator) {
                "  string Z = [\"Traring\", \"Paste\", \"Traring\", 
\"Paste\",\n"
                "              \"Traring\", \"Paste\"]#;\n"
                "}");
+  verifyFormat("def x#x {}", "def x\n"
+                             "#x {}");
+  verifyFormat("def x#x {}", "def x\n"
+                             "#\n"
+                             "x {}");
+  verifyFormat("def x#x");
 }
 
 TEST_F(FormatTestTableGen, ClassDefinition) {
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp 
b/clang/unittests/Format/TokenAnnotatorTest.cpp
index ac5e979aea071..fb8f5d30a669f 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2832,6 +2832,23 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
   Tokens = Annotate("!cond");
   EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator);
 
+  // The paste operator should not be treated as a preprocessor directive even
+  // if it is on a separate line.
+  Tokens = Annotate("def x\n"
+                    "#embed {}");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
+  EXPECT_TOKEN(Tokens[2], tok::hash, TT_Unknown);
+  EXPECT_EQ(Tokens[1]->Next, Tokens[2]);
+  Tokens = Annotate("def x\n"
+                    "#define x\n"
+                    "#embed {}");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
+  EXPECT_TOKEN(Tokens[2], tok::hash, TT_Unknown);
+  EXPECT_TOKEN(Tokens[5], tok::hash, TT_Unknown);
+  EXPECT_EQ(Tokens[1]->Next, Tokens[5]);
+
   auto AnnotateValue = [this, &Style](StringRef Code) {
     // Values are annotated only in specific context.
     auto Result = annotate(("def X { let V = " + Code + "; }").str(), Style);

``````````

</details>


https://github.com/llvm/llvm-project/pull/133722
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to