HazardyKnusperkeks created this revision. HazardyKnusperkeks added reviewers: owenpan, MyDeveloperDay, curdeius. HazardyKnusperkeks added a project: clang-format. HazardyKnusperkeks requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
We have a little problem. TokenAnnotator::resetTokenMetadata() resets the type of every token, except for those on a (growing) whitelist. This is because the TokenAnnotator visits some tokens multiple times, e.g. when trying to determine whether a < is a less-than operator or a template opener. In some of these runs, which are basically "reverted", the types are reset. On the other hand, if the parser already knows the type, it should be able to set it without it being reset later. So we introduce the ability to set a type and make it final. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D120511 Files: clang/lib/Format/FormatToken.h clang/lib/Format/TokenAnnotator.cpp clang/lib/Format/UnwrappedLineParser.cpp clang/unittests/Format/TokenAnnotatorTest.cpp
Index: clang/unittests/Format/TokenAnnotatorTest.cpp =================================================================== --- clang/unittests/Format/TokenAnnotatorTest.cpp +++ clang/unittests/Format/TokenAnnotatorTest.cpp @@ -70,6 +70,14 @@ EXPECT_EQ(Tokens.size(), 17u) << Tokens; EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen); EXPECT_TOKEN(Tokens[11], tok::star, TT_PointerOrReference); + + Tokens = annotate("void f() {\n" + " while (p < a && *p == 'a')\n" + " p++;\n" + "}"); + EXPECT_EQ(Tokens.size(), 21u) << Tokens; + EXPECT_TOKEN(Tokens[10], tok::ampamp, TT_BinaryOperator); + EXPECT_TOKEN(Tokens[11], tok::star, TT_UnaryOperator); } TEST_F(TokenAnnotatorTest, UnderstandsClasses) { Index: clang/lib/Format/UnwrappedLineParser.cpp =================================================================== --- clang/lib/Format/UnwrappedLineParser.cpp +++ clang/lib/Format/UnwrappedLineParser.cpp @@ -500,7 +500,7 @@ break; case tok::l_brace: if (NextLBracesType != TT_Unknown) - FormatTok->setType(NextLBracesType); + FormatTok->setFinalizedType(NextLBracesType); else if (FormatTok->Previous && FormatTok->Previous->ClosesRequiresClause) { // We need the 'default' case here to correctly parse a function @@ -1240,7 +1240,7 @@ nextToken(); while (!eof()) { if (FormatTok->is(tok::colon)) { - FormatTok->setType(TT_ModulePartitionColon); + FormatTok->setFinalizedType(TT_ModulePartitionColon); } // Handle import <foo/bar.h> as we would an include statement. else if (FormatTok->is(tok::less)) { @@ -1250,7 +1250,7 @@ // literals. 
if (FormatTok->isNot(tok::comment) && !FormatTok->TokenText.startswith("//")) - FormatTok->setType(TT_ImplicitStringLiteral); + FormatTok->setFinalizedType(TT_ImplicitStringLiteral); nextToken(); } } @@ -1325,11 +1325,11 @@ case tok::kw_asm: nextToken(); if (FormatTok->is(tok::l_brace)) { - FormatTok->setType(TT_InlineASMBrace); + FormatTok->setFinalizedType(TT_InlineASMBrace); nextToken(); while (FormatTok && FormatTok->isNot(tok::eof)) { if (FormatTok->is(tok::r_brace)) { - FormatTok->setType(TT_InlineASMBrace); + FormatTok->setFinalizedType(TT_InlineASMBrace); nextToken(); addUnwrappedLine(); break; @@ -1651,7 +1651,7 @@ break; case tok::l_brace: if (NextLBracesType != TT_Unknown) - FormatTok->setType(NextLBracesType); + FormatTok->setFinalizedType(NextLBracesType); if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { // A block outside of parentheses must be the last part of a // structural element. @@ -1668,7 +1668,7 @@ addUnwrappedLine(); } if (!Line->InPPDirective) - FormatTok->setType(TT_FunctionLBrace); + FormatTok->setFinalizedType(TT_FunctionLBrace); parseBlock(); addUnwrappedLine(); return; @@ -1773,7 +1773,7 @@ if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { - PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro); + PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); addUnwrappedLine(); return; } @@ -1997,7 +1997,7 @@ // This might or might not actually be a lambda arrow (this could be an // ObjC method invocation followed by a dereferencing arrow). We might // reset this back to TT_Unknown in TokenAnnotator. 
- FormatTok->setType(TT_LambdaArrow); + FormatTok->setFinalizedType(TT_LambdaArrow); SeenArrow = true; nextToken(); break; @@ -2005,8 +2005,8 @@ return true; } } - FormatTok->setType(TT_LambdaLBrace); - LSquare.setType(TT_LambdaLSquare); + FormatTok->setFinalizedType(TT_LambdaLBrace); + LSquare.setFinalizedType(TT_LambdaLSquare); parseChildBlock(); return true; } @@ -2038,7 +2038,7 @@ // Consume * (generator function). Treat it like C++'s overloaded operators. if (FormatTok->is(tok::star)) { - FormatTok->setType(TT_OverloadedOperator); + FormatTok->setFinalizedType(TT_OverloadedOperator); nextToken(); } @@ -2246,7 +2246,7 @@ } case tok::ampamp: if (AmpAmpTokenType != TT_Unknown) - FormatTok->setType(AmpAmpTokenType); + FormatTok->setFinalizedType(AmpAmpTokenType); LLVM_FALLTHROUGH; default: nextToken(); @@ -3004,7 +3004,7 @@ !RequiresToken->Previous || RequiresToken->Previous->is(TT_RequiresExpressionLBrace); - RequiresToken->setType(InRequiresExpression + RequiresToken->setFinalizedType(InRequiresExpression ? 
TT_RequiresClauseInARequiresExpression : TT_RequiresClause); @@ -3026,15 +3026,15 @@ assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); assert(RequiresToken->getType() == TT_Unknown); - RequiresToken->setType(TT_RequiresExpression); + RequiresToken->setFinalizedType(TT_RequiresExpression); if (FormatTok->is(tok::l_paren)) { - FormatTok->setType(TT_RequiresExpressionLParen); + FormatTok->setFinalizedType(TT_RequiresExpressionLParen); parseParens(); } if (FormatTok->is(tok::l_brace)) { - FormatTok->setType(TT_RequiresExpressionLBrace); + FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); parseChildBlock(/*CanContainBracedList=*/false, /*NextLBracesType=*/TT_CompoundRequirementLBrace); } @@ -3110,7 +3110,7 @@ case tok::ampamp: case tok::pipepipe: - FormatTok->setType(TT_BinaryOperator); + FormatTok->setFinalizedType(TT_BinaryOperator); nextToken(); LambdaNextTimeAllowed = true; break; @@ -3218,7 +3218,7 @@ // Just a declaration or something is wrong. if (FormatTok->isNot(tok::l_brace)) return true; - FormatTok->setType(TT_EnumLBrace); + FormatTok->setFinalizedType(TT_EnumLBrace); FormatTok->setBlockKind(BK_Block); if (Style.Language == FormatStyle::LK_Java) { @@ -3470,7 +3470,7 @@ } }; if (FormatTok->is(tok::l_brace)) { - FormatTok->setType(GetBraceType(InitialToken)); + FormatTok->setFinalizedType(GetBraceType(InitialToken)); if (ParseAsExpr) { parseChildBlock(); } else { Index: clang/lib/Format/TokenAnnotator.cpp =================================================================== --- clang/lib/Format/TokenAnnotator.cpp +++ clang/lib/Format/TokenAnnotator.cpp @@ -1419,7 +1419,8 @@ // Reset token type in case we have already looked at it and then // recovered from an error (e.g. failure to find the matching >). 
- if (!CurrentToken->isOneOf( + if (!CurrentToken->typeIsFinalized() && + !CurrentToken->isOneOf( TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro, TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow, @@ -1430,8 +1431,7 @@ TT_RecordLBrace, TT_StructLBrace, TT_UnionLBrace, TT_RequiresClause, TT_RequiresClauseInARequiresExpression, TT_RequiresExpression, TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace, - TT_BinaryOperator, TT_CompoundRequirementLBrace, - TT_BracedListLBrace)) + TT_CompoundRequirementLBrace, TT_BracedListLBrace)) CurrentToken->setType(TT_Unknown); CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; Index: clang/lib/Format/FormatToken.h =================================================================== --- clang/lib/Format/FormatToken.h +++ clang/lib/Format/FormatToken.h @@ -258,7 +258,7 @@ PartOfMultiVariableDeclStmt(false), ContinuesLineCommentSection(false), Finalized(false), ClosesRequiresClause(false), BlockKind(BK_Unknown), Decision(FD_Unformatted), PackingKind(PPK_Inconclusive), - Type(TT_Unknown) {} + TypeIsFinalized(false), Type(TT_Unknown) {} /// The \c Token. Token Tok; @@ -367,6 +367,7 @@ } private: + unsigned TypeIsFinalized : 1; TokenType Type; public: @@ -374,6 +375,11 @@ /// binary operator. TokenType getType() const { return Type; } void setType(TokenType T) { Type = T; } + void setFinalizedType(TokenType T) { + Type = T; + TypeIsFinalized = true; + } + bool typeIsFinalized() const { return TypeIsFinalized; } /// The number of newlines immediately before the \c Token. ///
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits