https://github.com/sdkrystian created https://github.com/llvm/llvm-project/pull/96364
Consider the following:

```cpp
template<typename T>
struct A { };

template<typename T>
int A<T>::B::* f(); // error: no member named 'B' in 'A<T>'
```

Although this is clearly valid, clang rejects it because the _nested-name-specifier_ `A<T>::` is parsed as if it were declarative; that is, we parse it as if it were the _nested-name-specifier_ in a redeclaration/specialization. However, we don't (and can't) know whether the _nested-name-specifier_ is declarative until we see the '`*`' token, but at that point we have already complained that `A` has no member named `B`! This patch addresses this bug by adding support for _fully_ unannotated _and_ unbounded tentative parsing, which allows us to parse past tokens without having to cache tokens up to a point we can guarantee aren't part of the construct we are disambiguating. I don't know whether the approach taken here is ideal -- alternatives are welcome. However, the performance impact (as measured by [llvm-compile-time-tracker](https://llvm-compile-time-tracker.com/?config=Overview&stat=instructions%3Au&remote=sdkrystian)) is quite minimal (0.09%, which I plan to further improve).
>From 2b0ac4e04f3eb9f8ececa874001c96a5ff8ee235 Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski <sdkryst...@gmail.com> Date: Wed, 12 Jun 2024 14:14:26 -0400 Subject: [PATCH] [Clang][Parse] Fix ambiguity with nested-name-specifiers that may be declarative --- clang/include/clang/Lex/Preprocessor.h | 14 +++- clang/include/clang/Parse/Parser.h | 13 ++- clang/lib/Lex/PPCaching.cpp | 39 ++++++++- clang/lib/Parse/ParseCXXInlineMethods.cpp | 41 +-------- clang/lib/Parse/ParseDecl.cpp | 83 ++++++++++++------- clang/lib/Parse/ParseExprCXX.cpp | 19 ++--- .../CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp | 64 ++++++++++++++ clang/test/CXX/temp/temp.res/p3.cpp | 10 +-- 8 files changed, 182 insertions(+), 101 deletions(-) create mode 100644 clang/test/CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 9d8a1aae23df3..838857b6b8833 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -1150,6 +1150,9 @@ class Preprocessor { /// invoked (at which point the last position is popped). std::vector<CachedTokensTy::size_type> BacktrackPositions; + std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>> + UnannotatedBacktrackPositions; + /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running. /// This is used to guard against calling this function recursively. /// @@ -1712,7 +1715,7 @@ class Preprocessor { /// at some point after EnableBacktrackAtThisPos. If you don't, caching of /// tokens will continue indefinitely. /// - void EnableBacktrackAtThisPos(); + void EnableBacktrackAtThisPos(bool Unannotated = false); /// Disable the last EnableBacktrackAtThisPos call. void CommitBacktrackedTokens(); @@ -1723,7 +1726,11 @@ class Preprocessor { /// True if EnableBacktrackAtThisPos() was called and /// caching of tokens is on. 
+ bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } + bool isUnannotatedBacktrackEnabled() const { + return !UnannotatedBacktrackPositions.empty(); + } /// Lex the next token for this preprocessor. void Lex(Token &Result); @@ -1827,8 +1834,9 @@ class Preprocessor { void RevertCachedTokens(unsigned N) { assert(isBacktrackEnabled() && "Should only be called when tokens are cached for backtracking"); - assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) - && "Should revert tokens up to the last backtrack position, not more"); + assert(signed(CachedLexPos) - signed(N) >= + signed(BacktrackPositions.back() >> 1) && + "Should revert tokens up to the last backtrack position, not more"); assert(signed(CachedLexPos) - signed(N) >= 0 && "Corrupted backtrack positions ?"); CachedLexPos -= N; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index d054b8cf0d240..fc0bae6bdec2b 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -1033,7 +1033,7 @@ class Parser : public CodeCompletionHandler { bool isActive; public: - explicit TentativeParsingAction(Parser &p) + explicit TentativeParsingAction(Parser &p, bool Unannotated = false) : P(p), PrevPreferredType(P.PreferredType) { PrevTok = P.Tok; PrevTentativelyDeclaredIdentifierCount = @@ -1041,7 +1041,7 @@ class Parser : public CodeCompletionHandler { PrevParenCount = P.ParenCount; PrevBracketCount = P.BracketCount; PrevBraceCount = P.BraceCount; - P.PP.EnableBacktrackAtThisPos(); + P.PP.EnableBacktrackAtThisPos(Unannotated); isActive = true; } void Commit() { @@ -1072,13 +1072,11 @@ class Parser : public CodeCompletionHandler { class RevertingTentativeParsingAction : private Parser::TentativeParsingAction { public: - RevertingTentativeParsingAction(Parser &P) - : Parser::TentativeParsingAction(P) {} + using TentativeParsingAction::TentativeParsingAction; + ~RevertingTentativeParsingAction() { Revert(); } }; - 
class UnannotatedTentativeParsingAction; - /// ObjCDeclContextSwitch - An object used to switch context from /// an objective-c decl context to its enclosing decl context and /// back. @@ -1979,7 +1977,8 @@ class Parser : public CodeCompletionHandler { CXXScopeSpec &SS, ParsedType ObjectType, bool ObjectHasErrors, bool EnteringContext, bool *MayBePseudoDestructor = nullptr, bool IsTypename = false, const IdentifierInfo **LastII = nullptr, - bool OnlyNamespace = false, bool InUsingDeclaration = false); + bool OnlyNamespace = false, bool InUsingDeclaration = false, + bool Disambiguation = false); //===--------------------------------------------------------------------===// // C++11 5.1.2: Lambda expressions diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp index f38ff62ebf437..bc52bfb237e5c 100644 --- a/clang/lib/Lex/PPCaching.cpp +++ b/clang/lib/Lex/PPCaching.cpp @@ -22,9 +22,12 @@ using namespace clang; // Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can // be called multiple times and CommitBacktrackedTokens/Backtrack calls will // be combined with the EnableBacktrackAtThisPos calls in reverse order. 
-void Preprocessor::EnableBacktrackAtThisPos() { +void Preprocessor::EnableBacktrackAtThisPos(bool Unannotated) { assert(LexLevel == 0 && "cannot use lookahead while lexing"); - BacktrackPositions.push_back(CachedLexPos); + BacktrackPositions.push_back((CachedLexPos << 1) | Unannotated); + if (Unannotated) + UnannotatedBacktrackPositions.emplace_back(CachedTokens, + CachedTokens.size()); EnterCachingLexMode(); } @@ -32,7 +35,18 @@ void Preprocessor::EnableBacktrackAtThisPos() { void Preprocessor::CommitBacktrackedTokens() { assert(!BacktrackPositions.empty() && "EnableBacktrackAtThisPos was not called!"); + auto BacktrackPos = BacktrackPositions.back(); BacktrackPositions.pop_back(); + if (BacktrackPos & 1) { + assert(!UnannotatedBacktrackPositions.empty() && + "missing unannotated tokens?"); + auto [UnannotatedTokens, NumCachedToks] = + std::move(UnannotatedBacktrackPositions.back()); + if (!UnannotatedBacktrackPositions.empty()) + UnannotatedBacktrackPositions.back().first.append( + UnannotatedTokens.begin() + NumCachedToks, UnannotatedTokens.end()); + UnannotatedBacktrackPositions.pop_back(); + } } // Make Preprocessor re-lex the tokens that were lexed since @@ -40,8 +54,20 @@ void Preprocessor::CommitBacktrackedTokens() { void Preprocessor::Backtrack() { assert(!BacktrackPositions.empty() && "EnableBacktrackAtThisPos was not called!"); - CachedLexPos = BacktrackPositions.back(); + auto BacktrackPos = BacktrackPositions.back(); BacktrackPositions.pop_back(); + CachedLexPos = BacktrackPos >> 1; + if (BacktrackPos & 1) { + assert(!UnannotatedBacktrackPositions.empty() && + "missing unannotated tokens?"); + auto [UnannotatedTokens, NumCachedToks] = + std::move(UnannotatedBacktrackPositions.back()); + UnannotatedBacktrackPositions.pop_back(); + if (!UnannotatedBacktrackPositions.empty()) + UnannotatedBacktrackPositions.back().first.append( + UnannotatedTokens.begin() + NumCachedToks, UnannotatedTokens.end()); + CachedTokens = std::move(UnannotatedTokens); + } 
recomputeCurLexerKind(); } @@ -67,6 +93,8 @@ void Preprocessor::CachingLex(Token &Result) { EnterCachingLexModeUnchecked(); CachedTokens.push_back(Result); ++CachedLexPos; + if (isUnannotatedBacktrackEnabled()) + UnannotatedBacktrackPositions.back().first.push_back(Result); return; } @@ -108,6 +136,8 @@ const Token &Preprocessor::PeekAhead(unsigned N) { for (size_t C = CachedLexPos + N - CachedTokens.size(); C > 0; --C) { CachedTokens.push_back(Token()); Lex(CachedTokens.back()); + if (isUnannotatedBacktrackEnabled()) + UnannotatedBacktrackPositions.back().first.push_back(CachedTokens.back()); } EnterCachingLexMode(); return CachedTokens.back(); @@ -124,7 +154,8 @@ void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) { for (CachedTokensTy::size_type i = CachedLexPos; i != 0; --i) { CachedTokensTy::iterator AnnotBegin = CachedTokens.begin() + i-1; if (AnnotBegin->getLocation() == Tok.getLocation()) { - assert((BacktrackPositions.empty() || BacktrackPositions.back() <= i) && + assert((BacktrackPositions.empty() || + (BacktrackPositions.back() >> 1) <= i) && "The backtrack pos points inside the annotated tokens!"); // Replace the cached tokens with the single annotation token. if (i < CachedLexPos) diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp index 943ce0fdde3a3..86a89650e2583 100644 --- a/clang/lib/Parse/ParseCXXInlineMethods.cpp +++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp @@ -1188,41 +1188,6 @@ bool Parser::ConsumeAndStoreConditional(CachedTokens &Toks) { return true; } -/// A tentative parsing action that can also revert token annotations. -class Parser::UnannotatedTentativeParsingAction : public TentativeParsingAction { -public: - explicit UnannotatedTentativeParsingAction(Parser &Self, - tok::TokenKind EndKind) - : TentativeParsingAction(Self), Self(Self), EndKind(EndKind) { - // Stash away the old token stream, so we can restore it once the - // tentative parse is complete. 
- TentativeParsingAction Inner(Self); - Self.ConsumeAndStoreUntil(EndKind, Toks, true, /*ConsumeFinalToken*/false); - Inner.Revert(); - } - - void RevertAnnotations() { - Revert(); - - // Put back the original tokens. - Self.SkipUntil(EndKind, StopAtSemi | StopBeforeMatch); - if (Toks.size()) { - auto Buffer = std::make_unique<Token[]>(Toks.size()); - std::copy(Toks.begin() + 1, Toks.end(), Buffer.get()); - Buffer[Toks.size() - 1] = Self.Tok; - Self.PP.EnterTokenStream(std::move(Buffer), Toks.size(), true, - /*IsReinject*/ true); - - Self.Tok = Toks.front(); - } - } - -private: - Parser &Self; - CachedTokens Toks; - tok::TokenKind EndKind; -}; - /// ConsumeAndStoreInitializer - Consume and store the token at the passed token /// container until the end of the current initializer expression (either a /// default argument or an in-class initializer for a non-static data member). @@ -1260,9 +1225,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks, // syntactically-valid init-declarator-list, then this comma ends // the default initializer. { - UnannotatedTentativeParsingAction PA(*this, - CIK == CIK_DefaultInitializer - ? tok::semi : tok::r_paren); + TentativeParsingAction TPA(*this, /*Unannotated=*/true); Sema::TentativeAnalysisScope Scope(Actions); TPResult Result = TPResult::Error; @@ -1290,7 +1253,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks, // Put the token stream back and undo any annotations we performed // after the comma. They may reflect a different parse than the one // we will actually perform at the end of the class. - PA.RevertAnnotations(); + TPA.Revert(); // If what follows could be a declaration, it is a declaration. 
if (Result != TPResult::False && Result != TPResult::Error) diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index c528917437332..cad40a120a7b4 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -6615,48 +6615,67 @@ void Parser::ParseDeclaratorInternal(Declarator &D, (Tok.is(tok::identifier) && (NextToken().is(tok::coloncolon) || NextToken().is(tok::less))) || Tok.is(tok::annot_cxxscope))) { + TentativeParsingAction TPA(*this, /*Unannotated=*/true); bool EnteringContext = D.getContext() == DeclaratorContext::File || D.getContext() == DeclaratorContext::Member; + CXXScopeSpec SS; SS.setTemplateParamLists(D.getTemplateParameterLists()); - ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr, - /*ObjectHasErrors=*/false, EnteringContext); - if (SS.isNotEmpty()) { - if (Tok.isNot(tok::star)) { - // The scope spec really belongs to the direct-declarator. - if (D.mayHaveIdentifier()) - D.getCXXScopeSpec() = SS; - else - AnnotateScopeToken(SS, true); + if (ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr, + /*ObjectHasErrors=*/false, + /*EnteringContext=*/false, + /*MayBePseudoDestructor=*/nullptr, + /*IsTypename=*/false, /*LastII=*/nullptr, + /*OnlyNamespace=*/false, + /*InUsingDeclaration=*/false, + /*Disambiguation=*/EnteringContext) || + + SS.isEmpty() || SS.isInvalid() || !EnteringContext || + Tok.is(tok::star)) { + TPA.Commit(); + if (SS.isNotEmpty() && Tok.is(tok::star)) { + if (SS.isValid()) { + checkCompoundToken(SS.getEndLoc(), tok::coloncolon, + CompoundToken::MemberPtr); + } - if (DirectDeclParser) - (this->*DirectDeclParser)(D); + SourceLocation StarLoc = ConsumeToken(); + D.SetRangeEnd(StarLoc); + DeclSpec DS(AttrFactory); + ParseTypeQualifierListOpt(DS); + D.ExtendWithDeclSpec(DS); + + // Recurse to parse whatever is left. 
+ Actions.runWithSufficientStackSpace(D.getBeginLoc(), [&] { + ParseDeclaratorInternal(D, DirectDeclParser); + }); + + // Sema will have to catch (syntactically invalid) pointers into global + // scope. It has to catch pointers into namespace scope anyway. + D.AddTypeInfo(DeclaratorChunk::getMemberPointer( + SS, DS.getTypeQualifiers(), StarLoc, DS.getEndLoc()), + std::move(DS.getAttributes()), + /*EndLoc=*/SourceLocation()); return; } + } else { + TPA.Revert(); + SS.clear(); + ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr, + /*ObjectHasErrors=*/false, + /*EnteringContext=*/true); + } - if (SS.isValid()) { - checkCompoundToken(SS.getEndLoc(), tok::coloncolon, - CompoundToken::MemberPtr); - } + if (SS.isNotEmpty()) { + // The scope spec really belongs to the direct-declarator. + if (D.mayHaveIdentifier()) + D.getCXXScopeSpec() = SS; + else + AnnotateScopeToken(SS, true); - SourceLocation StarLoc = ConsumeToken(); - D.SetRangeEnd(StarLoc); - DeclSpec DS(AttrFactory); - ParseTypeQualifierListOpt(DS); - D.ExtendWithDeclSpec(DS); - - // Recurse to parse whatever is left. - Actions.runWithSufficientStackSpace(D.getBeginLoc(), [&] { - ParseDeclaratorInternal(D, DirectDeclParser); - }); - - // Sema will have to catch (syntactically invalid) pointers into global - // scope. It has to catch pointers into namespace scope anyway. - D.AddTypeInfo(DeclaratorChunk::getMemberPointer( - SS, DS.getTypeQualifiers(), StarLoc, DS.getEndLoc()), - std::move(DS.getAttributes()), - /* Don't replace range end. 
*/ SourceLocation()); + if (DirectDeclParser) + (this->*DirectDeclParser)(D); return; } } diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 1d364f77a8146..c0eae73927cdd 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -159,8 +159,8 @@ void Parser::CheckForTemplateAndDigraph(Token &Next, ParsedType ObjectType, bool Parser::ParseOptionalCXXScopeSpecifier( CXXScopeSpec &SS, ParsedType ObjectType, bool ObjectHadErrors, bool EnteringContext, bool *MayBePseudoDestructor, bool IsTypename, - const IdentifierInfo **LastII, bool OnlyNamespace, - bool InUsingDeclaration) { + const IdentifierInfo **LastII, bool OnlyNamespace, bool InUsingDeclaration, + bool Disambiguation) { assert(getLangOpts().CPlusPlus && "Call sites of this function should be guarded by checking for C++"); @@ -528,13 +528,11 @@ bool Parser::ParseOptionalCXXScopeSpecifier( UnqualifiedId TemplateName; TemplateName.setIdentifier(&II, Tok.getLocation()); bool MemberOfUnknownSpecialization; - if (TemplateNameKind TNK = Actions.isTemplateName(getCurScope(), SS, - /*hasTemplateKeyword=*/false, - TemplateName, - ObjectType, - EnteringContext, - Template, - MemberOfUnknownSpecialization)) { + if (TemplateNameKind TNK = Actions.isTemplateName( + getCurScope(), SS, + /*hasTemplateKeyword=*/false, TemplateName, ObjectType, + EnteringContext, Template, MemberOfUnknownSpecialization, + Disambiguation)) { // If lookup didn't find anything, we treat the name as a template-name // anyway. C++20 requires this, and in prior language modes it improves // error recovery. 
But before we commit to this, check that we actually @@ -557,7 +555,8 @@ bool Parser::ParseOptionalCXXScopeSpecifier( continue; } - if (MemberOfUnknownSpecialization && (ObjectType || SS.isSet()) && + if (MemberOfUnknownSpecialization && !Disambiguation && + (ObjectType || SS.isSet()) && (IsTypename || isTemplateArgumentList(1) == TPResult::True)) { // If we had errors before, ObjectType can be dependent even without any // templates. Do not report missing template keyword in that case. diff --git a/clang/test/CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp new file mode 100644 index 0000000000000..a06b107755596 --- /dev/null +++ b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +template<typename T> +struct A0 { + struct B0; + + template<typename U> + struct C0 { + struct D0; + + template<typename V> + struct E0; + }; +}; + +template<typename T> +int A0<T>::B0::* f0(); + +template<typename T> +int A0<T>::B1::* f1(); + +template<typename T> +int A0<T>::C0<int>::* f2(); // expected-error {{expected unqualified-id}} + +template<typename T> +int A0<T>::C1<int>::* f3(); // expected-error {{no member named 'C1' in 'A0<T>'}} + // expected-error@-1 {{expected ';' after top level declarator}} + +template<typename T> +int A0<T>::template C2<int>::* f4(); + +template<typename T> +int A0<T>::template C0<int>::D0::* f5(); + +template<typename T> +int A0<T>::template C2<int>::D1::* f6(); + +template<typename T> +int A0<T>::template C0<int>::E0<int>::* f7(); // expected-error {{use 'template' keyword to treat 'E0' as a dependent template name}} + // expected-error@-1 {{expected unqualified-id}} + +template<typename T> +int A0<T>::template C2<int>::E1<int>::* f8(); // expected-error {{no member named 'C2' in 'A0<T>'}} + +template<typename T> +int A0<T>::template C0<int>::template E0<int>::* f9(); + +template<typename T> +int A0<T>::template C2<int>::template 
E1<int>::* f10(); + +namespace TypoCorrection { + template<typename T> + struct A { + template<typename U> + struct Typo; // expected-note {{'Typo' declared here}} + }; + + template<typename T> + int A<T>::template typo<int>::* f(); + + template<typename T> + int A<T>::typo<int>::* g(); // expected-error {{no template named 'typo' in 'A<T>'; did you mean 'Typo'?}} + // expected-error@-1 {{expected unqualified-id}} +} diff --git a/clang/test/CXX/temp/temp.res/p3.cpp b/clang/test/CXX/temp/temp.res/p3.cpp index 37ab93559e369..1eda967523a59 100644 --- a/clang/test/CXX/temp/temp.res/p3.cpp +++ b/clang/test/CXX/temp/temp.res/p3.cpp @@ -2,7 +2,7 @@ template<typename T> struct A { template<typename U> struct B; - template<typename U> using C = U; // expected-note {{here}} + template<typename U> using C = U; }; struct X { @@ -20,12 +20,10 @@ template<typename T> A<T>::C<T> f2(); // expected-warning {{missing 'typename'}} template<typename T> A<T>::C<X>::X(T) {} template<typename T> A<T>::C<X>::X::Y::Y(T) {} -// FIXME: This is ill-formed -template<typename T> int A<T>::B<T>::*f3() {} -template<typename T> int A<T>::C<X>::*f4() {} +template<typename T> int A<T>::B<T>::*f3() {} // expected-error {{expected unqualified-id}} +template<typename T> int A<T>::C<X>::*f4() {} // expected-error {{expected unqualified-id}} -// FIXME: This is valid -template<typename T> int A<T>::template C<int>::*f5() {} // expected-error {{has no members}} +template<typename T> int A<T>::template C<int>::*f5() {} template<typename T> template<typename U> struct A<T>::B { friend A<T>::C<T> f6(); // ok, same as 'friend T f6();' _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits