https://github.com/yronglin updated https://github.com/llvm/llvm-project/pull/102135
>From efa62c3c8572f9c94cf0e2e9ac5a1fb11746ddc7 Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Tue, 23 Jul 2024 19:46:49 +0800 Subject: [PATCH 1/5] =?UTF-8?q?[Clang]=20Implement=20P3034R1=20Module=20De?= =?UTF-8?q?clarations=20Shouldn=E2=80=99t=20be=20Macros?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/docs/ReleaseNotes.rst | 2 + .../include/clang/Basic/DiagnosticLexKinds.td | 5 + clang/include/clang/Basic/IdentifierTable.h | 24 +- clang/include/clang/Basic/TokenKinds.def | 3 + clang/include/clang/Lex/Preprocessor.h | 83 +++- clang/include/clang/Lex/Token.h | 3 + clang/include/clang/Parse/Parser.h | 2 +- clang/lib/Basic/IdentifierTable.cpp | 3 +- .../lib/Frontend/PrintPreprocessedOutput.cpp | 12 +- clang/lib/Lex/PPLexerChange.cpp | 9 +- clang/lib/Lex/Preprocessor.cpp | 444 ++++++++++++------ clang/lib/Lex/TokenConcatenation.cpp | 10 + clang/lib/Parse/ParseDecl.cpp | 8 +- clang/lib/Parse/Parser.cpp | 93 ++-- clang/test/CXX/cpp/cpp.module/p2.cppm | 88 ++++ .../basic/basic.link/module-declaration.cpp | 61 +-- .../dcl.module/dcl.module.import/p1.cppm | 39 +- clang/test/SemaCXX/modules.cppm | 89 ++-- clang/www/cxx_status.html | 2 +- 19 files changed, 717 insertions(+), 263 deletions(-) create mode 100644 clang/test/CXX/cpp/cpp.module/p2.cppm diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 4c7bd099420abf..a5953a3641fff7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -89,6 +89,8 @@ C++2c Feature Support - Add ``__builtin_is_virtual_base_of`` intrinsic, which supports `P2985R0 A type trait for detecting virtual base classes <https://wg21.link/p2985r0>`_ +- Implemented `P3034R1 Module Declarations Shouldn’t be Macros <https://wg21.link/P3034R1>`_. 
+ Resolutions to C++ Defect Reports ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 12d7b8c0205ee9..08ece01009387d 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -952,6 +952,11 @@ def warn_module_conflict : Warning< InGroup<ModuleConflict>; // C++20 modules +def err_module_decl_cannot_be_macros : Error< + "the module name in a module%select{| partition}0 declaration cannot contain " + "an object-like macro %1">; +def err_unxepected_paren_in_module_decl : Error< + "unexpected '(' after the module name in a module%select{| partition}0 declaration">; def err_header_import_semi_in_macro : Error< "semicolon terminating header import declaration cannot be produced " "by a macro">; diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index ae9ebd9f59154e..f40f74d0355ade 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -180,6 +180,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsModulesImport : 1; + // True if this is the 'module' contextual keyword. + LLVM_PREFERRED_TYPE(bool) + unsigned IsModulesDecl : 1; + // True if this is a mangled OpenMP variant name. LLVM_PREFERRED_TYPE(bool) unsigned IsMangledOpenMPVariantName : 1; @@ -196,7 +200,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsFinal : 1; - // 22 bits left in a 64-bit word. + // 21 bits left in a 64-bit word. // Managed by the language front-end. 
void *FETokenInfo = nullptr; @@ -212,8 +216,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), IsModulesImport(false), - IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false), - IsRestrictExpansion(false), IsFinal(false) {} + IsModulesDecl(false), IsMangledOpenMPVariantName(false), + IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {} public: IdentifierInfo(const IdentifierInfo &) = delete; @@ -520,6 +524,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { RecomputeNeedsHandleIdentifier(); } + /// Determine whether this is the contextual keyword \c module. + bool isModulesDeclaration() const { return IsModulesDecl; } + + /// Set whether this identifier is the contextual keyword \c module. + void setModulesDeclaration(bool I) { + IsModulesDecl = I; + if (I) + NeedsHandleIdentifier = true; + else + RecomputeNeedsHandleIdentifier(); + } + /// Determine whether this is the mangled name of an OpenMP variant. bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; } @@ -740,6 +756,8 @@ class IdentifierTable { // If this is the 'import' contextual keyword, mark it as such. if (Name == "import") II->setModulesImport(true); + else if (Name == "module") + II->setModulesDeclaration(true); return *II; } diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 7e638dc1ddcdba..bea46f617e690d 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -1006,6 +1006,9 @@ ANNOTATION(module_include) ANNOTATION(module_begin) ANNOTATION(module_end) +// Annotations for C++, Clang and Objective-C named modules. +ANNOTATION(module_name) + // Annotation for a header_name token that has been looked up and transformed // into the name of a header unit. 
ANNOTATION(header_unit) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 623f868ca1e648..c35b768ffea1bc 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -615,10 +615,6 @@ class Preprocessor { ModuleDeclSeq ModuleDeclState; - /// Whether the module import expects an identifier next. Otherwise, - /// it expects a '.' or ';'. - bool ModuleImportExpectsIdentifier = false; - /// The identifier and source location of the currently-active /// \#pragma clang arc_cf_code_audited begin. std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo; @@ -1763,11 +1759,14 @@ class Preprocessor { /// Lex a token, forming a header-name token if possible. bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); + /// Lex a module name or a partition name. + bool LexModuleName(Token &Result, bool IsImport); + /// Lex the parameters for an #embed directive, returns nullopt on error. std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current, bool ForHasEmbed); - bool LexAfterModuleImport(Token &Result); + bool LexAfterModuleDecl(Token &Result); void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); void makeModuleVisible(Module *M, SourceLocation Loc); @@ -3059,6 +3058,9 @@ class Preprocessor { static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) { return P.LexAfterModuleImport(Result); } + static bool CLK_LexAfterModuleDecl(Preprocessor &P, Token &Result) { + return P.LexAfterModuleDecl(Result); + } }; /// Abstract base class that describes a handler that will receive @@ -3091,6 +3093,77 @@ struct EmbedAnnotationData { /// Registry of pragma handlers added by plugins using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>; +/// Represents a module or partition name token sequence. 
+/// +/// module-name: +/// module-name-qualifier[opt] identifier +/// +/// partition-name: [C++20] +/// : module-name-qualifier[opt] identifier +/// +/// module-name-qualifier +/// module-name-qualifier[opt] identifier . +/// +/// This class can only be created by the preprocessor, which guarantees that +/// the two token arrays are contiguous in memory and contain only 3 kinds of +/// tokens (identifier, '.' and ':'). It is only available when the +/// preprocessor returns an annot_module_name token. +/// +/// For example: +/// +/// export module m.n:c.d +/// +/// The module name array has 3 tokens ['m', '.', 'n']. +/// The partition name array has 4 tokens [':', 'c', '.', 'd']. +/// +/// When importing a partition in a named module fragment (e.g. import :part1;), +/// the module name array will be empty, and the partition name array has 2 +/// tokens. +/// +/// When we meet a private-module-fragment (e.g. module :private;), the +/// preprocessor will not return an annot_module_name token, but will return 2 +/// separate tokens [':', 'kw_private']. + +class ModuleNameInfo { + friend class Preprocessor; + ArrayRef<Token> ModuleName; + ArrayRef<Token> PartitionName; + + ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex); + +public: + /// Return the contiguous token array. 
+ ArrayRef<Token> getTokens() const { + if (ModuleName.empty()) + return PartitionName; + if (PartitionName.empty()) + return ModuleName; + return ArrayRef(ModuleName.begin(), PartitionName.end()); + } + bool hasModuleName() const { return !ModuleName.empty(); } + bool hasPartitionName() const { return !PartitionName.empty(); } + ArrayRef<Token> getModuleName() const { return ModuleName; } + ArrayRef<Token> getPartitionName() const { return PartitionName; } + Token getColonToken() const { + assert(hasPartitionName() && "Do not have a partition name"); + return getPartitionName().front(); + } + + /// Under the standard C++ Modules, the dot is just part of the module name, + /// and not a real hierarchy separator. Flatten such module names now. + std::string getFlatName() const; + + /// Build a module id path from the contiguous token array, both include + /// module name and partition name. + void getModuleIdPath( + SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const; + + /// Build a module id path from \param ModuleName. 
+ static void getModuleIdPath( + ArrayRef<Token> ModuleName, + SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path); +}; + } // namespace clang #endif // LLVM_CLANG_LEX_PREPROCESSOR_H diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index 4f29fb7d114159..2be3ad39529f05 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -235,6 +235,9 @@ class Token { assert(isAnnotation() && "Used AnnotVal on non-annotation token"); return PtrData; } + template <class T> T getAnnotationValueAs() const { + return static_cast<T>(getAnnotationValue()); + } void setAnnotationValue(void *val) { assert(isAnnotation() && "Used AnnotVal on non-annotation token"); PtrData = val; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index ba7d6866ebacd8..31c48f8805f4dc 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -3878,7 +3878,7 @@ class Parser : public CodeCompletionHandler { } bool ParseModuleName( - SourceLocation UseLoc, + SourceLocation UseLoc, ArrayRef<Token> ModuleName, SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path, bool IsImport); diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 4f7ccaf4021d63..97d830214f8900 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -322,8 +322,9 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { if (LangOpts.IEEE128) AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this); - // Add the 'import' contextual keyword. + // Add the 'import' and 'module' contextual keyword. 
get("import").setModulesImport(true); + get("module").setModulesDeclaration(true); } /// Checks if the specified token kind represents a keyword in the diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index 135dca0e6a1775..e21f2b945b86ad 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -758,9 +758,10 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, // These tokens are not expanded to anything and don't need whitespace before // them. if (Tok.is(tok::eof) || - (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && - !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) && - !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed))) + (Tok.isAnnotation() && Tok.isNot(tok::annot_header_unit) && + Tok.isNot(tok::annot_module_begin) && Tok.isNot(tok::annot_module_end) && + Tok.isNot(tok::annot_module_name) && + Tok.isNot(tok::annot_repl_input_end) && Tok.isNot(tok::annot_embed))) return; // EmittedDirectiveOnThisLine takes priority over RequireSameLine. @@ -951,6 +952,11 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, PP.Lex(Tok); IsStartOfLine = true; continue; + } else if (Tok.is(tok::annot_module_name)) { + auto *Info = static_cast<ModuleNameInfo *>(Tok.getAnnotationValue()); + *Callbacks->OS << Info->getFlatName(); + PP.Lex(Tok); + continue; } else if (Tok.is(tok::annot_header_unit)) { // This is a header-name that has been (effectively) converted into a // module-name. 
diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index 8221db46e06acc..c3a903917e9ce1 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -122,7 +122,8 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, CurPPLexer = TheLexer; CurDirLookup = CurDir; CurLexerSubmodule = nullptr; - if (CurLexerCallback != CLK_LexAfterModuleImport) + if (CurLexerCallback != CLK_LexAfterModuleImport && + CurLexerCallback != CLK_LexAfterModuleDecl) CurLexerCallback = TheLexer->isDependencyDirectivesLexer() ? CLK_DependencyDirectivesLexer : CLK_Lexer; @@ -161,8 +162,7 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, PushIncludeMacroStack(); CurDirLookup = nullptr; CurTokenLexer = std::move(TokLexer); - if (CurLexerCallback != CLK_LexAfterModuleImport) - CurLexerCallback = CLK_TokenLexer; + CurLexerCallback = CLK_TokenLexer; } /// EnterTokenStream - Add a "macro" context to the top of the include stack, @@ -216,7 +216,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, PushIncludeMacroStack(); CurDirLookup = nullptr; CurTokenLexer = std::move(TokLexer); - if (CurLexerCallback != CLK_LexAfterModuleImport) + if (CurLexerCallback != CLK_LexAfterModuleImport && + CurLexerCallback != CLK_LexAfterModuleDecl) CurLexerCallback = CLK_TokenLexer; } diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index f0b4593e0cc22e..d0c4ab8fd5b669 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -860,9 +860,15 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { ModuleImportLoc = Identifier.getLocation(); NamedModuleImportPath.clear(); IsAtImport = true; - ModuleImportExpectsIdentifier = true; CurLexerCallback = CLK_LexAfterModuleImport; } + + if ((II.isModulesDeclaration() || Identifier.is(tok::kw_module)) && + !InMacroArgs && !DisableMacroExpansion && + (getLangOpts().CPlusPlusModules || getLangOpts().DebuggerSupport) && + 
CurLexerCallback != CLK_CachingLexer) { + CurLexerCallback = CLK_LexAfterModuleDecl; + } return true; } @@ -905,6 +911,7 @@ void Preprocessor::Lex(Token &Result) { // This token is injected to represent the translation of '#include "a.h"' // into "import a.h;". Mimic the notional ';'. case tok::annot_module_include: + case tok::annot_repl_input_end: case tok::semi: TrackGMFState.handleSemi(); StdCXXImportSeqState.handleSemi(); @@ -919,12 +926,30 @@ void Preprocessor::Lex(Token &Result) { StdCXXImportSeqState.handleExport(); ModuleDeclState.handleExport(); break; - case tok::colon: - ModuleDeclState.handleColon(); - break; - case tok::period: - ModuleDeclState.handlePeriod(); + case tok::annot_module_name: { + auto *Info = static_cast<ModuleNameInfo *>(Result.getAnnotationValue()); + for (const auto &Tok : Info->getTokens()) { + switch (Tok.getKind()) { + case tok::identifier: + ModuleDeclState.handleIdentifier(Tok.getIdentifierInfo()); + break; + case tok::period: + ModuleDeclState.handlePeriod(); + break; + case tok::colon: + ModuleDeclState.handleColon(); + break; + default: + llvm_unreachable("Unexpected token in module name"); + } + } + if (ModuleDeclState.isModuleCandidate()) + break; + TrackGMFState.handleMisc(); + StdCXXImportSeqState.handleMisc(); + ModuleDeclState.handleMisc(); break; + } case tok::identifier: // Check "import" and "module" when there is no open bracket. The two // identifiers are not meaningful with open brackets. 
@@ -936,17 +961,17 @@ void Preprocessor::Lex(Token &Result) { ModuleImportLoc = Result.getLocation(); NamedModuleImportPath.clear(); IsAtImport = false; - ModuleImportExpectsIdentifier = true; CurLexerCallback = CLK_LexAfterModuleImport; } break; - } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { + } + if (Result.getIdentifierInfo()->isModulesDeclaration()) { TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); ModuleDeclState.handleModule(); + CurLexerCallback = CLK_LexAfterModuleDecl; break; } } - ModuleDeclState.handleIdentifier(Result.getIdentifierInfo()); if (ModuleDeclState.isModuleCandidate()) break; [[fallthrough]]; @@ -1121,6 +1146,151 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) { } } +ModuleNameInfo::ModuleNameInfo(ArrayRef<Token> AnnotToks, + std::optional<unsigned> ColonIndex) { + assert(!AnnotToks.empty() && "Named module token cannot be empty."); + if (!ColonIndex.has_value()) + ColonIndex = AnnotToks.size(); + ModuleName = ArrayRef(AnnotToks.begin(), AnnotToks.begin() + *ColonIndex); + PartitionName = ArrayRef(AnnotToks.begin() + *ColonIndex, AnnotToks.end()); + assert(ModuleName.end() == PartitionName.begin()); +} + +std::string ModuleNameInfo::getFlatName() const { + std::string FlatModuleName; + for (auto &Tok : getTokens()) { + switch (Tok.getKind()) { + case tok::identifier: + FlatModuleName += Tok.getIdentifierInfo()->getName(); + break; + case tok::period: + FlatModuleName += '.'; + break; + case tok::colon: + FlatModuleName += ':'; + break; + default: + llvm_unreachable("Unexpected token in module name"); + } + } + return FlatModuleName; +} + +void ModuleNameInfo::getModuleIdPath( + SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const { + return getModuleIdPath(getTokens(), Path); +} + +void ModuleNameInfo::getModuleIdPath( + ArrayRef<Token> ModuleName, + SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) { + for (const auto &Tok : 
ModuleName) { + if (Tok.is(tok::identifier)) + Path.push_back( + std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation())); + } +} + +/// Lex a module name or a partition name. +/// +/// module-name: +/// module-name-qualifier[opt] identifier +/// +/// partition-name: [C++20] +/// : module-name-qualifier[opt] identifier +/// +/// module-name-qualifier +/// module-name-qualifier[opt] identifier . +bool Preprocessor::LexModuleName(Token &Result, bool IsImport) { + bool ExpectsIdentifier = true, IsLexingPartition = false; + SmallVector<Token, 8> ModuleName; + std::optional<unsigned> ColonTokIndex; + auto LexNextToken = [&](Token &Tok) { + if (IsImport) + Lex(Tok); + else + LexUnexpandedToken(Tok); + }; + + while (true) { + LexNextToken(Result); + if (ExpectsIdentifier && Result.is(tok::identifier)) { + auto *MI = getMacroInfo(Result.getIdentifierInfo()); + if (getLangOpts().CPlusPlusModules && !IsImport && MI && + MI->isObjectLike()) { + Diag(Result, diag::err_module_decl_cannot_be_macros) + << Result.getLocation() << IsLexingPartition + << Result.getIdentifierInfo(); + } + ModuleName.push_back(Result); + ExpectsIdentifier = false; + continue; + } + + if (!ExpectsIdentifier && Result.is(tok::period)) { + ModuleName.push_back(Result); + ExpectsIdentifier = true; + continue; + } + + // Module partition only allowed in C++20 Modules. + if (getLangOpts().CPlusPlusModules && Result.is(tok::colon)) { + // Handle the form like: import :P; + // If the token after ':' is not an identifier, this is a invalid module + // name. 
+ if (ModuleName.empty()) { + Token Tmp; + LexNextToken(Tmp); + EnterToken(Tmp, /*IsReinject=*/false); + // A private-module-fragment: + // module :private; + if (!IsImport && Tmp.is(tok::kw_private)) + return true; + // import :N; + if (IsImport && Tmp.isNot(tok::identifier)) + return false; + } else if (!ExpectsIdentifier) { + ExpectsIdentifier = true; + } + IsLexingPartition = true; + ColonTokIndex = ModuleName.size(); + ModuleName.push_back(Result); + continue; + } + + // [cpp.module]/p2: where the pp-tokens (if any) shall not begin with a ( + // preprocessing token [...] + // + // We only emit diagnostic in the preprocessor, and in the parser we skip + // invalid tokens and recover from errors. + if (getLangOpts().CPlusPlusModules && !ExpectsIdentifier && + Result.is(tok::l_paren)) + Diag(Result, diag::err_unxepected_paren_in_module_decl) + << IsLexingPartition; + break; + } + + // Put the last token back to stream, it's not a valid part of module name. + // We lexed it unexpanded but it might be a valid macro expansion + Result.clearFlag(Token::DisableExpand); + auto ToksCopy = std::make_unique<Token[]>(1); + *ToksCopy.get() = Result; + EnterTokenStream(std::move(ToksCopy), 1, + /*DisableMacroExpansion=*/false, + /*IsReinject=*/false); + + if (ModuleName.empty()) + return false; + Result.startToken(); + Result.setKind(tok::annot_module_name); + Result.setLocation(ModuleName.front().getLocation()); + Result.setAnnotationEndLoc(ModuleName.back().getLocation()); + auto AnnotToks = ArrayRef(ModuleName).copy(getPreprocessorAllocator()); + ModuleNameInfo *Info = + new (getPreprocessorAllocator()) ModuleNameInfo(AnnotToks, ColonTokIndex); + Result.setAnnotationValue(static_cast<void *>(Info)); + return true; +} /// Lex a token following the 'import' contextual keyword. /// @@ -1145,6 +1315,17 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { // Figure out what kind of lexer we actually have. 
recomputeCurLexerKind(); + // Allocate a holding buffer for a sequence of tokens and introduce it into + // the token stream. + auto EnterTokens = [this](ArrayRef<Token> Toks) { + auto ToksCopy = std::make_unique<Token[]>(Toks.size()); + std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); + EnterTokenStream(std::move(ToksCopy), Toks.size(), + /*DisableMacroExpansion*/ true, /*IsReinject*/ false); + }; + + SmallVector<Token, 32> Suffix; + // Lex the next token. The header-name lexing rules are used at the start of // a pp-import. // @@ -1155,122 +1336,108 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { if (LexHeaderName(Result)) return true; - if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) { - std::string Name = ModuleDeclState.getPrimaryName().str(); - Name += ":"; - NamedModuleImportPath.push_back( - {getIdentifierInfo(Name), Result.getLocation()}); - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - } else { - Lex(Result); - } + // Check for a header-name. + if (Result.is(tok::header_name)) { + // Enter the header-name token into the token stream; a Lex action cannot + // both return a token and cache tokens (doing so would corrupt the token + // cache if the call to Lex comes from CachingLex / PeekAhead). + Suffix.push_back(Result); + + // Consume the pp-import-suffix and expand any macros in it now. We'll add + // it back into the token stream later. + CollectPpImportSuffix(Suffix); + if (Suffix.back().isNot(tok::semi)) { + // This is not a pp-import after all. + EnterTokens(Suffix); + return false; + } - // Allocate a holding buffer for a sequence of tokens and introduce it into - // the token stream. 
- auto EnterTokens = [this](ArrayRef<Token> Toks) { - auto ToksCopy = std::make_unique<Token[]>(Toks.size()); - std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); - EnterTokenStream(std::move(ToksCopy), Toks.size(), - /*DisableMacroExpansion*/ true, /*IsReinject*/ false); - }; + // C++2a [cpp.module]p1: + // The ';' preprocessing-token terminating a pp-import shall not have + // been produced by macro replacement. + SourceLocation SemiLoc = Suffix.back().getLocation(); + if (SemiLoc.isMacroID()) + Diag(SemiLoc, diag::err_header_import_semi_in_macro); + + // Reconstitute the import token. + Token ImportTok; + ImportTok.startToken(); + ImportTok.setKind(tok::kw_import); + ImportTok.setLocation(ModuleImportLoc); + ImportTok.setIdentifierInfo(getIdentifierInfo("import")); + ImportTok.setLength(6); + + auto Action = HandleHeaderIncludeOrImport( + /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); + switch (Action.Kind) { + case ImportAction::None: + break; - bool ImportingHeader = Result.is(tok::header_name); - // Check for a header-name. - SmallVector<Token, 32> Suffix; - if (ImportingHeader) { - // Enter the header-name token into the token stream; a Lex action cannot - // both return a token and cache tokens (doing so would corrupt the token - // cache if the call to Lex comes from CachingLex / PeekAhead). - Suffix.push_back(Result); + case ImportAction::ModuleBegin: + // Let the parser know we're textually entering the module. + Suffix.emplace_back(); + Suffix.back().startToken(); + Suffix.back().setKind(tok::annot_module_begin); + Suffix.back().setLocation(SemiLoc); + Suffix.back().setAnnotationEndLoc(SemiLoc); + Suffix.back().setAnnotationValue(Action.ModuleForHeader); + [[fallthrough]]; + + case ImportAction::ModuleImport: + case ImportAction::HeaderUnitImport: + case ImportAction::SkippedModuleImport: + // We chose to import (or textually enter) the file. Convert the + // header-name token into a header unit annotation token. 
+ Suffix[0].setKind(tok::annot_header_unit); + Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); + Suffix[0].setAnnotationValue(Action.ModuleForHeader); + // FIXME: Call the moduleImport callback? + break; + case ImportAction::Failure: + assert(TheModuleLoader.HadFatalFailure && + "This should be an early exit only to a fatal error"); + Result.setKind(tok::eof); + CurLexer->cutOffLexing(); + EnterTokens(Suffix); + return true; + } - // Consume the pp-import-suffix and expand any macros in it now. We'll add - // it back into the token stream later. - CollectPpImportSuffix(Suffix); - if (Suffix.back().isNot(tok::semi)) { - // This is not a pp-import after all. EnterTokens(Suffix); return false; } + } else { + Lex(Result); + } - // C++2a [cpp.module]p1: - // The ';' preprocessing-token terminating a pp-import shall not have - // been produced by macro replacement. - SourceLocation SemiLoc = Suffix.back().getLocation(); - if (SemiLoc.isMacroID()) - Diag(SemiLoc, diag::err_header_import_semi_in_macro); - - // Reconstitute the import token. - Token ImportTok; - ImportTok.startToken(); - ImportTok.setKind(tok::kw_import); - ImportTok.setLocation(ModuleImportLoc); - ImportTok.setIdentifierInfo(getIdentifierInfo("import")); - ImportTok.setLength(6); - - auto Action = HandleHeaderIncludeOrImport( - /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); - switch (Action.Kind) { - case ImportAction::None: - break; - - case ImportAction::ModuleBegin: - // Let the parser know we're textually entering the module. - Suffix.emplace_back(); - Suffix.back().startToken(); - Suffix.back().setKind(tok::annot_module_begin); - Suffix.back().setLocation(SemiLoc); - Suffix.back().setAnnotationEndLoc(SemiLoc); - Suffix.back().setAnnotationValue(Action.ModuleForHeader); - [[fallthrough]]; - - case ImportAction::ModuleImport: - case ImportAction::HeaderUnitImport: - case ImportAction::SkippedModuleImport: - // We chose to import (or textually enter) the file. 
Convert the - // header-name token into a header unit annotation token. - Suffix[0].setKind(tok::annot_header_unit); - Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); - Suffix[0].setAnnotationValue(Action.ModuleForHeader); - // FIXME: Call the moduleImport callback? - break; - case ImportAction::Failure: - assert(TheModuleLoader.HadFatalFailure && - "This should be an early exit only to a fatal error"); - Result.setKind(tok::eof); - CurLexer->cutOffLexing(); - EnterTokens(Suffix); + if (Result.isOneOf(tok::identifier, tok::colon)) { + EnterToken(Result, /*IsReinject=*/false); + if (!LexModuleName(Result, /*IsImport=*/true)) return true; + auto *Info = Result.getAnnotationValueAs<ModuleNameInfo *>(); + if (getLangOpts().CPlusPlusModules) { + // Under the standard C++ Modules, the dot is just part of the module + // name, and not a real hierarchy separator. Flatten such module names + // now. + // + // FIXME: Is this the right level to be performing this transformation? + std::string FlatModuleName; + if (Info->getTokens().front().is(tok::colon)) { + // Import a module partition allowed in C++20 Modules. + // We can import a partition in named module TU. + if (NamedModuleImportPath.empty() && ModuleDeclState.isNamedModule()) + FlatModuleName = llvm::Twine(ModuleDeclState.getPrimaryName()) + .concat(Info->getFlatName()) + .str(); + else + return true; + } else { + FlatModuleName = Info->getFlatName(); + } + NamedModuleImportPath.emplace_back(getIdentifierInfo(FlatModuleName), + Result.getLocation()); + } else { + Info->getModuleIdPath(NamedModuleImportPath); } - - EnterTokens(Suffix); - return false; - } - - // The token sequence - // - // import identifier (. identifier)* - // - // indicates a module import directive. We already saw the 'import' - // contextual keyword, so now we're looking for the identifiers. 
- if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { - // We expected to see an identifier here, and we did; continue handling - // identifiers. - NamedModuleImportPath.push_back( - std::make_pair(Result.getIdentifierInfo(), Result.getLocation())); - ModuleImportExpectsIdentifier = false; - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - - // If we're expecting a '.' or a ';', and we got a '.', then wait until we - // see the next identifier. (We can also see a '[[' that begins an - // attribute-specifier-seq here under the Standard C++ Modules.) - if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { - ModuleImportExpectsIdentifier = true; - CurLexerCallback = CLK_LexAfterModuleImport; - return true; } // If we didn't recognize a module name at all, this is not a (valid) import. @@ -1291,24 +1458,6 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { SemiLoc = Suffix.back().getLocation(); } - // Under the standard C++ Modules, the dot is just part of the module name, - // and not a real hierarchy separator. Flatten such module names now. - // - // FIXME: Is this the right level to be performing this transformation? - std::string FlatModuleName; - if (getLangOpts().CPlusPlusModules) { - for (auto &Piece : NamedModuleImportPath) { - // If the FlatModuleName ends with colon, it implies it is a partition. - if (!FlatModuleName.empty() && FlatModuleName.back() != ':') - FlatModuleName += "."; - FlatModuleName += Piece.first->getName(); - } - SourceLocation FirstPathLoc = NamedModuleImportPath[0].second; - NamedModuleImportPath.clear(); - NamedModuleImportPath.push_back( - std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); - } - Module *Imported = nullptr; // We don't/shouldn't load the standard c++20 modules when preprocessing. 
if (getLangOpts().Modules && !isInImportingCXXNamedModules()) { @@ -1330,6 +1479,33 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { return true; } +/// Lex a token following the 'module' contextual keyword. +/// +/// [cpp.module]/p2: +/// The pp-tokens, if any, of a pp-module shall be of the form: +/// pp-module-name pp-module-partition[opt] pp-tokens[opt] +/// +/// where the pp-tokens (if any) shall not begin with a ( preprocessing token +/// and the grammar non-terminals are defined as: +/// pp-module-name: +/// pp-module-name-qualifier[opt] identifier +/// pp-module-partition: +/// : pp-module-name-qualifier[opt] identifier +/// pp-module-name-qualifier: +/// identifier . +/// pp-module-name-qualifier identifier . +/// No identifier in the pp-module-name or pp-module-partition shall currently +/// be defined as an object-like macro. +/// +/// [cpp.module]/p3: +/// Any preprocessing tokens after the module preprocessing token in the module +/// directive are processed just as in normal text. +bool Preprocessor::LexAfterModuleDecl(Token &Result) { + // Figure out what kind of lexer we actually have. 
+ recomputeCurLexerKind(); + return LexModuleName(Result, /*IsImport=*/false); +} + void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { CurSubmoduleState->VisibleModules.setVisible( M, Loc, [](Module *) {}, diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp index 865879d1805336..cdb636923b9e91 100644 --- a/clang/lib/Lex/TokenConcatenation.cpp +++ b/clang/lib/Lex/TokenConcatenation.cpp @@ -160,6 +160,13 @@ static char GetFirstChar(const Preprocessor &PP, const Token &Tok) { bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) const { + // If the previous token is a module name, no separating whitespace is needed + // before the current token; otherwise, there would be an extra space between + // 'M' and ';' in the following code: + // + // import M; + if (PrevTok.is(tok::annot_module_name)) + return false; // Conservatively assume that every annotation token that has a printable // form requires whitespace. if (PrevTok.isAnnotation()) @@ -190,6 +197,9 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, return true; ConcatInfo &= ~aci_avoid_equal; } + + if (Tok.is(tok::annot_module_name)) + return true; if (Tok.isAnnotation()) { // Modules annotation can show up when generated automatically for includes. assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin, diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 4a2d9a650e20cc..713e32b1a313f3 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4002,7 +4002,13 @@ void Parser::ParseDeclarationSpecifiers( // We're done with the declaration-specifiers. 
goto DoneWithDeclSpec; - + case tok::annot_module_name: { + PP.EnterTokenStream( + Tok.getAnnotationValueAs<ModuleNameInfo *>()->getTokens(), + /*DisableMacroExpansion=*/true, /*IsReinject=*/false); + ConsumeAnyToken(); + [[fallthrough]]; + } // typedef-name case tok::kw___super: case tok::kw_decltype: diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 5ebe71e496a2e8..afb2e1e4161682 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -2511,18 +2511,28 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { } SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path; - if (ParseModuleName(ModuleLoc, Path, /*IsImport*/ false)) + if (Tok.isNot(tok::annot_module_name)) { + Diag(Tok, diag::err_module_expected_ident) << /*IsImport=*/false; + SkipUntil(tok::semi, StopBeforeMatch); + return nullptr; + } + + auto *Info = Tok.getAnnotationValueAs<ModuleNameInfo *>(); + ConsumeAnnotationToken(); + if (ParseModuleName(ModuleLoc, Info->getModuleName(), Path, + /*IsImport=*/false)) return nullptr; // Parse the optional module-partition. SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Partition; - if (Tok.is(tok::colon)) { - SourceLocation ColonLoc = ConsumeToken(); + if (Info->hasPartitionName()) { + SourceLocation ColonLoc = Info->getColonToken().getLocation(); if (!getLangOpts().CPlusPlusModules) Diag(ColonLoc, diag::err_unsupported_module_partition) << SourceRange(ColonLoc, Partition.back().second); // Recover by ignoring the partition name. - else if (ParseModuleName(ModuleLoc, Partition, /*IsImport*/ false)) + else if (ParseModuleName(ModuleLoc, Info->getPartitionName(), Partition, + /*IsImport=*/false)) return nullptr; } @@ -2581,18 +2591,32 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, // This is a header import that the preprocessor mapped to a module import. 
HeaderUnit = reinterpret_cast<Module *>(Tok.getAnnotationValue()); ConsumeAnnotationToken(); - } else if (Tok.is(tok::colon)) { - SourceLocation ColonLoc = ConsumeToken(); - if (!getLangOpts().CPlusPlusModules) - Diag(ColonLoc, diag::err_unsupported_module_partition) - << SourceRange(ColonLoc, Path.back().second); - // Recover by leaving partition empty. - else if (ParseModuleName(ColonLoc, Path, /*IsImport*/ true)) - return nullptr; - else - IsPartition = true; } else { - if (ParseModuleName(ImportLoc, Path, /*IsImport*/ true)) + if (Tok.isNot(tok::annot_module_name)) { + if (Tok.is(tok::code_completion)) { + cutOffParsing(); + Actions.CodeCompletion().CodeCompleteModuleImport(ImportLoc, Path); + return nullptr; + } + Diag(Tok, diag::err_module_expected_ident) << /*IsImport=*/true; + SkipUntil(tok::semi, StopBeforeMatch); + return nullptr; + } + auto *Info = Tok.getAnnotationValueAs<ModuleNameInfo *>(); + ConsumeAnnotationToken(); + if (Info->hasPartitionName()) { + SourceLocation ColonLoc = Info->getColonToken().getLocation(); + if (!getLangOpts().CPlusPlusModules) + Diag(ColonLoc, diag::err_unsupported_module_partition) + << SourceRange(ColonLoc, Path.back().second); + // Recover by leaving partition empty. + else if (ParseModuleName(ColonLoc, Info->getPartitionName(), Path, + /*IsImport=*/true)) + return nullptr; + else + IsPartition = true; + } else if (ParseModuleName(ImportLoc, Info->getModuleName(), Path, + /*IsImport=*/true)) return nullptr; } @@ -2689,32 +2713,31 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, /// module-name-qualifier: /// module-name-qualifier[opt] identifier '.' bool Parser::ParseModuleName( - SourceLocation UseLoc, + SourceLocation UseLoc, ArrayRef<Token> ModuleName, SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path, bool IsImport) { - // Parse the module path. 
- while (true) { - if (!Tok.is(tok::identifier)) { - if (Tok.is(tok::code_completion)) { - cutOffParsing(); - Actions.CodeCompletion().CodeCompleteModuleImport(UseLoc, Path); - return true; - } - - Diag(Tok, diag::err_module_expected_ident) << IsImport; - SkipUntil(tok::semi); + ModuleNameInfo::getModuleIdPath(ModuleName, Path); + // E.g. import A.B. + if (ModuleName.back().isNot(tok::identifier)) { + if (Tok.is(tok::code_completion)) { + cutOffParsing(); + Actions.CodeCompletion().CodeCompleteModuleImport(UseLoc, Path); return true; } + Diag(ModuleName.back(), diag::err_module_expected_ident) << IsImport; + SkipUntil(tok::semi, StopBeforeMatch); + return true; + } - // Record this part of the module path. - Path.push_back(std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation())); - ConsumeToken(); - - if (Tok.isNot(tok::period)) - return false; - - ConsumeToken(); + // [cpp.module]/p2: where the pp-tokens (if any) shall not begin with a ( + // preprocessing token [...] + // + // Skip until ';' to recover. 
+ if (getLangOpts().CPlusPlusModules && Tok.is(tok::l_paren)) { + SkipUntil(tok::semi, StopBeforeMatch); + return true; } + return false; } /// Try recover parser when module annotation appears where it must not diff --git a/clang/test/CXX/cpp/cpp.module/p2.cppm b/clang/test/CXX/cpp/cpp.module/p2.cppm new file mode 100644 index 00000000000000..966a88ccfa972d --- /dev/null +++ b/clang/test/CXX/cpp/cpp.module/p2.cppm @@ -0,0 +1,88 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/C.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/D.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/E.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/F.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/G.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/H.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/I.cppm -triple x86_64-linux-gnu -verify +// RUN: %clang_cc1 -std=c++20 %t/J.cppm -triple x86_64-linux-gnu -verify + +//--- version.h +#ifndef VERSION_H +#define VERSION_H + +#define VERSION libv5 +#define A a +#define B b +#define C c +#define FUNC_LIKE(X) function_like_##X +#define ATTRS [[]] +#define SEMICOLON ; + +#endif // VERSION_H + +//--- A.cppm +module; +#include "version.h" +export module VERSION; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'VERSION'}} + +//--- B.cppm +module; +#include "version.h" +export module A.B; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{the module name in a module declaration cannot contain an object-like macro 'B'}} + +//--- C.cppm +module; // expected-error {{missing 'module' declaration at 
end of global module fragment introduced here}} +#include "version.h" +export module A.FUNC_LIKE(foo):C; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{unexpected '(' after the module name in a module declaration}} + +//--- D.cppm +module; // expected-error {{missing 'module' declaration at end of global module fragment introduced here}} +#include "version.h" +export module B.A.FUNC_LIKE(bar):C; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'B'}} \ + // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{unexpected '(' after the module name in a module declaration}} + +//--- E.cppm +module; +#include "version.h" +export module a.FUNC_LIKE:c; // OK, FUNC_LIKE would not be treated as a macro name. +// expected-no-diagnostics + +//--- F.cppm +module; +#include "version.h" +export module a.FUNC_LIKE:c ATTRS; // OK, FUNC_LIKE would not be treated as a macro name. +// expected-no-diagnostics + +//--- G.cppm +module; // expected-error {{missing 'module' declaration at end of global module fragment introduced here}} +#include "version.h" +export module A.FUNC_LIKE(B c:C ATTRS // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{unexpected '(' after the module name in a module declaration}} + +//--- H.cppm +module; // expected-error {{missing 'module' declaration at end of global module fragment introduced here}} +#include "version.h" +export module A.FUNC_LIKE(B,). 
c:C ATTRS // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{unexpected '(' after the module name in a module declaration}} + +//--- I.cppm +module; // expected-error {{missing 'module' declaration at end of global module fragment introduced here}} +#include "version.h" +export module A.FUNC_LIKE(B,) c:C ATTRS // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \ + // expected-error {{unexpected '(' after the module name in a module declaration}} + +//--- J.cppm +module; +#include "version.h" +export module unexpanded : unexpanded ATTRS SEMICOLON // OK, ATTRS and SEMICOLON can be expanded. +// expected-no-diagnostics diff --git a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp index d71358cc7a571f..14bbc911febfcd 100644 --- a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp +++ b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp @@ -8,27 +8,19 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface -fmodule-file=x=%t/x.pcm %t/x.y.cppm -o %t/x.y.pcm // // Module implementation for unknown and known module. (The former is ill-formed.) -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M.cpp \ -// RUN: -DTEST=1 -DEXPORT= -DMODULE_NAME=z -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x=%t/x.pcm -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M.cpp \ -// RUN: -DTEST=2 -DEXPORT= -DMODULE_NAME=x +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M1.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x=%t/x.pcm -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M2.cpp // // Module interface for unknown and known module. (The latter is ill-formed due to // redefinition.) 
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=3 -DEXPORT=export -DMODULE_NAME=z -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=4 -DEXPORT=export -DMODULE_NAME=x +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M3.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M4.cpp // // Miscellaneous syntax. -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=7 -DEXPORT=export -DMODULE_NAME='z elderberry' -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=8 -DEXPORT=export -DMODULE_NAME='z [[]]' -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=9 -DEXPORT=export -DMODULE_NAME='z [[fancy]]' -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \ -// RUN: -DTEST=10 -DEXPORT=export -DMODULE_NAME='z [[maybe_unused]]' +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M5.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M6.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M7.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M8.cpp //--- x.cppm export module x; @@ -38,17 +30,26 @@ int a, b; export module x.y; int c; -//--- M.cpp - -EXPORT module MODULE_NAME; -#if TEST == 7 -// expected-error@-2 {{expected ';'}} expected-error@-2 {{a type specifier is required}} -#elif TEST == 9 -// expected-warning@-4 {{unknown attribute 'fancy' ignored}} -#elif TEST == 10 -// expected-error-re@-6 {{'maybe_unused' attribute cannot be applied to a module{{$}}}} -#elif TEST == 1 -// expected-error@-8 {{module 'z' not found}} -#else -// expected-no-diagnostics -#endif +//--- M1.cpp +module z; // expected-error {{module 'z' not found}} + +//--- M2.cpp 
+module x; // expected-no-diagnostics + +//--- M3.cpp +export module z; // expected-no-diagnostics + +//--- M4.cpp +export module x; // expected-no-diagnostics + +//--- M5.cpp +export module z elderberry; // expected-error {{expected ';'}} expected-error {{a type specifier is required}} + +//--- M6.cpp +export module z [[]]; // expected-no-diagnostics + +//--- M7.cpp +export module z [[fancy]]; // expected-warning {{unknown attribute 'fancy' ignored}} + +//--- M8.cpp +export module z [[maybe_unused]]; // expected-error-re {{'maybe_unused' attribute cannot be applied to a module{{$}}}} diff --git a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm index 873e4c0edeac25..ecad4db32a7e94 100644 --- a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm +++ b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm @@ -6,10 +6,12 @@ // RUN: %clang_cc1 -std=c++20 -emit-module-interface -fmodule-file=x=%t/x.pcm %t/x.y.cppm -o %t/x.y.pcm // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.b.cppm -o %t/a.b.pcm // -// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test.cpp \ -// RUN: -DMODULE_NAME=z -DINTERFACE +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test-interface.cpp \ +// RUN: -DINTERFACE // RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm \ -// RUN: -fmodule-file=a.b=%t/a.b.pcm -verify %t/test.cpp -DMODULE_NAME=a.b +// RUN: -fmodule-file=a.b=%t/a.b.pcm -verify %t/test.cpp +// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm \ +// RUN: -verify %t/test-module-not-found.cpp // RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test.x.cpp //--- x.cppm @@ -34,11 +36,8 @@ int use_2 = b; // ok int use_3 = c; // expected-error {{use of 
undeclared identifier 'c'}} //--- test.cpp -#ifdef INTERFACE -export module MODULE_NAME; -#else -module MODULE_NAME; -#endif +module; +module a.b; import x; @@ -51,6 +50,28 @@ import x.y; import x.; // expected-error {{expected a module name after 'import'}} import .x; // expected-error {{expected a module name after 'import'}} -import blarg; // expected-error {{module 'blarg' not found}} +int use_4 = c; // ok + + +//--- test-interface.cpp +module; +export module z; + +import x; + +import x [[]]; +import x [[foo]]; // expected-warning {{unknown attribute 'foo' ignored}} +import x [[noreturn]]; // expected-error {{'noreturn' attribute cannot be applied to a module import}} +import x [[blarg::noreturn]]; // expected-warning {{unknown attribute 'noreturn' ignored}} + +import x.y; +import x.; // expected-error {{expected a module name after 'import'}} +import .x; // expected-error {{expected a module name after 'import'}} int use_4 = c; // ok + +//--- test-module-not-found.cpp +module; + +import blarg; // expected-error {{module 'blarg' not found}} + diff --git a/clang/test/SemaCXX/modules.cppm b/clang/test/SemaCXX/modules.cppm index 41204be76eafa1..267417bf5da2ca 100644 --- a/clang/test/SemaCXX/modules.cppm +++ b/clang/test/SemaCXX/modules.cppm @@ -1,19 +1,17 @@ -// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -o %t.0.pcm -verify -DTEST=0 -// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -o %t.1.pcm -verify -DTEST=1 -// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -fmodule-file=foo=%t.0.pcm -o %t.2.pcm -verify -DTEST=2 -// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify -Dfoo=bar -DTEST=3 +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t -#if TEST == 0 || TEST == 2 -// expected-no-diagnostics -#endif +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/A.cppm -o %t.0.pcm -verify +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/B.cppm -o %t.1.pcm -verify +// RUN: 
%clang_cc1 -std=c++20 -emit-module-interface %t/C.cppm -fmodule-file=foo=%t.0.pcm -o %t.2.pcm -verify +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/D.cppm -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/E.cppm -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify -Dfoo=bar +//--- A.cppm export module foo; - static int m; - int n; - -#if TEST == 0 export { int a; int b; @@ -27,7 +25,43 @@ export void f() {} export struct T { } t; -#elif TEST == 3 +// expected-no-diagnostics + +//--- B.cppm +export module foo; +static int m; +int n; +struct S { + export int n; // expected-error {{expected member name or ';'}} + export static int n; // expected-error {{expected member name or ';'}} +}; + +// FIXME: Exports of declarations without external linkage are disallowed. +// Exports of declarations with non-external-linkage types are disallowed. + +// Cannot export within another export. This isn't precisely covered by the +// language rules right now, but (per personal correspondence between zygoloid +// and gdr) is the intent. 
+export { // expected-note {{export block begins here}} + extern "C++" { + namespace NestedExport { + export { // expected-error {{export declaration appears within another export declaration}} + int q; + } + } // namespace NestedExport + } +} + +//--- C.cppm +export module foo; +static int m; +int n; +// expected-no-diagnostics + +//--- D.cppm +export module foo; +static int m; +int n; int use_a = a; // expected-error {{use of undeclared identifier 'a'}} #undef foo @@ -46,29 +80,12 @@ int use_n = n; // FIXME: this should not be visible, because it is not exported extern int n; static_assert(&n != p); // expected-error{{use of undeclared identifier 'p'}} -#endif -#if TEST == 1 -struct S { - export int n; // expected-error {{expected member name or ';'}} - export static int n; // expected-error {{expected member name or ';'}} -}; -#endif - -// FIXME: Exports of declarations without external linkage are disallowed. -// Exports of declarations with non-external-linkage types are disallowed. +//--- E.cppm +export module foo; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'foo'}} +static int m; +int n; +int use_a = a; // expected-error {{use of undeclared identifier 'a'}} -// Cannot export within another export. This isn't precisely covered by the -// language rules right now, but (per personal correspondence between zygoloid -// and gdr) is the intent. 
-#if TEST == 1 -export { // expected-note {{export block begins here}} - extern "C++" { - namespace NestedExport { - export { // expected-error {{export declaration appears within another export declaration}} - int q; - } - } // namespace NestedExport - } -} -#endif +#undef foo +import foo; // expected-error {{imports must immediately follow the module declaration}} diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index a6ded8be3ae9e5..8fe05b21146282 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -182,7 +182,7 @@ <h2 id="cxx26">C++2c implementation status</h2> <tr> <td>Module Declarations Shouldn’t be Macros</td> <td><a href="https://wg21.link/P3034R1">P3034R1</a> (<a href="#dr">DR</a>)</td> - <td class="none" align="center">No</td> + <td class="unreleased" align="center">Clang 20</td> </tr> <tr> <td>Trivial infinite loops are not Undefined Behavior</td> >From e25019f69e7131c8990ce81e36b97267690f2038 Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Mon, 5 Aug 2024 22:47:33 +0800 Subject: [PATCH 2/5] [Clang] Add peekNextPPToken, makes peek next token without side-effects Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/include/clang/Lex/Lexer.h | 10 ++++---- clang/include/clang/Lex/Preprocessor.h | 8 ++++++- clang/include/clang/Lex/TokenLexer.h | 7 +++--- clang/lib/Lex/Lexer.cpp | 21 +++++++++-------- clang/lib/Lex/PPMacroExpansion.cpp | 32 ++++++++++++-------------- clang/lib/Lex/TokenLexer.cpp | 10 ++++---- 6 files changed, 46 insertions(+), 42 deletions(-) diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index b6ecc7e5ded9e2..1e665c13b392f2 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -124,7 +124,7 @@ class Lexer : public PreprocessorLexer { //===--------------------------------------------------------------------===// // Context that changes as the file is lexed. 
// NOTE: any state that mutates when in raw mode must have save/restore code - // in Lexer::isNextPPTokenLParen. + // in Lexer::peekNextPPToken. // BufferPtr - Current pointer into the buffer. This is the next character // to be lexed. @@ -629,10 +629,10 @@ class Lexer : public PreprocessorLexer { BufferPtr = TokEnd; } - /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a - /// tok::l_paren token, 0 if it is something else and 2 if there are no more - /// tokens in the buffer controlled by this lexer. - unsigned isNextPPTokenLParen(); + /// peekNextPPToken - Return std::nullopt if there are no more tokens in the + /// buffer controlled by this lexer, otherwise return the next unexpanded + /// token. + std::optional<Token> peekNextPPToken(); //===--------------------------------------------------------------------===// // Lexer character reading interfaces. diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index c35b768ffea1bc..85712c1248fe12 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2649,10 +2649,16 @@ class Preprocessor { void removeCachedMacroExpandedTokensOfLastLexer(); + /// Peek the next token. If there is one, return it; if not, return + /// std::nullopt with no observable side-effect on the lexed tokens. + std::optional<Token> peekNextPPToken(); + /// Determine whether the next preprocessor token to be /// lexed is a '('. If so, consume the token and return true, if not, this /// method should have no observable side-effect on the lexed tokens. - bool isNextPPTokenLParen(); + bool isNextPPTokenLParen() { + return peekNextPPToken().value_or(Token{}).is(tok::l_paren); + } /// After reading "MACRO(", this method is invoked to read all of the formal /// arguments specified for the macro invocation. Returns null on error. 
diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h index 4d229ae6106743..777b4e6266c714 100644 --- a/clang/include/clang/Lex/TokenLexer.h +++ b/clang/include/clang/Lex/TokenLexer.h @@ -139,10 +139,9 @@ class TokenLexer { void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens, bool IsReinject); - /// If the next token lexed will pop this macro off the - /// expansion stack, return 2. If the next unexpanded token is a '(', return - /// 1, otherwise return 0. - unsigned isNextTokenLParen() const; + /// If the next token lexed will pop this macro off the expansion stack, + /// return std::nullopt, otherwise return the next unexpanded token. + std::optional<Token> peekNextPPToken() const; /// Lex and return a token from this macro stream. bool Lex(Token &Tok); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index ef1e1f4bd9aeb4..af533b3874cf5d 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3193,18 +3193,19 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { return PP->HandleEndOfFile(Result, isPragmaLexer()); } -/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from -/// the specified lexer will return a tok::l_paren token, 0 if it is something -/// else and 2 if there are no more tokens in the buffer controlled by the -/// lexer. -unsigned Lexer::isNextPPTokenLParen() { +/// peekNextPPToken - Return std::nullopt if there are no more tokens in the +/// buffer controlled by this lexer, otherwise return the next unexpanded +/// token. 
+std::optional<Token> Lexer::peekNextPPToken() { assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); if (isDependencyDirectivesLexer()) { if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) - return 2; - return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( - tok::l_paren); + return std::nullopt; + Token Result; + (void)convertDependencyDirectiveToken( + DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result); + return Result; } // Switch to 'skipping' mode. This will ensure that we can lex a token @@ -3233,8 +3234,8 @@ unsigned Lexer::isNextPPTokenLParen() { LexingRawMode = false; if (Tok.is(tok::eof)) - return 2; - return Tok.is(tok::l_paren); + return std::nullopt; + return Tok; } /// Find the end of a version control conflict marker. diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 1e31fcc3d731ed..0fadaeb6c2c681 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -437,42 +437,40 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, return !llvm::is_contained(MI->params(), II); } -/// isNextPPTokenLParen - Determine whether the next preprocessor token to be -/// lexed is a '('. If so, consume the token and return true, if not, this -/// method should have no observable side-effect on the lexed tokens. -bool Preprocessor::isNextPPTokenLParen() { +/// peekNextPPToken - Peek the next token. If there is one, return it; if not, +/// return std::nullopt with no observable side-effect on the lexed tokens. +std::optional<Token> Preprocessor::peekNextPPToken() { // Do some quick tests for rejection cases. - unsigned Val; + std::optional<Token> Val; if (CurLexer) - Val = CurLexer->isNextPPTokenLParen(); + Val = CurLexer->peekNextPPToken(); else - Val = CurTokenLexer->isNextTokenLParen(); + Val = CurTokenLexer->peekNextPPToken(); - if (Val == 2) { + if (!Val) { // We have run off the end. 
If it's a source file we don't // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the // macro stack. if (CurPPLexer) - return false; + return std::nullopt; for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) { if (Entry.TheLexer) - Val = Entry.TheLexer->isNextPPTokenLParen(); + Val = Entry.TheLexer->peekNextPPToken(); else - Val = Entry.TheTokenLexer->isNextTokenLParen(); + Val = Entry.TheTokenLexer->peekNextPPToken(); - if (Val != 2) + if (Val) break; // Ran off the end of a source file? if (Entry.ThePPLexer) - return false; + return std::nullopt; } } - // Okay, if we know that the token is a '(', lex it and return. Otherwise we - // have found something that isn't a '(' or we found the end of the - // translation unit. In either case, return false. - return Val == 1; + // Okay, we found the token and return. Otherwise we found the end of the + // translation unit. + return Val; } /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index 856d5682727fe3..0eca09ef93da92 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -922,13 +922,13 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream, } /// isNextTokenLParen - If the next token lexed will pop this macro off the -/// expansion stack, return 2. If the next unexpanded token is a '(', return -/// 1, otherwise return 0. -unsigned TokenLexer::isNextTokenLParen() const { +/// expansion stack, return std::nullopt, otherwise return the next unexpanded +/// token. +std::optional<Token> TokenLexer::peekNextPPToken() const { // Out of tokens? 
if (isAtEnd()) - return 2; - return Tokens[CurTokenIdx].is(tok::l_paren); + return std::nullopt; + return Tokens[CurTokenIdx]; } /// isParsingPreprocessorDirective - Return true if we are in the middle of a >From 829b1c1296148c6ca201a0d7d7951cc762d322ed Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Mon, 5 Aug 2024 22:48:25 +0800 Subject: [PATCH 3/5] [Clang] Add IsCurrentLexingTokAtPhysicalStartOfLine in Lexer to avoid pass this flag as argument Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/include/clang/Lex/Lexer.h | 13 +++++---- clang/lib/Lex/Lexer.cpp | 47 +++++++++++++++------------------ 2 files changed, 28 insertions(+), 32 deletions(-) diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index 1e665c13b392f2..142f3f05381298 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -136,6 +136,8 @@ class Lexer : public PreprocessorLexer { bool IsAtPhysicalStartOfLine; + bool IsCurrentLexingTokAtPhysicalStartOfLine; + bool HasLeadingSpace; bool HasLeadingEmptyMacro; @@ -609,7 +611,7 @@ class Lexer : public PreprocessorLexer { /// LexTokenInternal - Internal interface to lex a preprocessing token. Called /// by Lex. 
/// - bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine); + bool LexTokenInternal(Token &Result); bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr); @@ -749,12 +751,9 @@ class Lexer : public PreprocessorLexer { bool LexCharConstant (Token &Result, const char *CurPtr, tok::TokenKind Kind); bool LexEndOfFile (Token &Result, const char *CurPtr); - bool SkipWhitespace (Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine); - bool SkipLineComment (Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine); - bool SkipBlockComment (Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine); + bool SkipWhitespace (Token &Result, const char *CurPtr); + bool SkipLineComment (Token &Result, const char *CurPtr); + bool SkipBlockComment (Token &Result, const char *CurPtr); bool SaveLineComment (Token &Result, const char *CurPtr); bool IsStartOfConflictMarker(const char *CurPtr); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index af533b3874cf5d..160a60fb63e3d9 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2484,8 +2484,7 @@ bool Lexer::LexCharConstant(Token &Result, const char *CurPtr, /// Update BufferPtr to point to the next non-whitespace character and return. /// /// This method forms a token and returns true if KeepWhitespaceMode is enabled. -bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine) { +bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { // Whitespace - Skip it, then return the token after the whitespace. 
bool SawNewline = isVerticalWhitespace(CurPtr[-1]); @@ -2541,7 +2540,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace); if (SawNewline) { Result.setFlag(Token::StartOfLine); - TokAtPhysicalStartOfLine = true; + IsCurrentLexingTokAtPhysicalStartOfLine = true; if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine && PP) { if (auto *Handler = PP->getEmptylineHandler()) @@ -2560,8 +2559,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, /// /// If we're in KeepCommentMode or any CommentHandler has inserted /// some tokens, this will store the first token and return true. -bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine) { +bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) { // If Line comments aren't explicitly enabled for this language, emit an // extension warning. if (!LineComment) { @@ -2717,7 +2715,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, // The next returned token is at the start of the line. Result.setFlag(Token::StartOfLine); - TokAtPhysicalStartOfLine = true; + IsCurrentLexingTokAtPhysicalStartOfLine = true; // No leading whitespace seen so far. Result.clearFlag(Token::LeadingSpace); BufferPtr = CurPtr; @@ -2842,8 +2840,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, /// /// If we're in KeepCommentMode or any CommentHandler has inserted /// some tokens, this will store the first token and return true. -bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, - bool &TokAtPhysicalStartOfLine) { +bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { // Scan one character past where we should, looking for a '/' character. Once // we find it, check to see if it was preceded by a *. 
This common // optimization helps people who like to put a lot of * characters in their @@ -3046,7 +3043,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, // efficiently now. This is safe even in KeepWhitespaceMode because we would // have already returned above with the comment as a token. if (isHorizontalWhitespace(*CurPtr)) { - SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine); + SkipWhitespace(Result, CurPtr + 1); return false; } @@ -3698,11 +3695,11 @@ bool Lexer::Lex(Token &Result) { HasLeadingEmptyMacro = false; } - bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; + IsCurrentLexingTokAtPhysicalStartOfLine = IsAtPhysicalStartOfLine; IsAtPhysicalStartOfLine = false; bool isRawLex = isLexingRawMode(); (void) isRawLex; - bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); + bool returnedToken = LexTokenInternal(Result); // (After the LexTokenInternal call, the lexer might be destroyed.) assert((returnedToken || !isRawLex) && "Raw lex must succeed"); return returnedToken; @@ -3713,7 +3710,7 @@ bool Lexer::Lex(Token &Result) { /// has a null character at the end of the file. This returns a preprocessing /// token, not a normal token, as such, it is an internal interface. It assumes /// that the Flags of result have been cleared before calling this. -bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { +bool Lexer::LexTokenInternal(Token &Result) { LexStart: assert(!Result.needsCleaning() && "Result needs cleaning"); assert(!Result.hasPtrData() && "Result has not been reset"); @@ -3766,7 +3763,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { if (!isLexingRawMode()) Diag(CurPtr-1, diag::null_in_file); Result.setFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We know the lexer hasn't changed, so just try again with this lexer. 
@@ -3812,7 +3809,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // No leading whitespace seen so far. Result.clearFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We only saw whitespace, so just try again with this lexer. @@ -3824,7 +3821,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { case '\v': SkipHorizontalWhitespace: Result.setFlag(Token::LeadingSpace); - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode SkipIgnoredUnits: @@ -3834,11 +3831,11 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // too (without going through the big switch stmt). if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { - if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) + if (SkipLineComment(Result, CurPtr + 2)) return true; // There is a token to return. goto SkipIgnoredUnits; } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) { - if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) + if (SkipBlockComment(Result, CurPtr + 2)) return true; // There is a token to return. goto SkipIgnoredUnits; } else if (isHorizontalWhitespace(*CurPtr)) { @@ -4150,8 +4147,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*'; if (TreatAsComment) { - if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), - TokAtPhysicalStartOfLine)) + if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) return true; // There is a token to return. 
// It is common for the tokens immediately after a // comment to be @@ -4162,8 +4158,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { } if (Char == '*') { // /**/ comment. - if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), - TokAtPhysicalStartOfLine)) + if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) return true; // There is a token to return. // We only saw whitespace, so just try again with this lexer. @@ -4203,7 +4198,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? - if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) + if (IsCurrentLexingTokAtPhysicalStartOfLine && !LexingRawMode && + !Is_PragmaLexer) goto HandleDirective; Kind = tok::hash; @@ -4392,7 +4388,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? - if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) + if (IsCurrentLexingTokAtPhysicalStartOfLine && !LexingRawMode && + !Is_PragmaLexer) goto HandleDirective; Kind = tok::hash; @@ -4412,7 +4409,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { if (!LangOpts.AsmPreprocessor) { if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We only saw whitespace, so just try again with this lexer. 
@@ -4445,7 +4442,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { llvm::strictConversion); if (Status == llvm::conversionOK) { if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + if (SkipWhitespace(Result, CurPtr)) return true; // KeepWhitespaceMode // We only saw whitespace, so just try again with this lexer. >From 581160a121fca5796bff2392d9228207d37e6f16 Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Tue, 6 Aug 2024 01:09:56 +0800 Subject: [PATCH 4/5] [Clang] Partially implement theP1857R3: Modules Dependency Discovery Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/include/clang/Basic/IdentifierTable.h | 3 +- clang/include/clang/Lex/Preprocessor.h | 172 ++++++++++---------- clang/include/clang/Lex/Token.h | 4 + clang/lib/Lex/Lexer.cpp | 24 +++ clang/lib/Lex/Preprocessor.cpp | 124 ++++++++------ clang/lib/Parse/ParseDecl.cpp | 7 - clang/lib/Parse/Parser.cpp | 34 ++-- clang/test/CXX/basic/basic.link/p3.cpp | 13 +- clang/test/CXX/lex/lex.pptoken/p3-2a.cpp | 11 +- 9 files changed, 219 insertions(+), 173 deletions(-) diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index f40f74d0355ade..e8b9e381a5b4c3 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -585,7 +585,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { void RecomputeNeedsHandleIdentifier() { NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() || isExtensionToken() || isFutureCompatKeyword() || - isOutOfDate() || isModulesImport(); + isOutOfDate() || isModulesImport() || + isModulesDeclaration(); } }; diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 85712c1248fe12..f4c28b7cf6a54e 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -109,6 +109,77 
@@ class TokenValue { } }; +/// Represents module or partition name token sequence. +/// +/// module-name: +/// module-name-qualifier[opt] identifier +/// +/// partition-name: [C++20] +/// : module-name-qualifier[opt] identifier +/// +/// module-name-qualifier +/// module-name-qualifier[opt] identifier . +/// +/// This class can only be created by the preprocessor and guarantees that the +/// two source arrays are contiguous in memory and only contain 3 kinds of +/// tokens (identifier, '.' and ':'). And only available when the preprocessor +/// returns annot_module_name token. +/// +/// For example: +/// +/// export module m.n:c.d +/// +/// The module name array has 3 tokens ['m', '.', 'n']. +/// The partition name array has 4 tokens [':', 'c', '.', 'd']. +/// +/// When importing a partition in a named module fragment (Eg. import :part1;), +/// the module name array will be empty, and the partition name array has 2 +/// tokens. +/// +/// When we meet a private-module-fragment (Eg. module :private;), preprocessor +/// will not return a annot_module_name token, but will return 2 separate tokens +/// [':', 'kw_private']. + +class ModuleNameInfo { + friend class Preprocessor; + ArrayRef<Token> ModuleName; + ArrayRef<Token> PartitionName; + + ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex); + +public: + /// Return the contiguous token array.
+ ArrayRef<Token> getTokens() const { + if (ModuleName.empty()) + return PartitionName; + if (PartitionName.empty()) + return ModuleName; + return ArrayRef(ModuleName.begin(), PartitionName.end()); + } + bool hasModuleName() const { return !ModuleName.empty(); } + bool hasPartitionName() const { return !PartitionName.empty(); } + ArrayRef<Token> getModuleName() const { return ModuleName; } + ArrayRef<Token> getPartitionName() const { return PartitionName; } + Token getColonToken() const { + assert(hasPartitionName() && "Do not have a partition name"); + return getPartitionName().front(); + } + + /// Under the standard C++ Modules, the dot is just part of the module name, + /// and not a real hierarchy separator. Flatten such module names now. + std::string getFlatName() const; + + /// Build a module id path from the contiguous token array, both include + /// module name and partition name. + void getModuleIdPath( + SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const; + + /// Build a module id path from \param ModuleName. + static void getModuleIdPath( + ArrayRef<Token> ModuleName, + SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path); +}; + /// Context in which macro name is used. enum MacroUse { // other than #define or #undef @@ -337,6 +408,9 @@ class Preprocessor { /// Whether the last token we lexed was an '@'. bool LastTokenWasAt = false; + /// Whether the last token we lexed was an 'export' keyword. + std::optional<Token> LastTokenWasExportKeyword = std::nullopt; + /// A position within a C++20 import-seq. 
class StdCXXImportSeq { public: @@ -540,24 +614,12 @@ class Preprocessor { reset(); } - void handleIdentifier(IdentifierInfo *Identifier) { - if (isModuleCandidate() && Identifier) - Name += Identifier->getName().str(); - else if (!isNamedModule()) - reset(); - } - - void handleColon() { - if (isModuleCandidate()) - Name += ":"; - else if (!isNamedModule()) - reset(); - } - - void handlePeriod() { - if (isModuleCandidate()) - Name += "."; - else if (!isNamedModule()) + void handleModuleName(Token ModuleName) { + assert(ModuleName.is(tok::annot_module_name) && "Expect a module name"); + if (isModuleCandidate()) { + Name = ModuleName.getAnnotationValueAs<ModuleNameInfo *>() + ->getFlatName(); + } else if (!isNamedModule()) reset(); } @@ -2328,6 +2390,8 @@ class Preprocessor { /// token stream. bool HandleEndOfTokenLexer(Token &Result); + bool HandleModuleContextualKeyword(Token &Result); + /// Callback invoked when the lexer sees a # token at the start of a /// line. /// @@ -3098,78 +3162,6 @@ struct EmbedAnnotationData { /// Registry of pragma handlers added by plugins using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>; - -/// Represents module or partition name token sequance. -/// -/// module-name: -/// module-name-qualifier[opt] identifier -/// -/// partition-name: [C++20] -/// : module-name-qualifier[opt] identifier -/// -/// module-name-qualifier -/// module-name-qualifier[opt] identifier . -/// -/// This class can only be created by the preprocessor and guarantees that the -/// two source array being contiguous in memory and only contains 3 kind of -/// tokens (identifier, '.' and ':'). And only available when the preprocessor -/// returns annot_module_name token. -/// -/// For exmaple: -/// -/// export module m.n:c.d -/// -/// The module name array has 3 tokens ['m', '.', 'n']. -/// The partition name array has 4 tokens [':', 'c', '.', 'd']. -/// -/// When import a partition in a named module fragment (Eg. 
import :part1;), -/// the module name array will be empty, and the partition name array has 2 -/// tokens. -/// -/// When we meet a private-module-fragment (Eg. module :private;), preprocessor -/// will not return a annot_module_name token, but will return 2 separate tokens -/// [':', 'kw_private']. - -class ModuleNameInfo { - friend class Preprocessor; - ArrayRef<Token> ModuleName; - ArrayRef<Token> PartitionName; - - ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex); - -public: - /// Return the contiguous token array. - ArrayRef<Token> getTokens() const { - if (ModuleName.empty()) - return PartitionName; - if (PartitionName.empty()) - return ModuleName; - return ArrayRef(ModuleName.begin(), PartitionName.end()); - } - bool hasModuleName() const { return !ModuleName.empty(); } - bool hasPartitionName() const { return !PartitionName.empty(); } - ArrayRef<Token> getModuleName() const { return ModuleName; } - ArrayRef<Token> getPartitionName() const { return PartitionName; } - Token getColonToken() const { - assert(hasPartitionName() && "Do not have a partition name"); - return getPartitionName().front(); - } - - /// Under the standard C++ Modules, the dot is just part of the module name, - /// and not a real hierarchy separator. Flatten such module names now. - std::string getFlatName() const; - - /// Build a module id path from the contiguous token array, both include - /// module name and partition name. - void getModuleIdPath( - SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const; - - /// Build a module id path from \param ModuleName. 
- static void getModuleIdPath( - ArrayRef<Token> ModuleName, - SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path); -}; - } // namespace clang #endif // LLVM_CLANG_LEX_PREPROCESSOR_H diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index 2be3ad39529f05..8400ab7ed07e2a 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -292,6 +292,10 @@ class Token { /// Return the ObjC keyword kind. tok::ObjCKeywordKind getObjCKeywordID() const; + /// Return true if we have a C++20 Modules contextual keyword (export, import + /// or module). + bool isModuleContextualKeyword() const; + bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const; /// Return true if this token has trigraphs or escaped newlines in it. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 160a60fb63e3d9..ffb82fa46984a6 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -74,6 +74,17 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const { return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; } +/// Return true if we have a C++20 Modules contextual keyword (export, import +/// or module). +bool Token::isModuleContextualKeyword() const { + if (is(tok::kw_export)) + return true; + if (isNot(tok::identifier)) + return false; + const auto *II = getIdentifierInfo(); + return II->isModulesImport() || II->isModulesDeclaration(); +} + /// Determine whether the token kind starts a simple-type-specifier.
bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const { switch (getKind()) { @@ -2003,6 +2014,13 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { return true; } + if (Result.isModuleContextualKeyword()) { + Result.setFlagValue(Token::StartOfLine, + IsCurrentLexingTokAtPhysicalStartOfLine); + if (PP->HandleModuleContextualKeyword(Result)) + return true; + } + // Finally, now that we know we have an identifier, pass this off to the // preprocessor, which may macro expand it or something. if (II->isHandleIdentifierCase()) @@ -4556,6 +4574,12 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) { Result.setRawIdentifierData(TokPtr); if (!isLexingRawMode()) { const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); + if (Result.isModuleContextualKeyword()) { + // Result.setFlagValue(Token::StartOfLine, + // IsCurrentLexingTokAtPhysicalStartOfLine); + if (PP->HandleModuleContextualKeyword(Result)) + return true; + } if (II->isHandleIdentifierCase()) return PP->HandleIdentifier(Result); } diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index d0c4ab8fd5b669..7f7e6d2f2fe535 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -863,12 +863,6 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { CurLexerCallback = CLK_LexAfterModuleImport; } - if ((II.isModulesDeclaration() || Identifier.is(tok::kw_module)) && - !InMacroArgs && !DisableMacroExpansion && - (getLangOpts().CPlusPlusModules || getLangOpts().DebuggerSupport) && - CurLexerCallback != CLK_CachingLexer) { - CurLexerCallback = CLK_LexAfterModuleDecl; - } return true; } @@ -926,55 +920,22 @@ void Preprocessor::Lex(Token &Result) { StdCXXImportSeqState.handleExport(); ModuleDeclState.handleExport(); break; - case tok::annot_module_name: { - auto *Info = static_cast<ModuleNameInfo *>(Result.getAnnotationValue()); - for (const auto &Tok : Info->getTokens()) { - switch (Tok.getKind()) { - case tok::identifier: 
- ModuleDeclState.handleIdentifier(Tok.getIdentifierInfo()); - break; - case tok::period: - ModuleDeclState.handlePeriod(); - break; - case tok::colon: - ModuleDeclState.handleColon(); - break; - default: - llvm_unreachable("Unexpected token in module name"); - } - } - if (ModuleDeclState.isModuleCandidate()) - break; - TrackGMFState.handleMisc(); - StdCXXImportSeqState.handleMisc(); - ModuleDeclState.handleMisc(); + case tok::kw_module: + TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); + ModuleDeclState.handleModule(); break; - } - case tok::identifier: - // Check "import" and "module" when there is no open bracket. The two - // identifiers are not meaningful with open brackets. + case tok::kw_import: if (StdCXXImportSeqState.atTopLevel()) { - if (Result.getIdentifierInfo()->isModulesImport()) { - TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq()); - StdCXXImportSeqState.handleImport(); - if (StdCXXImportSeqState.afterImportSeq()) { - ModuleImportLoc = Result.getLocation(); - NamedModuleImportPath.clear(); - IsAtImport = false; - CurLexerCallback = CLK_LexAfterModuleImport; - } - break; - } - if (Result.getIdentifierInfo()->isModulesDeclaration()) { - TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); - ModuleDeclState.handleModule(); - CurLexerCallback = CLK_LexAfterModuleDecl; - break; - } + TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq()); + StdCXXImportSeqState.handleImport(); } - if (ModuleDeclState.isModuleCandidate()) - break; - [[fallthrough]]; + ModuleImportLoc = Result.getLocation(); + NamedModuleImportPath.clear(); + IsAtImport = false; + break; + case tok::annot_module_name: + ModuleDeclState.handleModuleName(Result); + break; default: TrackGMFState.handleMisc(); StdCXXImportSeqState.handleMisc(); @@ -989,6 +950,8 @@ void Preprocessor::Lex(Token &Result) { } LastTokenWasAt = Result.is(tok::at); + if (Result.isNot(tok::kw_export)) + LastTokenWasExportKeyword.reset(); --LexLevel; 
if ((LexLevel == 0 || PreprocessToken) && @@ -1012,6 +975,63 @@ void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) { } } +/// P1857R3: Modules Dependency Discovery +/// +/// At the start of phase 4 an import or module token is treated as starting a +/// directive and are converted to their respective keywords iff: +/// • After skipping horizontal whitespace are +/// • at the start of a logical line, or +/// • preceded by an 'export' at the start of the logical line. +/// • Are followed by an identifier pp token (before macro expansion), or +/// • <, ", or : (but not ::) pp tokens for 'import', or +/// • ; for 'module' +/// Otherwise the token is treated as an identifier. +bool Preprocessor::HandleModuleContextualKeyword(Token &Result) { + if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword() || + InMacroArgs || DisableMacroExpansion || + CurLexerCallback == CLK_CachingLexer) + return false; + + if (Result.is(tok::kw_export)) { + LastTokenWasExportKeyword = Result; + return false; + } + + if (LastTokenWasExportKeyword) { + if (!LastTokenWasExportKeyword->isAtStartOfLine()) + return false; + // [cpp.pre]/1.4 + // export // not a preprocessing directive + // import foo; // preprocessing directive (ill-formed at phase + // 7) + if (Result.isAtStartOfLine()) + return false; + } else if (!Result.isAtStartOfLine()) + return false; + + // Peek next token. 
+ auto NextTok = peekNextPPToken().value_or(Token{}); + if (Result.getIdentifierInfo()->isModulesImport() && + NextTok.isOneOf(tok::raw_identifier, tok::less, tok::string_literal, + tok::colon)) { + Result.setKind(tok::kw_import); + ModuleImportLoc = Result.getLocation(); + IsAtImport = false; + CurLexerCallback = CLK_LexAfterModuleImport; + return true; + } + if (Result.getIdentifierInfo()->isModulesDeclaration() && + NextTok.isOneOf(tok::raw_identifier, tok::colon, tok::semi)) { + Result.setKind(tok::kw_module); + NamedModuleImportPath.clear(); + CurLexerCallback = CLK_LexAfterModuleDecl; + return true; + } + + // Ok, it's an identifier. + return false; +} + /// Lex a header-name token (including one formed from header-name-tokens if /// \p AllowMacroExpansion is \c true). /// diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 713e32b1a313f3..8893c89b3d5b93 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4002,13 +4002,6 @@ void Parser::ParseDeclarationSpecifiers( // We're done with the declaration-specifiers. goto DoneWithDeclSpec; - case tok::annot_module_name: { - PP.EnterTokenStream( - Tok.getAnnotationValueAs<ModuleNameInfo *>()->getTokens(), - /*DisableMacroExpansion=*/true, /*IsReinject=*/false); - ConsumeAnyToken(); - [[fallthrough]]; - } // typedef-name case tok::kw___super: case tok::kw_decltype: diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index afb2e1e4161682..443f16b5c0da93 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -642,6 +642,8 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, case tok::kw_export: switch (NextToken().getKind()) { + case tok::kw_import: + goto import_decl; case tok::kw_module: goto module_decl; @@ -652,14 +654,14 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, // Recognize context-sensitive C++20 'export module' and 'export import' // declarations. 
case tok::identifier: { - IdentifierInfo *II = NextToken().getIdentifierInfo(); - if ((II == Ident_module || II == Ident_import) && - GetLookAheadToken(2).isNot(tok::coloncolon)) { - if (II == Ident_module) - goto module_decl; - else - goto import_decl; - } + // IdentifierInfo *II = NextToken().getIdentifierInfo(); + // if ((II == Ident_module || II == Ident_import) && + // GetLookAheadToken(2).isNot(tok::coloncolon)) { + // if (II == Ident_module) + // goto module_decl; + // else + // goto import_decl; + // } break; } @@ -736,14 +738,14 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, // A token sequence beginning with 'export[opt] module' or // 'export[opt] import' and not immediately followed by '::' // is never interpreted as the declaration of a top-level-declaration. - if ((Tok.getIdentifierInfo() == Ident_module || - Tok.getIdentifierInfo() == Ident_import) && - NextToken().isNot(tok::coloncolon)) { - if (Tok.getIdentifierInfo() == Ident_module) - goto module_decl; - else - goto import_decl; - } + // if ((Tok.getIdentifierInfo() == Ident_module || + // Tok.getIdentifierInfo() == Ident_import) && + // NextToken().isNot(tok::coloncolon)) { + // if (Tok.getIdentifierInfo() == Ident_module) + // goto module_decl; + // else + // goto import_decl; + // } break; default: diff --git a/clang/test/CXX/basic/basic.link/p3.cpp b/clang/test/CXX/basic/basic.link/p3.cpp index 23f39d11b655a6..8793162d8957e3 100644 --- a/clang/test/CXX/basic/basic.link/p3.cpp +++ b/clang/test/CXX/basic/basic.link/p3.cpp @@ -6,6 +6,8 @@ module; #if IMPORT_ERROR != 2 struct import { struct inner {}; }; +#else +// expected-no-diagnostics #endif struct module { struct inner {}; }; @@ -24,9 +26,9 @@ template<> struct import<n> { static X y; }; -// This is not valid because the 'import <n>' is a pp-import, even though it -// grammatically can't possibly be an import declaration. 
-struct X {} import<n>::y; // expected-error {{'n' file not found}} +// Well-formed since P1857R3: Modules Dependency Discovery (https://wg21.link/p1857r3), +// it grammatically can't possibly be an import declaration. +struct X {} import<n>::y; #else module y = {}; // expected-error {{multiple module declarations}} expected-error 2{{}} @@ -38,9 +40,10 @@ ::module y = {}; import::inner xi = {}; module::inner yi = {}; +// Ill-formed since P1857R3: Modules Dependency Discovery (https://wg21.link/p1857r3). namespace N { - module a; - import b; + module a; // expected-error {{module declaration can only appear at the top level}} + import b; // expected-error {{module 'b' not found}} } extern "C++" module cxxm; diff --git a/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp b/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp index 0e0e5fec6e9d8b..dcde61aa4620c0 100644 --- a/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp +++ b/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp @@ -15,7 +15,11 @@ import <foo bar>; // CHECK: import <foo bar>; import <foo bar>; -// CHECK: foo; import <foo bar>; +// Since P1857R3, this is an invalid import directive, import will be treated as +// an identifier. Also <foo bar> will not be a tok::header_name, but will be 4 +// separate tokens. +// +// CHECK: foo; import <foo bar>; foo; import <foo bar>; // CHECK: foo import <foo bar>; @@ -45,7 +49,10 @@ export export import <foo bar>; import <foo bar>; UNBALANCED_PAREN -// CHECK: import <foo bar>; +// Since P1857R3, this is an invalid import directive. '<foo bar>' will be treated as +// a tok::header_name, but not 4 separate tokens.
+ +// CHECK: import <foo bar>; import <foo bar>; ) >From fd71e8ca298d808e6a2a76db77562daa3a09ce8f Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Thu, 15 Aug 2024 23:01:24 +0800 Subject: [PATCH 5/5] [Clang] Fix test and remove dead code Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/include/clang/Lex/Preprocessor.h | 4 +- clang/include/clang/Parse/Parser.h | 4 -- clang/lib/Parse/Parser.cpp | 23 --------- .../dcl.module/dcl.module.import/p1.cppm | 6 ++- .../test/CXX/module/dcl.dcl/dcl.module/p1.cpp | 51 ++++++++++++++----- 5 files changed, 43 insertions(+), 45 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index f4c28b7cf6a54e..c6f1709aa874eb 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -617,8 +617,8 @@ class Preprocessor { void handleModuleName(Token ModuleName) { assert(ModuleName.is(tok::annot_module_name) && "Expect a module name"); if (isModuleCandidate()) { - Name = ModuleName.getAnnotationValueAs<ModuleNameInfo *>() - ->getFlatName(); + Name = + ModuleName.getAnnotationValueAs<ModuleNameInfo *>()->getFlatName(); } else if (!isNamedModule()) reset(); } diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 31c48f8805f4dc..7c9c569b9d2e46 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -165,10 +165,6 @@ class Parser : public CodeCompletionHandler { mutable IdentifierInfo *Ident_GNU_final; mutable IdentifierInfo *Ident_override; - // C++2a contextual keywords. - mutable IdentifierInfo *Ident_import; - mutable IdentifierInfo *Ident_module; - // C++ type trait keywords that can be reverted to identifiers and still be // used as type traits. 
llvm::SmallDenseMap<IdentifierInfo *, tok::TokenKind> RevertibleTypeTraits; diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 443f16b5c0da93..a0cf79417ea0a0 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -514,8 +514,6 @@ void Parser::Initialize() { Ident_abstract = nullptr; Ident_override = nullptr; Ident_GNU_final = nullptr; - Ident_import = nullptr; - Ident_module = nullptr; Ident_super = &PP.getIdentifierTable().get("super"); @@ -571,11 +569,6 @@ void Parser::Initialize() { PP.SetPoisonReason(Ident_AbnormalTermination,diag::err_seh___finally_block); } - if (getLangOpts().CPlusPlusModules) { - Ident_import = PP.getIdentifierInfo("import"); - Ident_module = PP.getIdentifierInfo("module"); - } - Actions.Initialize(); // Prime the lexer look-ahead. @@ -732,22 +725,6 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, Actions.ActOnEndOfTranslationUnit(); //else don't tell Sema that we ended parsing: more input might come. return true; - - case tok::identifier: - // C++2a [basic.link]p3: - // A token sequence beginning with 'export[opt] module' or - // 'export[opt] import' and not immediately followed by '::' - // is never interpreted as the declaration of a top-level-declaration. 
- // if ((Tok.getIdentifierInfo() == Ident_module || - // Tok.getIdentifierInfo() == Ident_import) && - // NextToken().isNot(tok::coloncolon)) { - // if (Tok.getIdentifierInfo() == Ident_module) - // goto module_decl; - // else - // goto import_decl; - // } - break; - default: break; } diff --git a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm index ecad4db32a7e94..1c67be2ff774f3 100644 --- a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm +++ b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm @@ -48,7 +48,8 @@ import x [[blarg::noreturn]]; // expected-warning {{unknown attribute 'noreturn' import x.y; import x.; // expected-error {{expected a module name after 'import'}} -import .x; // expected-error {{expected a module name after 'import'}} +import .x; // expected-error {{unknown type name 'import'}} \ + // expected-error {{cannot use dot operator on a type}} int use_4 = c; // ok @@ -66,7 +67,8 @@ import x [[blarg::noreturn]]; // expected-warning {{unknown attribute 'noreturn' import x.y; import x.; // expected-error {{expected a module name after 'import'}} -import .x; // expected-error {{expected a module name after 'import'}} +import .x; // expected-error {{unknown type name 'import'}} \ + // expected-error {{cannot use dot operator on a type}} int use_4 = c; // ok diff --git a/clang/test/CXX/module/dcl.dcl/dcl.module/p1.cpp b/clang/test/CXX/module/dcl.dcl/dcl.module/p1.cpp index db86b5dd34c380..e12a0106cb2f4b 100644 --- a/clang/test/CXX/module/dcl.dcl/dcl.module/p1.cpp +++ b/clang/test/CXX/module/dcl.dcl/dcl.module/p1.cpp @@ -1,14 +1,37 @@ -// RUN: %clang_cc1 -std=c++20 -verify %s -DFOO=export -DBAR=export -// RUN: %clang_cc1 -std=c++20 -verify %s -DFOO=export -DBAR= -// RUN: %clang_cc1 -std=c++20 %s -DFOO=export -emit-module-interface -o %t -// RUN: %clang_cc1 -std=c++20 %s -fmodule-file=foo=%t -DFOO= -// RUN: %clang_cc1 -std=c++20 
%s -fmodule-file=foo=%t -DBAR=export -// RUN: %clang_cc1 -std=c++20 -verify %s -fmodule-file=foo=%t -DFOO= -DBAR=export - -#ifdef FOO -FOO module foo; // expected-note {{previous module declaration is here}} -#endif - -#ifdef BAR -BAR module bar; // expected-error {{translation unit contains multiple module declarations}} -#endif +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 -verify %t/A.cpp +// RUN: %clang_cc1 -std=c++20 -verify %t/B.cpp +// RUN: %clang_cc1 -std=c++20 %t/C.cpp -emit-module-interface -o %t/C.pcm +// RUN: %clang_cc1 -std=c++20 %t/D.cpp -fmodule-file=foo=%t/C.pcm +// RUN: %clang_cc1 -std=c++20 %t/E.cpp -fmodule-file=foo=%t/C.pcm +// RUN: %clang_cc1 -std=c++20 -verify %t/F.cpp -fmodule-file=foo=%t/C.pcm + +//--- A.cpp +module; +export module foo; // expected-note {{previous module declaration is here}} +export module bar; // expected-error {{translation unit contains multiple module declarations}} + +//--- B.cpp +module; +export module foo; // expected-note {{previous module declaration is here}} +module bar; // expected-error {{translation unit contains multiple module declarations}} + +//--- C.cpp +module; +export module foo; + +//--- D.cpp +module; +module foo; + +//--- E.cpp +module; +export module bar; + +//--- F.cpp +module; +module foo; // expected-note {{previous module declaration is here}} +export module bar; // expected-error {{translation unit contains multiple module declarations}} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits