Author: Dmitry Polukhin Date: 2024-09-25T08:31:49+01:00 New Revision: 2ccac07bf22d17029d4437b0a727dd55c8c86d56
URL: https://github.com/llvm/llvm-project/commit/2ccac07bf22d17029d4437b0a727dd55c8c86d56 DIFF: https://github.com/llvm/llvm-project/commit/2ccac07bf22d17029d4437b0a727dd55c8c86d56.diff LOG: [C++20][Modules] Fix crash when function and lambda inside loaded from different modules (#109167) Summary: Because AST loading code is lazy and happens in unpredictable order, it is possible that a function and lambda inside the function can be loaded from different modules. As a result, the captured DeclRefExpr won’t match the corresponding VarDecl inside the function. This situation is reflected in the AST as follows: ``` FunctionDecl 0x555564f4aff0 <Conv.h:33:1, line:41:1> line:33:35 imported in ./thrift_cpp2_base.h hidden tryTo 'Expected<Tgt, const char *> ()' inline |-also in ./folly-conv.h `-CompoundStmt 0x555564f7cfc8 <col:43, line:41:1> |-DeclStmt 0x555564f7ced8 <line:34:3, col:17> | `-VarDecl 0x555564f7cef8 <col:3, col:16> col:7 imported in ./thrift_cpp2_base.h hidden referenced result 'Tgt' cinit | `-IntegerLiteral 0x555564f7d080 <col:16> 'int' 0 |-CallExpr 0x555564f7cea8 <line:39:3, col:76> '<dependent type>' | |-UnresolvedLookupExpr 0x555564f7bea0 <col:3, col:19> '<overloaded function type>' lvalue (no ADL) = 'then_' 0x555564f7bef0 | |-CXXTemporaryObjectExpr 0x555564f7bcb0 <col:25, col:45> 'Expected<bool, int>':'folly::Expected<bool, int>' 'void () noexcept' zeroing | `-LambdaExpr 0x555564f7bc88 <col:48, col:75> '(lambda at Conv.h:39:48)' | |-CXXRecordDecl 0x555564f76b88 <col:48> col:48 imported in ./folly-conv.h hidden implicit <undeserialized declarations> class definition | | |-also in ./thrift_cpp2_base.h | | `-DefinitionData lambda empty standard_layout trivially_copyable literal can_const_default_init | | |-DefaultConstructor defaulted_is_constexpr | | |-CopyConstructor simple trivial has_const_param needs_implicit implicit_has_const_param | | |-MoveConstructor exists simple trivial needs_implicit | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param | | |-MoveAssignment | | `-Destructor simple irrelevant trivial constexpr needs_implicit | `-CompoundStmt 0x555564f7d1a8 <col:58, col:75> | `-ReturnStmt 0x555564f7d198 <col:60, col:67> | `-DeclRefExpr 0x555564f7d0a0 <col:67> 'Tgt' lvalue Var 0x555564f7d0c8 'result' 'Tgt' refers_to_enclosing_variable_or_capture `-ReturnStmt 0x555564f7bc78 <line:40:3, col:11> `-InitListExpr 0x555564f7bc38 <col:10, col:11> 'void' ``` This diff modifies the AST deserialization process to load lambdas within the canonical function declaration sooner, immediately following the function, ensuring that they are loaded from the same module. Re-land https://github.com/llvm/llvm-project/pull/104512 Added test case that caused crash due to multiple enclosed lambdas deserialization. Test Plan: check-clang Added: clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp Modified: clang/include/clang/Serialization/ASTBitCodes.h clang/include/clang/Serialization/ASTReader.h clang/include/clang/Serialization/ASTWriter.h clang/lib/Serialization/ASTReader.cpp clang/lib/Serialization/ASTReaderDecl.cpp clang/lib/Serialization/ASTWriter.cpp clang/lib/Serialization/ASTWriterDecl.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 4410df296d8efc..5be33ae0ed1b98 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -724,6 +724,12 @@ enum ASTRecordTypes { /// Record code for vtables to emit. VTABLES_TO_EMIT = 70, + + /// Record code for the FunctionDecl to lambdas mapping. These lambdas have to + /// be loaded right after the function they belong to. It is required to have + /// canonical declaration for the lambda class from the same module as + /// enclosing function. + FUNCTION_DECL_TO_LAMBDAS_MAP = 71, }; /// Record types used within a source manager block. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 898f4392465fdf..c1843218a4b8b1 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -532,6 +532,18 @@ class ASTReader /// namespace as if it is not delayed. DelayedNamespaceOffsetMapTy DelayedNamespaceOffsetMap; + /// Mapping from FunctionDecl IDs to the corresponding lambda IDs. + /// + /// These lambdas have to be loaded right after the function they belong to. + /// It is required to have canonical declaration for lambda class from the + /// same module as enclosing function. This is required to correctly resolve + /// captured variables in the lambda. Without this, due to lazy + /// deserialization, canonical declarations for the function and lambdas can + /// be selected from diff erent modules and DeclRefExprs may refer to the AST + /// nodes that don't exist in the function. + llvm::DenseMap<GlobalDeclID, SmallVector<GlobalDeclID, 4>> + FunctionToLambdasMap; + struct PendingUpdateRecord { Decl *D; GlobalDeclID ID; diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 10a50b711043a8..760866fd9de938 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -233,6 +233,14 @@ class ASTWriter : public ASTDeserializationListener, /// instead of comparing the result of `getDeclID()` or `GetDeclRef()`. llvm::SmallPtrSet<const Decl *, 32> PredefinedDecls; + /// Mapping from FunctionDecl to the list of lambda IDs inside the function. + /// + /// These lambdas have to be loaded right after the function they belong to. + /// In order to have canonical declaration for lambda class from the same + /// module as enclosing function during deserialization. + llvm::DenseMap<const Decl *, SmallVector<LocalDeclID, 4>> + FunctionToLambdasMap; + /// Offset of each declaration in the bitstream, indexed by /// the declaration's ID. std::vector<serialization::DeclOffset> DeclOffsets; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index ede3070787722d..a369ad0be47954 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3856,6 +3856,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, break; } + case FUNCTION_DECL_TO_LAMBDAS_MAP: + for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/) { + GlobalDeclID ID = ReadDeclID(F, Record, I); + auto &Lambdas = FunctionToLambdasMap[ID]; + unsigned NN = Record[I++]; + Lambdas.reserve(NN); + for (unsigned II = 0; II < NN; II++) + Lambdas.push_back(ReadDeclID(F, Record, I)); + } + break; + case OBJC_CATEGORIES_MAP: if (F.LocalNumObjCCategoriesInMap != 0) return llvm::createStringError( diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 9272e23c7da3fc..7cead2728ca938 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -4351,6 +4351,16 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod)); DC->setHasExternalVisibleStorage(true); } + + // Load any pending lambdas for the function. + if (auto *FD = dyn_cast<FunctionDecl>(D); FD && FD->isCanonicalDecl()) { + if (auto IT = FunctionToLambdasMap.find(ID); + IT != FunctionToLambdasMap.end()) { + for (auto LID : IT->second) + GetDecl(LID); + FunctionToLambdasMap.erase(IT); + } + } } void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 4ee14b1e260159..f326e3c2e2ff7a 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -903,6 +903,7 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(PENDING_IMPLICIT_INSTANTIATIONS); RECORD(UPDATE_VISIBLE); RECORD(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD); + RECORD(FUNCTION_DECL_TO_LAMBDAS_MAP); RECORD(DECL_UPDATE_OFFSETS); RECORD(DECL_UPDATES); RECORD(CUDA_SPECIAL_DECL_REFS); @@ -5707,6 +5708,27 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { Stream.EmitRecord(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD, DelayedNamespaceRecord); + if (!FunctionToLambdasMap.empty()) { + // TODO: on disk hash table for function to lambda mapping might be more + // efficent becuase it allows lazy deserialization. + RecordData FunctionToLambdasMapRecord; + for (const auto &Pair : FunctionToLambdasMap) { + FunctionToLambdasMapRecord.push_back( + GetDeclRef(Pair.first).getRawValue()); + FunctionToLambdasMapRecord.push_back(Pair.second.size()); + for (const auto &Lambda : Pair.second) + FunctionToLambdasMapRecord.push_back(Lambda.getRawValue()); + } + + auto Abv = std::make_shared<llvm::BitCodeAbbrev>(); + Abv->Add(llvm::BitCodeAbbrevOp(FUNCTION_DECL_TO_LAMBDAS_MAP)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); + unsigned FunctionToLambdaMapAbbrev = Stream.EmitAbbrev(std::move(Abv)); + Stream.EmitRecord(FUNCTION_DECL_TO_LAMBDAS_MAP, FunctionToLambdasMapRecord, + FunctionToLambdaMapAbbrev); + } + const TranslationUnitDecl *TU = Context.getTranslationUnitDecl(); // Create a lexical update block containing all of the declarations in the // translation unit that do not come from other AST files. diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 555f6325da646b..50c090b195d619 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -1521,6 +1521,11 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) { } else { Record.push_back(0); } + // For lambdas inside canonical FunctionDecl remember the mapping. + if (auto FD = llvm::dyn_cast_or_null<FunctionDecl>(D->getDeclContext()); + FD && FD->isCanonicalDecl()) { + Writer.FunctionToLambdasMap[FD].push_back(Writer.GetDeclRef(D)); + } } else { Record.push_back(CXXRecNotTemplate); } diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp new file mode 100644 index 00000000000000..80844a58ad825a --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp @@ -0,0 +1,76 @@ +// RUN: rm -fR %t +// RUN: split-file %s %t +// RUN: cd %t +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized folly-conv.h +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized thrift_cpp2_base.h +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized -fmodule-file=folly-conv.pcm -fmodule-file=thrift_cpp2_base.pcm logger_base.h + +//--- Conv.h +#pragma once + +template <typename _Tp, typename _Up = _Tp&&> +_Up __declval(int); + +template <typename _Tp> +auto declval() noexcept -> decltype(__declval<_Tp>(0)); + +namespace folly { + +template <class Value, class Error> +struct Expected { + template <class Yes> + auto thenOrThrow() -> decltype(declval<Value&>()) { + return 1; + } +}; + +struct ExpectedHelper { + template <class Error, class T> + static constexpr Expected<T, Error> return_(T) { + return Expected<T, Error>(); + } + + template <class This, class Fn, class E = int, class T = ExpectedHelper> + static auto then_(This&&, Fn&&) + -> decltype(T::template return_<E>((declval<Fn>()(true), 0))) { + return Expected<int, int>(); + } +}; + +template <class Tgt> +inline Expected<Tgt, const char*> tryTo() { + Tgt result = 0; + // In build with asserts: + // clang/lib/Sema/SemaTemplateInstantiate.cpp: llvm::PointerUnion<Decl *, LocalInstantiationScope::DeclArgumentPack *> *clang::LocalInstantiationScope::findInstantiationOf(const Decl *): Assertion `isa<LabelDecl>(D) && "declaration not instantiated in this scope"' failed. + // In release build compilation error on the line below inside lambda: + // error: variable 'result' is uninitialized when used here [-Werror,-Wuninitialized] + ExpectedHelper::then_(Expected<bool, int>(), [&](bool) { return result; }); + return {}; +} + +} // namespace folly + +inline void bar() { + folly::tryTo<int>(); +} +// expected-no-diagnostics + +//--- folly-conv.h +#pragma once +#include "Conv.h" +// expected-no-diagnostics + +//--- thrift_cpp2_base.h +#pragma once +#include "Conv.h" +// expected-no-diagnostics + +//--- logger_base.h +#pragma once +import "folly-conv.h"; +import "thrift_cpp2_base.h"; + +inline void foo() { + folly::tryTo<unsigned>(); +} +// expected-no-diagnostics diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp new file mode 100644 index 00000000000000..5b1a904e928a68 --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp @@ -0,0 +1,30 @@ +// RUN: rm -fR %t +// RUN: split-file %s %t +// RUN: cd %t +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header header.h +// RUN: %clang_cc1 -std=c++20 -fmodule-file=header.pcm main.cpp + +//--- header.h +template <typename T> +void f(T) {} + +class A { + virtual ~A(); +}; + +inline A::~A() { + f([](){}); +} + +struct B { + void g() { + f([](){ + [](){}; + }); + } +}; +// expected-no-diagnostics + +//--- main.cpp +import "header.h"; +// expected-no-diagnostics diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp new file mode 100644 index 00000000000000..646ff9f745710b --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 %s -std=c++11 -emit-pch -o %t +// RUN: %clang_cc1 %s -std=c++11 -include-pch %t -fsyntax-only -verify + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// No crash or assertion failure on multiple nested lambdas deserialization. +template <typename T> +void b() { + [] { + []{ + []{ + []{ + []{ + }(); + }(); + }(); + }(); + }(); +} + +void foo() { + b<int>(); +} +#endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits