llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-modules Author: Michael Jabbour (michael-jabbour-sonarsource) <details> <summary>Changes</summary> ## Problem This is a regression that was observed in Clang 20. The lazy-loading mechanism for template specializations introduced in #<!-- -->119333 can currently load additional nodes when called multiple times, which breaks assumptions made by code that iterates over specializations. This leads to iterator invalidation crashes in some scenarios. The core issue occurs when: 1. Code calls `spec_begin()` to get an iterator over template specializations. This invokes `LoadLazySpecializations()`. 2. Code then calls `spec_end()` to get the end iterator. 3. During the `spec_end()` call, `LoadExternalSpecializations()` is invoked again. 4. This can load additional specializations for certain cases, invalidating the begin iterator returned in 1. I was able to trigger the problem when constructing a ParentMapContext. The regression test demonstrates two ways to trigger the construction of the ParentMapContext on problematic code: - The ArrayBoundV2 checker - Unsigned overflow detection in sanitized builds Unfortunately, simply dumping the ast (e.g. using `-ast-dump-all`) doesn't trigger the crash because dumping requires completing the redecl chain before iterating over the specializations. ## Solution The fix ensures that the redeclaration chain is always completed **before** loading external specializations by calling `CompleteRedeclChain(D)` at the start of `LoadExternalSpecializations()`. The idea is to ensure that all `SpecLookups` are fully known and loaded before the call to `LoadExternalSpecializationsImpl()`. --- Full diff: https://github.com/llvm/llvm-project/pull/150430.diff 2 Files Affected: - (modified) clang/lib/Serialization/ASTReader.cpp (+1) - (added) clang/test/Modules/specializations-lazy-load-parentmap-crash.cpp (+99) ``````````diff diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 10aedb68fcd9d..f896f9f11c2b3 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -8488,6 +8488,7 @@ bool ASTReader::LoadExternalSpecializationsImpl(SpecLookupTableTy &SpecLookups, bool ASTReader::LoadExternalSpecializations(const Decl *D, bool OnlyPartial) { assert(D); + CompleteRedeclChain(D); bool NewSpecsFound = LoadExternalSpecializationsImpl(PartialSpecializationsLookups, D); if (OnlyPartial) diff --git a/clang/test/Modules/specializations-lazy-load-parentmap-crash.cpp b/clang/test/Modules/specializations-lazy-load-parentmap-crash.cpp new file mode 100644 index 0000000000000..2e4489bc5f437 --- /dev/null +++ b/clang/test/Modules/specializations-lazy-load-parentmap-crash.cpp @@ -0,0 +1,99 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file --leading-lines %s %t +// +// Prepare the BMIs. +// RUN: %clang_cc1 -std=c++20 -emit-module-interface -o %t/mod_a-part1.pcm %t/mod_a-part1.cppm +// RUN: %clang_cc1 -std=c++20 -emit-module-interface -o %t/mod_a-part2.pcm %t/mod_a-part2.cppm +// RUN: %clang_cc1 -std=c++20 -emit-module-interface -o %t/mod_a.pcm %t/mod_a.cppm -fmodule-file=mod_a:part2=%t/mod_a-part2.pcm -fmodule-file=mod_a:part1=%t/mod_a-part1.pcm +// RUN: %clang_cc1 -std=c++20 -emit-module-interface -o %t/mod_b.pcm %t/mod_b.cppm -fmodule-file=mod_a:part2=%t/mod_a-part2.pcm -fmodule-file=mod_a=%t/mod_a.pcm -fmodule-file=mod_a:part1=%t/mod_a-part1.pcm + +// Below are two examples to trigger the construction of the parent map (which is necessary to trigger the bug this regression test is for). +// Using ArrayBoundV2 checker: +// RUN: %clang_cc1 -std=c++20 -analyze -analyzer-checker=security,alpha.security -analyzer-output=text %t/test-array-bound-v2.cpp -fmodule-file=mod_a:part2=%t/mod_a-part2.pcm -fmodule-file=mod_a=%t/mod_a.pcm -fmodule-file=mod_a:part1=%t/mod_a-part1.pcm -fmodule-file=mod_b=%t/mod_b.pcm +// Using a sanitized build: +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fsanitize=unsigned-integer-overflow -fsanitize-undefined-ignore-overflow-pattern=all -emit-llvm -o %t/ignored %t/test-sanitized-build.cpp -fmodule-file=mod_a:part2=%t/mod_a-part2.pcm -fmodule-file=mod_a=%t/mod_a.pcm -fmodule-file=mod_a:part1=%t/mod_a-part1.pcm -fmodule-file=mod_b=%t/mod_b.pcm + +//--- mod_a-part1.cppm +module; +namespace mod_a { +template <int> struct Important; +} + +namespace mod_a { +Important<0>& instantiate1(); +} // namespace mod_a +export module mod_a:part1; + +export namespace mod_a { +using ::mod_a::instantiate1; +} + +//--- mod_a-part2.cppm +module; +namespace mod_a { +template <int> struct Important; +} + +namespace mod_a { +template <int N> Important<N>& instantiate2(); +namespace part2InternalInstantiations { +// During the construction of the parent map, we iterate over ClassTemplateDecl::specializations() for 'Important'. +// After GH119333, the following instantiations get loaded between the call to spec_begin() and spec_end(). +// This used to invalidate the begin iterator returned by spec_begin() by the time the end iterator is returned. +// This is a regression test for that. +Important<1> fn1(); +Important<2> fn2(); +Important<3> fn3(); +Important<4> fn4(); +Important<5> fn5(); +Important<6> fn6(); +Important<7> fn7(); +Important<8> fn8(); +Important<9> fn9(); +Important<10> fn10(); +Important<11> fn11(); +} +} // namespace mod_a +export module mod_a:part2; + +export namespace mod_a { +using ::mod_a::instantiate2; +} + +//--- mod_a.cppm +export module mod_a; +export import :part1; +export import :part2; + +//--- mod_b.cppm +export module mod_b; +import mod_a; + +void a() { + mod_a::instantiate1(); + mod_a::instantiate2<42>(); +} + +//--- test-array-bound-v2.cpp +import mod_b; + +extern void someFunc(char* first, char* last); +void triggerParentMapContextCreationThroughArrayBoundV2() { + // This code currently causes the ArrayBoundV2 checker to create the ParentMapContext. + // Once it detects an access to buf[100], the checker looks through the parents find '&' operator. + // (since taking the address of past-the-end pointer is allowed by the checker) + char buf[100]; + someFunc(&buf[0], &buf[100]); +} + +//--- test-sanitized-build.cpp +import mod_b; + +extern void some(); +void triggerParentMapContextCreationThroughSanitizedBuild(unsigned i) { + // This code currently causes UBSan to create the ParentMapContext. + // UBSan currently excludes the pattern below to avoid noise, and it relies on ParentMapContext to detect it. + while (i--) + some(); +} `````````` </details> https://github.com/llvm/llvm-project/pull/150430 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits