llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-static-analyzer-1 Author: Balazs Benics (steakhal) <details> <summary>Changes</summary> Previously, ExprEngine would just skip copying empty structs. Let's make it trigger the copy event even for empty structs. Split from #<!-- -->114835 --- Full diff: https://github.com/llvm/llvm-project/pull/115918.diff 3 Files Affected: - (modified) clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp (+9-17) - (modified) clang/lib/StaticAnalyzer/Core/RegionStore.cpp (+44-9) - (modified) clang/test/Analysis/ctor-trivial-copy.cpp (+95-7) ``````````diff diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index ccc3097e8d2f97..17ee1f7c945edd 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -68,23 +68,15 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred, Bldr.takeNodes(Pred); assert(ThisRD); - if (!ThisRD->isEmpty()) { - // Load the source value only for non-empty classes. - // Otherwise it'd retrieve an UnknownVal - // and bind it and RegionStore would think that the actual value - // in this region at this offset is unknown. - SVal V = Call.getArgSVal(0); - - // If the value being copied is not unknown, load from its location to get - // an aggregate rvalue. - if (std::optional<Loc> L = V.getAs<Loc>()) - V = Pred->getState()->getSVal(*L); - else - assert(V.isUnknownOrUndef()); - evalBind(Dst, CallExpr, Pred, ThisVal, V, true); - } else { - Dst.Add(Pred); - } + SVal V = Call.getArgSVal(0); + + // If the value being copied is not unknown, load from its location to get + // an aggregate rvalue. 
+ if (std::optional<Loc> L = V.getAs<Loc>()) + V = Pred->getState()->getSVal(*L); + else + assert(V.isUnknownOrUndef()); + evalBind(Dst, CallExpr, Pred, ThisVal, V, true); PostStmt PS(CallExpr, LCtx); for (ExplodedNode *N : Dst) { diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 6bad9a93a30169..b54ba43ff42414 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -608,6 +608,12 @@ class RegionStoreManager : public StoreManager { return getBinding(getRegionBindings(S), L, T); } + /// Returns the value of the default binding of region \p BaseR + /// if and only if that is the unique binding in the cluster of \p BaseR. + /// \p BaseR must be a base region. + std::optional<SVal> getUniqueDefaultBinding(Store S, + const MemRegion *BaseR) const; + std::optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override { RegionBindingsRef B = getRegionBindings(S); // Default bindings are always applied over a base region so look up the @@ -2336,20 +2342,16 @@ NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B, return svalBuilder.makeLazyCompoundVal(StoreRef(B.asStore(), *this), R); } -static bool isRecordEmpty(const RecordDecl *RD) { - if (!RD->field_empty()) - return false; - if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) - return CRD->getNumBases() == 0; - return true; -} - SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B, const TypedValueRegion *R) { const RecordDecl *RD = R->getValueType()->castAs<RecordType>()->getDecl(); - if (!RD->getDefinition() || isRecordEmpty(RD)) + if (!RD->getDefinition()) return UnknownVal(); + // We also create a LCV for copying empty structs because then the store + // behavior doesn't depend on the struct layout. + // This way even an empty struct can carry taint, no matter if creduce drops + // the last field member or not. 
return createLazyBinding(B, R); } @@ -2609,9 +2611,42 @@ RegionBindingsRef RegionStoreManager::bindVector(RegionBindingsConstRef B, return NewB; } +std::optional<SVal> +RegionStoreManager::getUniqueDefaultBinding(Store S, + const MemRegion *BaseR) const { + assert(BaseR == BaseR->getBaseRegion() && "Expecting a base region"); + const auto *Cluster = getRegionBindings(S).lookup(BaseR); + if (!Cluster || !llvm::hasSingleElement(*Cluster)) + return std::nullopt; + + const auto [Key, Value] = *Cluster->begin(); + return Key.isDirect() ? std::optional<SVal>{} : Value; +} + std::optional<RegionBindingsRef> RegionStoreManager::tryBindSmallStruct( RegionBindingsConstRef B, const TypedValueRegion *R, const RecordDecl *RD, nonloc::LazyCompoundVal LCV) { + // If we try to copy a Conjured value representing the value of the whole + // struct, don't try to element-wise copy each field. + // That would unnecessarily bind Derived symbols slicing off the subregion for + // the field from the whole Conjured symbol. + // + // struct Window { int width; int height; }; + // Window getWindow(); <-- opaque fn. + // Window w = getWindow(); <-- conjures a new Window. + // Window w2 = w; <-- trivial copy "w", calling "tryBindSmallStruct" + // + // We should not end up with a new Store for "w2" like this: + // Direct [ 0..31]: Derived{Conj{}, w.width} + // Direct [32..63]: Derived{Conj{}, w.height} + // Instead, we should just bind that Conjured value instead. 
+ if (LCV.getRegion()->getBaseRegion() == LCV.getRegion()) { + if (auto Val = getUniqueDefaultBinding(LCV.getStore(), LCV.getRegion())) { + return B.addBinding(BindingKey::Make(R, BindingKey::Default), + Val.value()); + } + } + FieldVector Fields; if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(RD)) diff --git a/clang/test/Analysis/ctor-trivial-copy.cpp b/clang/test/Analysis/ctor-trivial-copy.cpp index 5ed188aa8f1eae..41d0d97161bba1 100644 --- a/clang/test/Analysis/ctor-trivial-copy.cpp +++ b/clang/test/Analysis/ctor-trivial-copy.cpp @@ -1,8 +1,12 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s \ +// RUN: 2>&1 | FileCheck %s -template<typename T> -void clang_analyzer_dump(T&); +void clang_analyzer_printState(); +template<typename T> void clang_analyzer_dump_lref(T&); +template<typename T> void clang_analyzer_dump_val(T); +template <typename T> T conjure(); +template <typename... Ts> void nop(const Ts &... 
args) {} struct aggr { int x; @@ -15,20 +19,104 @@ struct empty { void test_copy_return() { aggr s1 = {1, 2}; aggr const& cr1 = aggr(s1); - clang_analyzer_dump(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }} + clang_analyzer_dump_lref(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }} empty s2; empty const& cr2 = empty{s2}; - clang_analyzer_dump(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }} + clang_analyzer_dump_lref(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }} } void test_assign_return() { aggr s1 = {1, 2}; aggr d1; - clang_analyzer_dump(d1 = s1); // expected-warning {{&d1 }} + clang_analyzer_dump_lref(d1 = s1); // expected-warning {{&d1 }} empty s2; empty d2; - clang_analyzer_dump(d2 = s2); // expected-warning {{&d2 }} was Unknown + clang_analyzer_dump_lref(d2 = s2); // expected-warning {{&d2 }} was Unknown } + +namespace trivial_struct_copy { + +void _01_empty_structs() { + clang_analyzer_dump_val(conjure<empty>()); // expected-warning {{lazyCompoundVal}} + empty Empty = conjure<empty>(); + empty Empty2 = Empty; + empty Empty3 = Empty2; + // All of these should refer to the exact same LCV, because all of + // these trivial copies refer to the original conjured value. + // There were Unknown before: + clang_analyzer_dump_val(Empty); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}} + + // We should have the same Conjured symbol for "Empty", "Empty2" and "Empty3". 
+ clang_analyzer_printState(); + // CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Empty", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" } + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Empty2", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" } + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Empty3", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" } + // CHECK-NEXT: ]} + // CHECK-NEXT: ]}, + + nop(Empty, Empty2, Empty3); +} + +void _02_structs_with_members() { + clang_analyzer_dump_val(conjure<aggr>()); // expected-warning {{lazyCompoundVal}} + aggr Aggr = conjure<aggr>(); + aggr Aggr2 = Aggr; + aggr Aggr3 = Aggr2; + // All of these should refer to the exact same LCV, because all of + // these trivial copies refer to the original conjured value. + clang_analyzer_dump_val(Aggr); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Aggr2); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Aggr3); // expected-warning {{lazyCompoundVal}} + + // We should have the same Conjured symbol for "Aggr", "Aggr2" and "Aggr3". + // We used to have Derived symbols for the individual fields that were + // copied as part of copying the whole struct. 
+ clang_analyzer_printState(); + // CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Aggr", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" } + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Aggr2", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" } + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Aggr3", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" } + // CHECK-NEXT: ]} + // CHECK-NEXT: ]}, + + nop(Aggr, Aggr2, Aggr3); +} + +// Tests that use `clang_analyzer_printState()` must share the analysis entry +// point, and have a strict ordering between. This is to meet the different +// `clang_analyzer_printState()` calls in a fixed relative ordering, thus +// FileCheck could check the stdouts. +void entrypoint() { + _01_empty_structs(); + _02_structs_with_members(); +} + +} // namespace trivial_struct_copy `````````` </details> https://github.com/llvm/llvm-project/pull/115918 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits