llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-analysis Author: None (martinboehme) <details> <summary>Changes</summary> - [clang][dataflow] Strengthen widening of boolean values. - [clang][dataflow] Defer initialization of `Environment`. - [clang][dataflow] Add synthetic fields to `RecordStorageLocation`. - [clang][dataflow] Make UncheckedOptionalAccessModel use synthetic fields. --- Patch is 66.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73518.diff 19 Files Affected: - (modified) clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h (+17-1) - (modified) clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h (+46) - (modified) clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h (+19-6) - (modified) clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h (+1-12) - (modified) clang/include/clang/Analysis/FlowSensitive/RecordOps.h (+9-8) - (modified) clang/include/clang/Analysis/FlowSensitive/StorageLocation.h (+26-6) - (modified) clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp (+39-1) - (modified) clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (+37-57) - (modified) clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp (+4) - (modified) clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp (+108-270) - (modified) clang/lib/Analysis/FlowSensitive/RecordOps.cpp (+24) - (modified) clang/lib/Analysis/FlowSensitive/Transfer.cpp (+12-14) - (modified) clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp (+59-2) - (modified) clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp (+5) - (modified) clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp (+50-9) - (modified) clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp (+6-3) - (modified) clang/unittests/Analysis/FlowSensitive/TestingSupport.h (+3-1) - (modified) clang/unittests/Analysis/FlowSensitive/TransferTest.cpp (+28) - (modified) clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp (+2-2) ``````````diff diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h index c5f14cfcd4f7bee..1dffbe8a09c3609 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h @@ -234,6 +234,22 @@ runDataflowAnalysis( return std::move(BlockStates); } +// Create an analysis class that is derived from `DataflowAnalysis`. This is an +// SFINAE adapter that allows us to call two different variants of constructor +// (either with or without the optional `Environment` parameter). +// FIXME: Make all classes derived from `DataflowAnalysis` take an `Environment` +// parameter in their constructor so that we can get rid of this abomination. +template <typename AnalysisT> +auto createAnalysis(ASTContext &ASTCtx, Environment &Env) + -> decltype(AnalysisT(ASTCtx, Env)) { + return AnalysisT(ASTCtx, Env); +} +template <typename AnalysisT> +auto createAnalysis(ASTContext &ASTCtx, Environment &Env) + -> decltype(AnalysisT(ASTCtx)) { + return AnalysisT(ASTCtx); +} + /// Runs a dataflow analysis over the given function and then runs `Diagnoser` /// over the results. Returns a list of diagnostics for `FuncDecl` or an /// error. Currently, errors can occur (at least) because the analysis requires @@ -262,7 +278,7 @@ llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction( const WatchedLiteralsSolver *Solver = OwnedSolver.get(); DataflowAnalysisContext AnalysisContext(std::move(OwnedSolver)); Environment Env(AnalysisContext, FuncDecl); - AnalysisT Analysis(ASTCtx); + AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env); llvm::SmallVector<Diagnostic> Diagnostics; if (llvm::Error Err = runTypeErasedDataflowAnalysis( diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h index c1aa8484817a9d9..20e45cc27b01fa8 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h @@ -58,6 +58,10 @@ using FieldSet = llvm::SmallSetVector<const FieldDecl *, 4>; /// Returns the set of all fields in the type. FieldSet getObjectFields(QualType Type); +/// Returns whether `Fields` and `FieldLocs` contain the same fields. +bool containsSameFields(const FieldSet &Fields, + const RecordStorageLocation::FieldToLoc &FieldLocs); + struct ContextSensitiveOptions { /// The maximum depth to analyze. A value of zero is equivalent to disabling /// context-sensitive analysis entirely. @@ -92,11 +96,39 @@ class DataflowAnalysisContext { /*Logger=*/nullptr}); ~DataflowAnalysisContext(); + /// Sets a callback that returns the names and types of the synthetic fields + /// to add to a `RecordStorageLocation` of a given type. + /// Typically, this is called from the constructor of a `DataflowAnalysis` + /// + /// To maintain the invariant that all `RecordStorageLocation`s of a given + /// type have the same fields: + /// * The callback must always return the same result for a given type + /// * `setSyntheticFieldCallback()` must be called before any + // `RecordStorageLocation`s are created. + void setSyntheticFieldCallback( + std::function<llvm::StringMap<QualType>(QualType)> CB) { + assert(!RecordStorageLocationCreated); + SyntheticFieldCallback = CB; + } + /// Returns a new storage location appropriate for `Type`. /// /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`. StorageLocation &createStorageLocation(QualType Type); + /// Creates a `RecordStorageLocation` for the given type and with the given + /// fields. + /// + /// Requirements: + /// + /// `FieldLocs` must contain exactly the fields returned by + /// `getModeledFields(Type)`. + /// `SyntheticFields` must contain exactly the fields returned by + /// `getSyntheticFields(Type)`. + RecordStorageLocation &createRecordStorageLocation( + QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, + RecordStorageLocation::SyntheticFieldMap SyntheticFields); + /// Returns a stable storage location for `D`. StorageLocation &getStableStorageLocation(const ValueDecl &D); @@ -169,6 +201,15 @@ class DataflowAnalysisContext { /// context. FieldSet getModeledFields(QualType Type); + /// Returns the names and types of the synthetic fields for the given record + /// type. + llvm::StringMap<QualType> getSyntheticFields(QualType Type) { + assert(Type->isRecordType()); + if (SyntheticFieldCallback) + return SyntheticFieldCallback(Type); + return {}; + } + private: friend class Environment; @@ -250,6 +291,11 @@ class DataflowAnalysisContext { FieldSet ModeledFields; std::unique_ptr<Logger> LogOwner; // If created via flags. + + std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback; + + /// Has any `RecordStorageLocation` been created yet? + bool RecordStorageLocationCreated = false; }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index 7c1f5491096326b..8502f4da7e5414f 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -166,6 +166,10 @@ class Environment { /// with a symbolic representation of the `this` pointee. Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx); + /// Assigns storage locations and values to all global variables, fields + /// and functions referenced in `FuncDecl`. `FuncDecl` must have a body. + void initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl); + /// Returns a new environment that is a copy of this one. /// /// The state of the program is initially the same, but can be mutated without @@ -283,7 +287,15 @@ class Environment { /// Returns the storage location assigned to the `this` pointee in the /// environment or null if the `this` pointee has no assigned storage location /// in the environment. - RecordStorageLocation *getThisPointeeStorageLocation() const; + RecordStorageLocation *getThisPointeeStorageLocation() const { + return ThisPointeeLoc; + } + + /// Sets the storage location assigned to the `this` pointee in the + /// environment. + void setThisPointeeStorageLocation(RecordStorageLocation &Loc) { + ThisPointeeLoc = &Loc; + } /// Returns the location of the result object for a record-type prvalue. /// @@ -367,7 +379,8 @@ class Environment { /// storage locations and values for indirections until it finds a /// non-pointer/non-reference type. /// - /// If `Type` is a class, struct, or union type, calls `setValue()` to + /// If `Type` is a class, struct, or union type, creates values for all + /// modeled fields (including synthetic fields) and calls `setValue()` to /// associate the `RecordValue` with its storage location /// (`RecordValue::getLoc()`). /// @@ -570,6 +583,9 @@ class Environment { return dyn_cast<FunctionDecl>(getDeclCtx()); } + /// Returns the size of the call stack. + size_t callStackSize() const { return CallStack.size(); } + /// Returns whether this `Environment` can be extended to analyze the given /// `Callee` (i.e. if `pushCall` can be used), with recursion disallowed and a /// given `MaxDepth`. @@ -629,10 +645,7 @@ class Environment { void pushCallInternal(const FunctionDecl *FuncDecl, ArrayRef<const Expr *> Args); - /// Assigns storage locations and values to all global variables, fields - /// and functions referenced in `FuncDecl`. `FuncDecl` must have a body. - void initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl); - +private: // `DACtx` is not null and not owned by this object. DataflowAnalysisContext *DACtx; diff --git a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h index aeaf75b2154222c..09eb8b93822612f 100644 --- a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h +++ b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h @@ -45,7 +45,7 @@ struct UncheckedOptionalAccessModelOptions { class UncheckedOptionalAccessModel : public DataflowAnalysis<UncheckedOptionalAccessModel, NoopLattice> { public: - UncheckedOptionalAccessModel(ASTContext &Ctx); + UncheckedOptionalAccessModel(ASTContext &Ctx, dataflow::Environment &Env); /// Returns a matcher for the optional classes covered by this model. static ast_matchers::DeclarationMatcher optionalClassDecl(); @@ -54,17 +54,6 @@ class UncheckedOptionalAccessModel void transfer(const CFGElement &Elt, NoopLattice &L, Environment &Env); - ComparisonResult compare(QualType Type, const Value &Val1, - const Environment &Env1, const Value &Val2, - const Environment &Env2) override; - - bool merge(QualType Type, const Value &Val1, const Environment &Env1, - const Value &Val2, const Environment &Env2, Value &MergedVal, - Environment &MergedEnv) override; - - Value *widen(QualType Type, Value &Prev, const Environment &PrevEnv, - Value &Current, Environment &CurrentEnv) override; - private: CFGMatchSwitch<TransferState<NoopLattice>> TransferMatchSwitch; }; diff --git a/clang/include/clang/Analysis/FlowSensitive/RecordOps.h b/clang/include/clang/Analysis/FlowSensitive/RecordOps.h index f007a947a82f132..7b87840d626b4b6 100644 --- a/clang/include/clang/Analysis/FlowSensitive/RecordOps.h +++ b/clang/include/clang/Analysis/FlowSensitive/RecordOps.h @@ -21,10 +21,10 @@ namespace dataflow { /// Copies a record (struct, class, or union) from `Src` to `Dst`. /// -/// This performs a deep copy, i.e. it copies every field and recurses on -/// fields of record type. It also copies properties from the `RecordValue` -/// associated with `Src` to the `RecordValue` associated with `Dst` (if these -/// `RecordValue`s exist). +/// This performs a deep copy, i.e. it copies every field (including synthetic +/// fields) and recurses on fields of record type. It also copies properties +/// from the `RecordValue` associated with `Src` to the `RecordValue` associated +/// with `Dst` (if these `RecordValue`s exist). /// /// If there is a `RecordValue` associated with `Dst` in the environment, this /// function creates a new `RecordValue` and associates it with `Dst`; clients @@ -47,10 +47,11 @@ void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst, /// retrieved from `Env2`. A convenience overload retrieves values for `Loc1` /// and `Loc2` from the same environment. /// -/// This performs a deep comparison, i.e. it compares every field and recurses -/// on fields of record type. Fields of reference type compare equal if they -/// refer to the same storage location. If `RecordValue`s are associated with -/// `Loc1` and `Loc2`, it also compares the properties on those `RecordValue`s. +/// This performs a deep comparison, i.e. it compares every field (including +/// synthetic fields) and recurses on fields of record type. Fields of reference +/// type compare equal if they refer to the same storage location. If +/// `RecordValue`s are associated with `Loc1` and Loc2`, it also compares the +/// properties on those `RecordValue`s. /// /// Note on how to interpret the result: /// - If this returns true, the records are guaranteed to be equal at runtime. diff --git a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h index 89d2bbfbb69f9fb..b7203378189d6bf 100644 --- a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h +++ b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h @@ -73,7 +73,13 @@ class ScalarStorageLocation final : public StorageLocation { /// /// Contains storage locations for all modeled fields of the record (also /// referred to as "children"). The child map is flat, so accessible members of -/// the base class are directly accesible as children of this location. +/// the base class are directly accessible as children of this location. +/// +/// Record storage locations may also contain so-called synthetic fields. These +/// are typically used to the internal state of a class (e.g. the value stored +/// in a `std::optional`) without having to depend on that class's +/// implementation details. All `RecordStorageLocation`s of a given type should +/// have the same synthetic fields. /// /// The storage location for a field of reference type may be null. This /// typically occurs in one of two situations: @@ -88,12 +94,12 @@ class ScalarStorageLocation final : public StorageLocation { class RecordStorageLocation final : public StorageLocation { public: using FieldToLoc = llvm::DenseMap<const ValueDecl *, StorageLocation *>; + using SyntheticFieldMap = llvm::StringMap<StorageLocation *>; - explicit RecordStorageLocation(QualType Type) - : RecordStorageLocation(Type, FieldToLoc()) {} - - RecordStorageLocation(QualType Type, FieldToLoc TheChildren) - : StorageLocation(Kind::Record, Type), Children(std::move(TheChildren)) { + RecordStorageLocation(QualType Type, FieldToLoc TheChildren, + SyntheticFieldMap TheSyntheticFields) + : StorageLocation(Kind::Record, Type), Children(std::move(TheChildren)), + SyntheticFields(std::move(TheSyntheticFields)) { assert(!Type.isNull()); assert(Type->isRecordType()); assert([this] { @@ -133,6 +139,19 @@ class RecordStorageLocation final : public StorageLocation { return It->second; } + /// Returns the storage location for the synthetic field `Name`. + /// The synthetic field must exist. + StorageLocation &getSyntheticField(llvm::StringRef Name) const { + StorageLocation *Loc = SyntheticFields.lookup(Name); + assert(Loc != nullptr); + return *Loc; + } + + llvm::iterator_range<SyntheticFieldMap::const_iterator> + synthetic_fields() const { + return {SyntheticFields.begin(), SyntheticFields.end()}; + } + /// Changes the child storage location for a field `D` of reference type. /// All other fields cannot change their storage location and always retain /// the storage location passed to the `RecordStorageLocation` constructor. @@ -151,6 +170,7 @@ class RecordStorageLocation final : public StorageLocation { private: FieldToLoc Children; + SyntheticFieldMap SyntheticFields; }; } // namespace dataflow diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index 0a2fcd4ad695a30..fa114979c8e3263 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -68,11 +68,38 @@ StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { else FieldLocs.insert({Field, &createStorageLocation( Field->getType().getNonReferenceType())}); - return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs)); + + RecordStorageLocation::SyntheticFieldMap SyntheticFields; + for (const auto &Entry : getSyntheticFields(Type)) + SyntheticFields.insert( + {Entry.getKey(), + &createStorageLocation(Entry.getValue().getNonReferenceType())}); + + return createRecordStorageLocation(Type, std::move(FieldLocs), + std::move(SyntheticFields)); } return arena().create<ScalarStorageLocation>(Type); } +// Returns the keys for a given `StringMap`. +// Can't use `StringSet` as the return type as it doesn't support `operator==`. +template <typename T> +static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) { + return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end()); +} + +RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation( + QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, + RecordStorageLocation::SyntheticFieldMap SyntheticFields) { + assert(Type->isRecordType()); + assert(containsSameFields(getModeledFields(Type), FieldLocs)); + assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields)); + + RecordStorageLocationCreated = true; + return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs), + std::move(SyntheticFields)); +} + StorageLocation & DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) { if (auto *Loc = DeclToLoc.lookup(&D)) @@ -367,3 +394,14 @@ clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { getFieldsFromClassHierarchy(Type, Fields); return Fields; } + +bool clang::dataflow::containsSameFields( + const clang::dataflow::FieldSet &Fields, + const clang::dataflow::RecordStorageLocation::FieldToLoc &FieldLocs) { + if (Fields.size() != FieldLocs.size()) + return false; + for ([[maybe_unused]] auto [Field, Loc] : FieldLocs) + if (!Fields.contains(cast_or_null<FieldDecl>(Field))) + return false; + return true; +} diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index 1a38be9c1374f65..983375da9cc3a83 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -157,12 +157,25 @@ static Value &widenDistinctValues(QualType Type, Value &Prev, Environment &CurrentEnv, Environment::ValueModel &Model) { // Boolean-model widening. - if (isa<BoolValue>(&Prev)) { - assert(isa<BoolValue>(Current)); - // Widen to Top, because we know they are different values. If previous was - // already Top, re-use that to (implicitly) indicate that no change occured. + if (auto *PrevBool = dyn_cast<BoolValue>(&Prev)) { + // If previous value was already Top, re-use that to (implicitly) indicate + // that no change occurred. if (isa<TopBoolValue>(Prev)) return Prev; + + // We may need to widen to Top, but before we do so, check whether both + // values are implied to be either true or false in the current environment. + // In that case, we can simply return a literal instead. + auto &CurBool = cast<BoolValue>(Current); + bool TruePrev = PrevEnv.proves(PrevBool->formula()); + bool TrueCur = CurrentEnv.proves(CurBool.formula()); + if (TruePrev && TrueCur) + return CurrentEnv.getBoolLiteralValue(true); + if (!TruePrev && !TrueCur && + PrevEnv.proves(PrevEnv.arena().makeNot(PrevBool->formula())) && + CurrentEnv.proves(CurrentEnv.arena().makeNot(CurBool.formula()))) + return CurrentEnv.getBoolLiteralValue(false); + return CurrentEnv.makeTopBoolValue(); } @@ -354,6 +367,16 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, } } +Environment::Environment(DataflowAnalysisContext &DACtx) + : DACtx(&DACtx), + FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} + +Environment::Environment(DataflowAnalysisContext &DACtx, + const DeclContext &DeclCtx) + : Environment(DACtx) { + CallStack.push_back(&DeclCtx); +} + // FIXME: Add support for resetting globals after function calls to enable // the implementation of sound analyses. void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { @@ -403,59 +426,12 @@ void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { } } -Environment::Environment(DataflowAnalysisContext &DACtx) - : DACtx(&DACtx), - FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} - Environment Environment::fork() const { Environment Copy(*this); Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); return Copy; } -Environment::Environment(DataflowAnalysisContext &DACtx, - const DeclCont... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/73518 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits