kinu created this revision.
Herald added a reviewer: NoQ.
Herald added a project: All.
kinu requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
Usually RecordValues for record objects (e.g. struct) are initialized with
`Environment::createValue()` which internally calls `getObjectFields()` to
collects all fields from the current and base classes, and then filter them
with `ModeledValues` via `DACtx::getModeledFields()` so that the fields that
are actually referenced are modeled.

The consistent set of fields should be initialized when a record is initialized
with an initializer list (InitListExpr), however the existing code's behavior
was different.

Before this patch:

- When a struct is initialized with InitListExpr, its fields are initialized 
based on what is returned by `getFieldsForInitListExpr()`, which only collects 
the direct fields in the current class, but not from the base classes. 
Moreover, if the base classes have their own InitListExpr, values that are 
initialized by their InitListExpr's weren't merged into the child objects.

After this patch:

- When a struct is initialized with InitListExpr, it collects and merges the 
fields in the base classes that were initialized by their InitListExpr's. The 
code also asserts that the consistent set of fields are initialized with the 
ModeledFields.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D159284

Files:
  clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
  clang/lib/Analysis/FlowSensitive/Transfer.cpp
  clang/unittests/Analysis/FlowSensitive/TransferTest.cpp

Index: clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
===================================================================
--- clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -1438,6 +1438,150 @@
       llvm::Succeeded());
 }
 
+TEST(TransferTest, StructModeledFieldsWithComplicatedInheritance) {
+  std::string Code = R"(
+    struct Base3 {
+      int x7;
+      int x8;
+    };
+    struct Base2 : Base3 {
+      int x5;
+      int x6;
+    };
+    struct Base {
+      int x3;
+      int x4;
+    };
+    struct Foo : public Base, Base2 {
+      int x1;
+      int x2;
+    };
+
+    void target() {
+      Foo F;
+      F.x4 = F.x2;
+      F.x6 = 1;
+      F.x8 = 1;
+      // [[p]]
+    }
+  )";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        const Environment &Env =
+              getEnvironmentAtAnnotation(Results, "p");
+
+        const ValueDecl *FooDecl = findValueDecl(ASTCtx, "F");
+        const ValueDecl *X1Decl = findValueDecl(ASTCtx, "x1");
+        const ValueDecl *X2Decl = findValueDecl(ASTCtx, "x2");
+        const ValueDecl *X3Decl = findValueDecl(ASTCtx, "x3");
+        const ValueDecl *X4Decl = findValueDecl(ASTCtx, "x4");
+        const ValueDecl *X5Decl = findValueDecl(ASTCtx, "x5");
+        const ValueDecl *X6Decl = findValueDecl(ASTCtx, "x6");
+        const ValueDecl *X7Decl = findValueDecl(ASTCtx, "x7");
+        const ValueDecl *X8Decl = findValueDecl(ASTCtx, "x8");
+        ASSERT_THAT(FooDecl, NotNull());
+
+        // Only "x2", "x4", "x6", and "x8" are accessed and exist in the model.
+        const auto *FooLoc =
+            cast<RecordStorageLocation>(Env.getStorageLocation(*FooDecl));
+        llvm::DenseSet<const ValueDecl*> Fields;
+        for (auto [Field, _] : FooLoc->children()) {
+          Fields.insert(Field);
+        }
+        ASSERT_EQ(Fields.size(), 4);
+        ASSERT_TRUE(Fields.contains(X2Decl));
+        ASSERT_TRUE(Fields.contains(X4Decl));
+        ASSERT_TRUE(Fields.contains(X6Decl));
+        ASSERT_TRUE(Fields.contains(X8Decl));
+
+        // "x1", "x3", "x5", "x7" are not used therefore are filtered out.
+        ASSERT_FALSE(Fields.contains(X1Decl));
+        ASSERT_FALSE(Fields.contains(X3Decl));
+        ASSERT_FALSE(Fields.contains(X5Decl));
+        ASSERT_FALSE(Fields.contains(X7Decl));
+      });
+}
+
+TEST(TransferTest, StructInitializerListWithComplicatedInheritance) {
+  std::string Code = R"(
+    struct Base3 {
+      int x7;
+      int x8;
+    };
+    struct Base2 : Base3 {
+      int x5;
+      int x6;
+    };
+    struct Base {
+      int x3;
+      int x4;
+    };
+    struct Foo : public Base, Base2 {
+      int x1;
+      int x2;
+    };
+
+    void target() {
+      Foo F = {};
+      F.x4 = F.x2;
+      F.x6 = 1;
+      F.x8 = 1;
+      // [[p]]
+    }
+  )";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        const Environment &Env =
+              getEnvironmentAtAnnotation(Results, "p");
+
+        const ValueDecl *FooDecl = findValueDecl(ASTCtx, "F");
+        const ValueDecl *X1Decl = findValueDecl(ASTCtx, "x1");
+        const ValueDecl *X2Decl = findValueDecl(ASTCtx, "x2");
+        const ValueDecl *X3Decl = findValueDecl(ASTCtx, "x3");
+        const ValueDecl *X4Decl = findValueDecl(ASTCtx, "x4");
+        const ValueDecl *X5Decl = findValueDecl(ASTCtx, "x5");
+        const ValueDecl *X6Decl = findValueDecl(ASTCtx, "x6");
+        const ValueDecl *X7Decl = findValueDecl(ASTCtx, "x7");
+        const ValueDecl *X8Decl = findValueDecl(ASTCtx, "x8");
+        ASSERT_THAT(FooDecl, NotNull());
+
+        // When a struct is initialized with a initializer list, all the
+        // fields are considered "accessed", and therefore do exist.
+        // (Without the initializer, we should only see "x2", "x4", "x6",
+        // and "x8")
+        const auto *FooLoc =
+            cast<RecordStorageLocation>(Env.getStorageLocation(*FooDecl));
+        const auto *X1Val =
+            cast<IntegerValue>(getFieldValue(FooLoc, *X1Decl, Env));
+        const auto *X2Val =
+            cast<IntegerValue>(getFieldValue(FooLoc, *X2Decl, Env));
+        const auto *X3Val =
+            cast<IntegerValue>(getFieldValue(FooLoc, *X3Decl, Env));
+        const auto *X4Val =
+            cast<IntegerValue>(getFieldValue(FooLoc, *X4Decl, Env));
+        const auto *X5Val =
+            cast<IntegerValue>(getFieldValue(FooLoc, *X5Decl, Env));
+        const auto *X6Val =
+            cast<IntegerValue>(getFieldValue(FooLoc, *X6Decl, Env));
+        const auto *X7Val =
+            cast<IntegerValue>(getFieldValue(FooLoc, *X7Decl, Env));
+        const auto *X8Val =
+            cast<IntegerValue>(getFieldValue(FooLoc, *X8Decl, Env));
+        ASSERT_THAT(X1Val, NotNull());
+        ASSERT_THAT(X2Val, NotNull());
+        ASSERT_THAT(X3Val, NotNull());
+        ASSERT_THAT(X4Val, NotNull());
+        ASSERT_THAT(X5Val, NotNull());
+        ASSERT_THAT(X6Val, NotNull());
+        ASSERT_THAT(X7Val, NotNull());
+        ASSERT_THAT(X8Val, NotNull());
+      });
+}
+
 TEST(TransferTest, ReferenceMember) {
   std::string Code = R"(
     struct A {
@@ -2043,6 +2187,50 @@
       });
 }
 
+TEST(TransferTest, AssignmentOperatorWithInitAndInheritance) {
+  // This is a crash repro.
+  std::string Code = R"(
+    struct B { int Foo; };
+    struct S : public B {};
+    void target() {
+      S S1 = { 1 };
+      S S2;
+      S S3;
+      S1 = S2;  // Only Dst has InitListExpr.
+      S3 = S1;  // Only Src has InitListExpr.
+      // [[p]]
+    }
+  )";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        const ValueDecl *S1Decl = findValueDecl(ASTCtx, "S1");
+        const ValueDecl *S2Decl = findValueDecl(ASTCtx, "S2");
+        const ValueDecl *S3Decl = findValueDecl(ASTCtx, "S3");
+        const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
+
+        const Environment &Env =
+              getEnvironmentAtAnnotation(Results, "p");
+        const auto *S1Loc =
+            cast<RecordStorageLocation>(Env.getStorageLocation(*S1Decl));
+        const auto *S2Loc =
+            cast<RecordStorageLocation>(Env.getStorageLocation(*S2Decl));
+        const auto *S3Loc =
+            cast<RecordStorageLocation>(Env.getStorageLocation(*S3Decl));
+
+        // The value of `Foo` should exist in all records and are the same.
+        const auto *S1FooVal =
+            cast<IntegerValue>(getFieldValue(S1Loc, *FooDecl, Env));
+        const auto *S2FooVal =
+            cast<IntegerValue>(getFieldValue(S2Loc, *FooDecl, Env));
+        const auto *S3FooVal =
+            cast<IntegerValue>(getFieldValue(S3Loc, *FooDecl, Env));
+        EXPECT_EQ(S1FooVal, S2FooVal);
+        EXPECT_EQ(S2FooVal, S3FooVal);
+      });
+}
+
 TEST(TransferTest, CopyConstructor) {
   std::string Code = R"(
     struct A {
@@ -2253,6 +2441,75 @@
          ASTContext &ASTCtx) {});
 }
 
+TEST(TransferTest, CopyConstructorWithInitializerAndInheritance) {
+  // This is a crash repro.
+  std::string Code = R"(
+    struct B { int Foo; };
+    struct S : public B {};
+    void target() {
+      S S1 = { 1 };
+      S S2(S1);
+      // [[p]]
+    }
+  )";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        const ValueDecl *S1Decl = findValueDecl(ASTCtx, "S1");
+        const ValueDecl *S2Decl = findValueDecl(ASTCtx, "S2");
+        const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
+
+        const Environment &Env =
+              getEnvironmentAtAnnotation(Results, "p");
+        const auto *S1Loc =
+            cast<RecordStorageLocation>(Env.getStorageLocation(*S1Decl));
+        const auto *S2Loc =
+            cast<RecordStorageLocation>(Env.getStorageLocation(*S2Decl));
+
+        // The value of `Foo` should exist in all records and are the same.
+        const auto *S1FooVal =
+            cast<IntegerValue>(getFieldValue(S1Loc, *FooDecl, Env));
+        const auto *S2FooVal =
+            cast<IntegerValue>(getFieldValue(S2Loc, *FooDecl, Env));
+        EXPECT_EQ(S1FooVal, S2FooVal);
+      });
+}
+
+TEST(TransferTest, CopyConstructorWithBaseInitAndInheritance) {
+  // This is a crash repro.
+  std::string Code = R"(
+    struct Bar { int Foo; };
+    struct B { Bar Bar = { 1 }; };
+    struct S : public B {};
+    void target() {
+      S S1 = {};
+      S S2(S1);
+      // [[p]]
+    }
+  )";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {});
+}
+
+TEST(TransferTest, ReturnStructWithInheritance) {
+  // This is a crash repro. (Returning a struct involves copy/move constructor)
+  std::string Code = R"(
+    struct B { int Foo; };
+    struct S : public B { };
+    S target() {
+      S S1 = { 1 };
+      return S1;
+    }
+  )";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {});
+}
+
 TEST(TransferTest, MoveConstructor) {
   std::string Code = R"(
     namespace std {
Index: clang/lib/Analysis/FlowSensitive/Transfer.cpp
===================================================================
--- clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -27,12 +27,12 @@
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/OperatorKinds.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+#include <assert.h>
 #include <cassert>
-#include <memory>
-#include <tuple>
+
+#define DEBUG_TYPE "dataflow"
 
 namespace clang {
 namespace dataflow {
@@ -629,17 +629,64 @@
       return;
     }
 
-    std::vector<FieldDecl *> Fields =
-        getFieldsForInitListExpr(Type->getAsRecordDecl());
+    // Type comparison for assertions.
+    [[maybe_unused]] auto GetCanonicalType = [](QualType Ty) {
+      return Ty.getCanonicalType().getUnqualifiedType();
+    };
+
     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
 
-    for (auto [Field, Init] : llvm::zip(Fields, S->inits())) {
-      assert(Field != nullptr);
-      assert(Init != nullptr);
+    // This only contains the direct fields for the given type.
+    std::vector<FieldDecl *> FieldsForInit =
+        getFieldsForInitListExpr(Type->getAsRecordDecl());
 
-      FieldLocs.insert({Field, &Env.createObject(Field->getType(), Init)});
+    // `S->init()` contains all the InitListExprs including the direct base
+    // classes.
+    auto Inits = S->inits();
+    int InitIdx = 0;
+
+    if (auto* R = S->getType()->getAsCXXRecordDecl()) {
+      assert(FieldsForInit.size() + R->getNumBases() == Inits.size());
+      for ([[maybe_unused]] const CXXBaseSpecifier &Base : R->bases()) {
+        assert(InitIdx < Inits.size());
+        auto Init = Inits[InitIdx++];
+        assert(GetCanonicalType(Base.getType()) == GetCanonicalType(Init->getType()));
+        auto* RecordVal = dyn_cast<RecordValue>(Env.getValue(*Init));
+        assert(RecordVal != nullptr);
+        auto Children = RecordVal->getLoc().children();
+        FieldLocs.insert(Children.begin(), Children.end());
+      }
     }
 
+    assert(FieldsForInit.size() == Inits.size() - InitIdx);
+    for (auto Field : FieldsForInit) {
+      assert(InitIdx < Inits.size());
+      auto Init = Inits[InitIdx++];
+      assert(
+          // The types are same, or
+          GetCanonicalType(Field->getType()) == GetCanonicalType(Init->getType()) ||
+          // The field's type is T&, and initializer is T
+          (Field->getType()->isReferenceType() &&
+              GetCanonicalType(Field->getType())->getPointeeType() ==
+              GetCanonicalType(Init->getType())));
+      auto& Loc = Env.createObject(Field->getType(), Init);
+      FieldLocs.insert({Field, &Loc});
+    }
+
+    LLVM_DEBUG({
+      // `ModeledFields` also contains from all the bases, but only the modeled
+      // ones. Having an initializer list for a struct basically populates all
+      // the fields for the struct, so the fields set that is populated here
+      // should match the modeled fields without additional filtering.
+      auto Modeled = Env.getDataflowAnalysisContext().getModeledFields(Type);
+      llvm::DenseSet<const Decl *> ModeledFields;
+      ModeledFields.insert(Modeled.begin(), Modeled.end());
+      assert(ModeledFields.size() == FieldLocs.size());
+      for ([[maybe_unused]] auto [Field, Loc] : FieldLocs) {
+        assert(ModeledFields.contains(Field));
+      }
+    });
+
     auto &Loc =
         Env.getDataflowAnalysisContext().arena().create<RecordStorageLocation>(
             Type, std::move(FieldLocs));
Index: clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
===================================================================
--- clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -126,6 +126,8 @@
     // Values to be merged are always associated with the same location in
     // `LocToVal`. The location stored in `RecordVal` should therefore also
     // be the same.
+    // FIXME: This assertion doesn't seem to necessarily hold when a record is
+    // initialized with InitListExpr.
     assert(&RecordVal1->getLoc() == &RecordVal2->getLoc());
 
     // `RecordVal1` and `RecordVal2` may have different properties associated
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to