Author: Samira Bakon
Date: 2025-10-06T18:04:10Z
New Revision: f5907df0381cd46cb3f15c5f2e8b00e961d86e04

URL: 
https://github.com/llvm/llvm-project/commit/f5907df0381cd46cb3f15c5f2e8b00e961d86e04
DIFF: 
https://github.com/llvm/llvm-project/commit/f5907df0381cd46cb3f15c5f2e8b00e961d86e04.diff

LOG: [clang][dataflow] Copy only the fields present in the current derived… 
(#162100)

… type.

Our modeling of cast expressions now results in the potential presence
of fields from other types derived from the SourceLocation's type.

Also add some additional debug logging and new tests, and update an
existing test to reflect real usage of synthetic field callbacks.

Added: 
    

Modified: 
    clang/include/clang/Analysis/FlowSensitive/StorageLocation.h
    clang/lib/Analysis/FlowSensitive/RecordOps.cpp
    clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp
    clang/unittests/Analysis/FlowSensitive/TransferTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h 
b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h
index 534b9a017d8f0..5d9a0f702a299 100644
--- a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h
+++ b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h
@@ -144,6 +144,17 @@ class RecordStorageLocation final : public StorageLocation 
{
   /// The synthetic field must exist.
   StorageLocation &getSyntheticField(llvm::StringRef Name) const {
     StorageLocation *Loc = SyntheticFields.lookup(Name);
+    LLVM_DEBUG({
+      if (Loc == nullptr) {
+        llvm::dbgs() << "Couldn't find synthetic field " << Name
+                     << " on StorageLocation " << this << " of type "
+                     << getType() << "\n";
+        llvm::dbgs() << "Existing synthetic fields:\n";
+        for ([[maybe_unused]] const auto &[Name, Loc] : SyntheticFields) {
+          llvm::dbgs() << Name << "\n";
+        }
+      }
+    });
     assert(Loc != nullptr);
     return *Loc;
   }

diff  --git a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp 
b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
index ed827ac2c7c90..03d6ed8020a0a 100644
--- a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
+++ b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
@@ -14,6 +14,9 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/Type.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringMap.h"
 
 #define DEBUG_TYPE "dataflow"
 
@@ -79,18 +82,41 @@ void copyRecord(RecordStorageLocation &Src, 
RecordStorageLocation &Dst,
 
   if (SrcType == DstType || (SrcDecl != nullptr && DstDecl != nullptr &&
                              SrcDecl->isDerivedFrom(DstDecl))) {
+    // Dst may have children modeled from other derived types than SrcType, 
e.g.
+    // after casts of Dst to other types derived from DstType. Only copy the
+    // children and synthetic fields present in both Dst and SrcType.
+    const FieldSet FieldsInSrcType =
+        Env.getDataflowAnalysisContext().getModeledFields(SrcType);
     for (auto [Field, DstFieldLoc] : Dst.children())
-      copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env);
+      if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field);
+          FieldAsFieldDecl && FieldsInSrcType.contains(FieldAsFieldDecl))
+        copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env);
+    const llvm::StringMap<QualType> SyntheticFieldsForSrcType =
+        Env.getDataflowAnalysisContext().getSyntheticFields(SrcType);
     for (const auto &[Name, DstFieldLoc] : Dst.synthetic_fields())
-      copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name),
-                         *DstFieldLoc, Env);
+      if (SyntheticFieldsForSrcType.contains(Name))
+        copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name),
+                           *DstFieldLoc, Env);
   } else if (SrcDecl != nullptr && DstDecl != nullptr &&
              DstDecl->isDerivedFrom(SrcDecl)) {
-    for (auto [Field, SrcFieldLoc] : Src.children())
-      copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env);
-    for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields())
-      copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc,
-                         Dst.getSyntheticField(Name), Env);
+    // Src may have children modeled from other derived types than DstType, 
e.g.
+    // after other casts of Src to those types (likely in 
diff erent branches,
+    // but without flow-condition-dependent field modeling). Only copy the
+    // children and synthetic fields of Src that are present in DstType.
+    const FieldSet FieldsInDstType =
+        Env.getDataflowAnalysisContext().getModeledFields(DstType);
+    for (auto [Field, SrcFieldLoc] : Src.children()) {
+      if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field);
+          FieldAsFieldDecl && FieldsInDstType.contains(FieldAsFieldDecl))
+        copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env);
+    }
+    const llvm::StringMap<QualType> SyntheticFieldsForDstType =
+        Env.getDataflowAnalysisContext().getSyntheticFields(DstType);
+    for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) {
+      if (SyntheticFieldsForDstType.contains(Name))
+        copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc,
+                           Dst.getSyntheticField(Name), Env);
+    }
   } else {
     for (const FieldDecl *Field :
          Env.getDataflowAnalysisContext().getModeledFields(TypeToCopy)) {

diff  --git a/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp 
b/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp
index 57162cd2efcc4..73390a829464a 100644
--- a/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp
@@ -64,7 +64,8 @@ TEST(RecordOpsTest, CopyRecord) {
   runDataflow(
       Code,
       [](QualType Ty) -> llvm::StringMap<QualType> {
-        if (Ty.getAsString() != "S")
+        std::string TypeAsString = Ty.getAsString();
+        if (TypeAsString != "S" && TypeAsString != "struct S")
           return {};
         QualType IntTy =
             getFieldNamed(Ty->getAsRecordDecl(), "outer_int")->getType();
@@ -123,7 +124,8 @@ TEST(RecordOpsTest, RecordsEqual) {
   runDataflow(
       Code,
       [](QualType Ty) -> llvm::StringMap<QualType> {
-        if (Ty.getAsString() != "S")
+        std::string TypeAsString = Ty.getAsString();
+        if (TypeAsString != "S" && TypeAsString != "struct S")
           return {};
         QualType IntTy =
             getFieldNamed(Ty->getAsRecordDecl(), "outer_int")->getType();
@@ -213,9 +215,10 @@ TEST(RecordOpsTest, CopyRecordBetweenDerivedAndBase) {
   )";
   auto SyntheticFieldCallback = [](QualType Ty) -> llvm::StringMap<QualType> {
     CXXRecordDecl *ADecl = nullptr;
-    if (Ty.getAsString() == "A")
+    std::string TypeAsString = Ty.getAsString();
+    if (TypeAsString == "A" || TypeAsString == "struct A")
       ADecl = Ty->getAsCXXRecordDecl();
-    else if (Ty.getAsString() == "B")
+    else if (TypeAsString == "B" || TypeAsString == "struct B")
       ADecl = Ty->getAsCXXRecordDecl()
                   ->bases_begin()
                   ->getType()

diff  --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp 
b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index cbd55966a3d88..66b3bba594fc9 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -3709,6 +3709,83 @@ TEST(TransferTest, StaticCastBaseToDerived) {
       });
 }
 
+TEST(TransferTest, MultipleConstructionsFromStaticCastsBaseToDerived) {
+  std::string Code = R"cc(
+ struct Base {};
+
+struct DerivedOne : public Base {
+  // Need a field in one of the derived siblings that the other doesn't have.
+  int I;
+};
+
+struct DerivedTwo : public Base {};
+
+int getInt();
+
+void target(Base* B) {
+  // Need something to cause modeling of I.
+  DerivedOne D1;
+  (void)D1.I;
+
+  // Switch cases are a reasonable pattern where the same variable might be
+  // safely cast to two 
diff erent derived types within the same function
+  // without resetting the value of the variable. getInt is a stand-in for what
+  // is usually a function indicating the dynamic derived type.
+  switch (getInt()) {
+    case 1:
+      // Need a CXXConstructExpr or copy/move CXXOperatorCallExpr from each of
+      // the casts to derived types, cast from the same base variable, to
+      // trigger the copyRecord behavior.
+      (void)new DerivedOne(*static_cast<DerivedOne*>(B));
+      break;
+    case 2:
+      (void)new DerivedTwo(*static_cast<DerivedTwo*>(B));
+      break;
+  };
+}
+)cc";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        // This is a crash repro. We used to crash when transferring the
+        // construction of DerivedTwo because B's StorageLocation had a child
+        // for the field I, but DerivedTwo doesn't. Now, we should only copy 
the
+        // fields from B that are present in DerivedTwo.
+      });
+}
+
+TEST(TransferTest, CopyConstructionOfBaseAfterStaticCastsBaseToDerived) {
+  std::string Code = R"cc(
+ struct Base {};
+
+struct Derived : public Base {
+// Need a field in Derived that is not in Base.
+  char C;
+};
+
+void target(Base* B, Base* OtherB) {
+  Derived* D = static_cast<Derived*>(B);
+  *B = *OtherB;
+  // Need something to cause modeling of C.
+  (void)D->C;
+}
+
+)cc";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        // This is a crash repro. We used to crash when transferring the
+        // copy construction of B from OtherB because B's StorageLocation had a
+        // child for the field C, but Base doesn't (so OtherB doesn't, since
+        // it's never been cast to any other type), and we tried to copy from
+        // the source (OtherB) all the fields present in the destination (B).
+        // Now, we should only try to copy the fields from OtherB that are
+        // present in Base.
+      });
+}
+
 TEST(TransferTest, ExplicitDerivedToBaseCast) {
   std::string Code = R"cc(
     struct Base {};
@@ -5320,7 +5397,7 @@ TEST(TransferTest, UnsupportedValueEquality) {
       A,
       B
     };
-  
+
     void target() {
       EC ec = EC::A;
 


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to