Hi everyone,

After pushing the code that supports various known function classes last week,
I've turned my attention back to the core reference count checking 
functionality. This functionality used to reside in region_model, which 
wasn't ideal. To address this, I've introduced a hook to register callbacks 
to pop_frame. Specifically, this allows the code that checks the reference 
count and emits diagnostics to be housed within the plugin, rather than the 
core analyzer.

As of now, the parameters of pop_frame_callback are tailored specifically to 
our needs. If the use of callbacks at the end of pop_frame becomes more 
prevalent, we can revisit the setup to potentially make it more general.

Moreover, the core reference count checking logic was previously somewhat 
bloated, contained in one extensive function. I've since refactored it, 
breaking it down into several helper functions to simplify and reduce 
complexity. There are still some aspects that need refinement, especially 
since the plugin has seen changes since I last worked on this logic. However, 
I believe that there aren't any significant problems.

Currently, I've started working a custom stmt_finder similar to 
leak_stmt_finder 
to address the issue of m_stmt and m_stmt_finder being NULL at the time of 
region_model::pop_frame. This approach was discussed as a viable solution in 
a previous email, and I'll keep everyone posted on my progress. Afterwards, I 
will go back to address the refinements necessary mentioned above.

For those interested, I've attached a WIP patch that highlights the specific 
changes mentioned above.

Best,
Eric

gcc/analyzer/ChangeLog:

        * region-model.cc (region_model::pop_frame): New callback
        * mechanism.
        * region-model.h (struct append_regions_cb_data): New variables.
        (class region_model): New functions and variables.

gcc/testsuite/ChangeLog:

        * gcc.dg/plugin/analyzer_cpython_plugin.c: New functions on
        * reference count checking.

---
 gcc/analyzer/region-model.cc                  |   3 +
 gcc/analyzer/region-model.h                   |  19 ++
 .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 234 +++++++++++++++++-
 3 files changed, 254 insertions(+), 2 deletions(-)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 494a9cdf149..18cea279e53 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -82,6 +82,8 @@ along with GCC; see the file COPYING3.  If not see
 
 namespace ana {
 
+auto_vec<pop_frame_callback> region_model::pop_frame_callbacks;
+
 /* Dump T to PP in language-independent form, for debugging/logging/dumping
    purposes.  */
 
@@ -4813,6 +4815,7 @@ region_model::pop_frame (tree result_lvalue,
     }
 
   unbind_region_and_descendents (frame_reg,POISON_KIND_POPPED_STACK);
+  notify_on_pop_frame (this, retval, ctxt);
 }
 
 /* Get the number of frames in this region_model's stack.  */
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index 4f09f2e585a..2fe6a60f7ba 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -236,6 +236,10 @@ public:
 
 struct append_regions_cb_data;
 
+typedef void (*pop_frame_callback) (const region_model *model,
+                                   const svalue *retval,
+                                   region_model_context *ctxt);
+
 /* A region_model encapsulates a representation of the state of memory, with
    a tree of regions, along with their associated values.
    The representation is graph-like because values can be pointers to
@@ -505,6 +509,20 @@ class region_model
   void check_for_null_terminated_string_arg (const call_details &cd,
                                             unsigned idx);
 
+  static void
+  register_pop_frame_callback (const pop_frame_callback &callback)
+  {
+    pop_frame_callbacks.safe_push (callback);
+  }
+
+  static void
+  notify_on_pop_frame (const region_model *model, const svalue *retval,
+                      region_model_context *ctxt)
+  {
+    for (auto &callback : pop_frame_callbacks)
+       callback (model, retval, ctxt);
+  }
+
 private:
   const region *get_lvalue_1 (path_var pv, region_model_context *ctxt) const;
   const svalue *get_rvalue_1 (path_var pv, region_model_context *ctxt) const;
@@ -592,6 +610,7 @@ private:
                                                tree callee_fndecl,
                                                region_model_context *ctxt) 
const;
 
+  static auto_vec<pop_frame_callback> pop_frame_callbacks;
   /* Storing this here to avoid passing it around everywhere.  */
   region_model_manager *const m_mgr;
 
diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c 
b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
index 7cd72e8a886..918bb5a5587 100644
--- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
+++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
@@ -44,6 +44,7 @@
 #include "analyzer/region-model.h"
 #include "analyzer/call-details.h"
 #include "analyzer/call-info.h"
+#include "analyzer/exploded-graph.h"
 #include "make-unique.h"
 
 int plugin_is_GPL_compatible;
@@ -191,6 +192,234 @@ public:
   }
 };
 
+class refcnt_mismatch : public pending_diagnostic_subclass<refcnt_mismatch>
+{
+public:
+  refcnt_mismatch (const region *base_region,
+                               const svalue *ob_refcnt,
+                               const svalue *actual_refcnt)
+      : m_base_region (base_region), m_ob_refcnt (ob_refcnt),
+       m_actual_refcnt (actual_refcnt)
+  {
+  }
+
+  const char *
+  get_kind () const final override
+  {
+    return "refcnt_mismatch";
+  }
+
+  bool
+  operator== (const refcnt_mismatch &other) const
+  {
+    return (m_base_region == other.m_base_region
+           && m_ob_refcnt == other.m_ob_refcnt
+           && m_actual_refcnt == other.m_actual_refcnt);
+  }
+
+  int get_controlling_option () const final override
+  {
+    return 0;
+  }
+
+  bool
+  emit (rich_location *rich_loc, logger *) final override
+  {
+    diagnostic_metadata m;
+    bool warned;
+    warned = warning_meta (rich_loc, m, get_controlling_option (),
+                          "REF COUNT PROBLEM");
+    return warned;
+  }
+
+  void mark_interesting_stuff (interesting_t *interest) final override
+  {
+    if (m_base_region)
+      interest->add_region_creation (m_base_region);
+  }
+
+private:
+  const region *m_base_region;
+  const svalue *m_ob_refcnt;
+  const svalue *m_actual_refcnt;
+};
+
+/* Checks if the given region is heap allocated. */
+bool
+is_heap_allocated (const region *base_reg)
+{
+  return base_reg->get_kind () == RK_HEAP_ALLOCATED;
+}
+
+/* Increments the actual reference count if the current region matches the base
+ * region. */
+void
+increment_count_if_base_matches (const region *curr_region,
+                                 const region *base_reg, int &actual_refcnt)
+{
+  if (curr_region->get_base_region () == base_reg)
+    actual_refcnt++;
+}
+
+/* For PyListObjects: processes the ob_item field within the current region and
+ * increments the reference count if conditions are met. */
+void
+process_ob_item_region (const region_model *model, region_model_manager *mgr,
+                       region_model_context *ctxt, const region *curr_region,
+                       const svalue *pylist_type_ptr, const region *base_reg,
+                       int &actual_refcnt)
+{
+  tree ob_item_field_tree = get_field_by_name (pylistobj_record, "ob_item");
+  const region *ob_item_field_reg
+      = mgr->get_field_region (curr_region, ob_item_field_tree);
+  const svalue *ob_item_ptr = model->get_store_value (ob_item_field_reg, ctxt);
+
+  if (const auto &cast_ob_item_reg = ob_item_ptr->dyn_cast_region_svalue ())
+    {
+      const region *ob_item_reg = cast_ob_item_reg->get_pointee ();
+      const svalue *allocated_bytes = model->get_dynamic_extents (ob_item_reg);
+      const region *ob_item_sized = mgr->get_sized_region (
+         ob_item_reg, pyobj_ptr_ptr, allocated_bytes);
+      const svalue *buffer_contents_sval
+         = model->get_store_value (ob_item_sized, ctxt);
+
+      if (const auto &buffer_contents
+         = buffer_contents_sval->dyn_cast_compound_svalue ())
+       {
+         for (const auto &buffer_content : buffer_contents->get_map ())
+           {
+                   const auto &content_value = buffer_content.second;
+                   if (const auto &content_region
+                       = content_value->dyn_cast_region_svalue ())
+                           if (content_region->get_pointee () == base_reg)
+                                   actual_refcnt++;
+           }
+       }
+    }
+}
+
+/* Counts the actual references from all clusters in the model's store. */
+int
+count_actual_references (const region_model *model, region_model_manager *mgr,
+                        region_model_context *ctxt, const region *base_reg,
+                        const svalue *pylist_type_ptr, tree ob_type_field)
+{
+  int actual_refcnt = 0;
+  for (const auto &other_cluster : *model->get_store ())
+    {
+      for (const auto &binding : other_cluster.second->get_map ())
+       {
+         const auto &sval = binding.second;
+         const auto &curr_region = sval->maybe_get_region ();
+
+         if (!curr_region || curr_region->get_kind () != RK_HEAP_ALLOCATED)
+           continue;
+
+         increment_count_if_base_matches (curr_region, base_reg,
+                                           actual_refcnt);
+
+         const region *ob_type_region
+             = mgr->get_field_region (curr_region, ob_type_field);
+         const svalue *stored_sval
+             = model->get_store_value (ob_type_region, ctxt);
+         const auto &remove_cast = stored_sval->dyn_cast_unaryop_svalue ();
+
+         if (!remove_cast)
+           continue;
+
+         const svalue *type = remove_cast->get_arg ();
+         if (type == pylist_type_ptr)
+           process_ob_item_region (model, mgr, ctxt, curr_region,
+                                   pylist_type_ptr, base_reg, actual_refcnt);
+       }
+    }
+  return actual_refcnt;
+}
+
+/* Retrieves the svalue associated with the ob_refcnt field of the base region.
+ */
+const svalue *
+retrieve_ob_refcnt_sval (const region *base_reg, const region_model *model,
+                        region_model_context *ctxt)
+{
+  region_model_manager *mgr = model->get_manager ();
+  tree ob_refcnt_tree = get_field_by_name (pyobj_record, "ob_refcnt");
+  const region *ob_refcnt_region
+      = mgr->get_field_region (base_reg, ob_refcnt_tree);
+  const svalue *ob_refcnt_sval
+      = model->get_store_value (ob_refcnt_region, ctxt);
+  ob_refcnt_sval->dump (true);
+  return ob_refcnt_sval;
+}
+
+/* Processes an individual cluster and computes the reference count. */
+void
+process_cluster (
+    const hash_map<const ana::region *,
+                  ana::binding_cluster *>::iterator::reference_pair cluster,
+    const region_model *model, const svalue *retval,
+    region_model_context *ctxt, const svalue *pylist_type_ptr,
+    tree ob_type_field)
+{
+  region_model_manager *mgr = model->get_manager ();
+  const region *base_reg = cluster.first;
+
+  int actual_refcnt = count_actual_references (model, mgr, ctxt, base_reg,
+                                              pylist_type_ptr, ob_type_field);
+  inform (UNKNOWN_LOCATION, "actual ref count: %d", actual_refcnt);
+
+  const svalue *ob_refcnt_sval
+      = retrieve_ob_refcnt_sval (base_reg, model, ctxt);
+  const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst (
+      ob_refcnt_sval->get_type (), actual_refcnt);
+
+  if (actual_refcnt_sval != ob_refcnt_sval && ctxt)
+    {
+      std::unique_ptr<pending_diagnostic> pd = make_unique<refcnt_mismatch> (
+         base_reg, ob_refcnt_sval, actual_refcnt_sval);
+    if (pd)
+    inform(UNKNOWN_LOCATION, "DIAGNOSTIC ");
+    }
+}
+
+/* Validates the reference count of Python objects. */
+void
+check_pyobj_refcnt (const region_model *model, const svalue *retval,
+                   region_model_context *ctxt)
+{
+  region_model_manager *mgr = model->get_manager ();
+
+  const region *pylist_type_region
+      = mgr->get_region_for_global (pylisttype_vardecl);
+  const svalue *pylist_type_ptr = mgr->get_ptr_svalue (
+      TREE_TYPE (pylisttype_vardecl), pylist_type_region);
+
+  tree ob_type_field = get_field_by_name (pyobj_record, "ob_type");
+
+  for (const auto &cluster : *model->get_store ())
+    {
+      if (!is_heap_allocated (cluster.first))
+       continue;
+
+      inform (UNKNOWN_LOCATION, "_________________");
+      const region *base_reg = cluster.first;
+      base_reg->dump (true);
+      if (const auto &retval_region_sval = retval->dyn_cast_region_svalue ())
+       {
+         const auto &retval_reg = retval_region_sval->get_pointee ();
+         if (retval_reg == base_reg)
+           {
+                   inform (UNKNOWN_LOCATION, "same thiong");
+                   continue;
+           }
+       }
+      process_cluster (cluster, model, retval, ctxt, pylist_type_ptr,
+                      ob_type_field);
+    }
+}
+
+
+
 /* Some concessions were made to
 simplify the analysis process when comparing kf_PyList_Append with the
 real implementation. In particular, PyList_Append performs some
@@ -940,8 +1169,9 @@ plugin_init (struct plugin_name_args *plugin_info,
   const char *plugin_name = plugin_info->base_name;
   if (0)
     inform (input_location, "got here; %qs", plugin_name);
-  ana::register_finish_translation_unit_callback (&stash_named_types);
-  ana::register_finish_translation_unit_callback (&stash_global_vars);
+  register_finish_translation_unit_callback (&stash_named_types);
+  register_finish_translation_unit_callback (&stash_global_vars);
+  region_model::register_pop_frame_callback(check_pyobj_refcnt);
   register_callback (plugin_info->base_name, PLUGIN_ANALYZER_INIT,
                      ana::cpython_analyzer_init_cb,
                      NULL); /* void *user_data */
-- 
2.30.2

Reply via email to