Hi,

On Wed, 1 Jun 2011, Michael Matz wrote:

> Hi,
> 
> so here's something more of my patch queue.  It adds the facility to merge 
> also other trees than types over compilation unit borders.  This specific 
> patch only deals with STRING_CST and INTEGER_CST nodes.  Originally I used 
> that place for merging declarations, hence the naming of some variables.  
> That needs to wait for adjustments of the cgraph/varpool streamer.
> 
> On building cc1 the overall numbers of trees that stay around after all 
> global sections are streamed in goes down from 620818 to 598843 trees, 
> overall saving 1 MB (out of 76 MB for just the trees).  Not much, but 
> hey...
> 
> Regstrapping on x86_64-linux in progress.  Okay for trunk?

Ahem.

        * lto.c (top_decls): New hash table.
        (simple_tree_hash, simple_tree_eq, uniquify_top): New.
        (LTO_FIXUP_TREE): Call uniquify_top.
        (uniquify_nodes): Ditto.
        (read_cgraph_and_symbols): Allocate and destroy top_decls.

Index: lto/lto.c
===================================================================
*** lto/lto.c   (revision 174524)
--- lto/lto.c   (working copy)
*************** lto_read_in_decl_state (struct data_in *
*** 231,236 ****
--- 231,316 ----
     that must be replaced with their prevailing variant.  */
  static GTY((if_marked ("ggc_marked_p"), param_is (union tree_node))) htab_t
    tree_with_vars;
+ /* A hashtable of top-level trees that can potentially be merged with trees
+    from other compilation units.  */
+ static GTY((if_marked ("ggc_marked_p"), param_is (union tree_node))) htab_t
+   top_decls;
+ 
+ /* Given a tree in ITEM, return a hash value designed to easily recognize
+    equal trees.  */
+ static hashval_t
+ simple_tree_hash (const void *item)
+ {
+   tree t = CONST_CAST_TREE ((const_tree) item);
+   enum tree_code code = TREE_CODE (t);
+   hashval_t hashcode = 0;
+   hashcode = iterative_hash_object (code, hashcode);
+   if (TREE_CODE (t) == STRING_CST)
+     {
+       if (TREE_TYPE (t))
+       hashcode = iterative_hash_object (TYPE_HASH (TREE_TYPE (t)), hashcode);
+       hashcode = iterative_hash_hashval_t ((hashval_t)TREE_STRING_LENGTH (t),
+                                          hashcode);
+       hashcode = iterative_hash (TREE_STRING_POINTER (t),
+                                TREE_STRING_LENGTH (t), hashcode);
+       return hashcode;
+     }
+   gcc_unreachable ();
+ }
+ 
+ /* Given two trees in VA and VB return true if both trees can be considered
+    equal for easy merging across different compilation units.  */
+ static int
+ simple_tree_eq (const void *va, const void *vb)
+ {
+   tree a = CONST_CAST_TREE ((const_tree) va);
+   tree b = CONST_CAST_TREE ((const_tree) vb);
+   if (a == b)
+     return 1;
+   if (TREE_CODE (a) != TREE_CODE (b))
+     return 0;
+   if (TREE_CODE (a) == STRING_CST)
+     {
+       return TREE_TYPE (a) == TREE_TYPE (b)
+            && TREE_STRING_LENGTH (a) == TREE_STRING_LENGTH (b)
+            && !memcmp (TREE_STRING_POINTER (a), TREE_STRING_POINTER (b),
+                        TREE_STRING_LENGTH (a));
+     }
+   gcc_unreachable ();
+ }
+ 
+ /* Given a tree T return its canonical variant, considering merging
+    of equal trees across different compilation units.  */
+ static tree
+ uniquify_top (tree t)
+ {
+   switch (TREE_CODE (t))
+     {
+       case INTEGER_CST:
+       {
+         tree newtype = gimple_register_type (TREE_TYPE (t));
+         if (newtype != TREE_TYPE (t))
+           t = build_int_cst_wide (newtype, TREE_INT_CST_LOW (t),
+                                   TREE_INT_CST_HIGH (t));
+       }
+         break;
+       case STRING_CST:
+       {
+         tree *t2;
+         if (TREE_TYPE (t))
+           TREE_TYPE (t) = gimple_register_type (TREE_TYPE (t));
+         t2 = (tree *) htab_find_slot (top_decls, t, INSERT);
+         if (*t2)
+           t = *t2;
+         else
+           *t2 = t;
+       }
+         break;
+       default:
+       break;
+     }
+   return t;
+ }
  
  /* Remember that T is a tree that (potentially) refers to a variable
     or function decl that may be replaced with its prevailing variant.  */
*************** remember_with_vars (tree t)
*** 247,252 ****
--- 327,334 ----
        { \
          if (TYPE_P (tt)) \
            (tt) = gimple_register_type (tt); \
+         else\
+           (tt) = uniquify_top (tt); \
          if (VAR_OR_FUNCTION_DECL_P (tt) && TREE_PUBLIC (tt)) \
            remember_with_vars (t); \
        } \
*************** lto_fixup_types (tree t)
*** 504,510 ****
  /* Given a streamer cache structure DATA_IN (holding a sequence of trees
     for one compilation unit) go over all trees starting at index FROM until 
the
     end of the sequence and replace fields of those trees, and the trees
!    themself with their canonical variants as per gimple_register_type.  */
  
  static void
  uniquify_nodes (struct data_in *data_in, unsigned from)
--- 586,593 ----
  /* Given a streamer cache structure DATA_IN (holding a sequence of trees
     for one compilation unit) go over all trees starting at index FROM until 
the
     end of the sequence and replace fields of those trees, and the trees
!    themself with their canonical variants as per gimple_register_type and
!    uniquify_top.  */
  
  static void
  uniquify_nodes (struct data_in *data_in, unsigned from)
*************** uniquify_nodes (struct data_in *data_in,
*** 522,529 ****
    for (i = len; i-- > from;)
      {
        tree t = VEC_index (tree, cache->nodes, i);
!       if (!t
!         || !TYPE_P (t))
        continue;
  
        gimple_register_type (t);
--- 605,611 ----
    for (i = len; i-- > from;)
      {
        tree t = VEC_index (tree, cache->nodes, i);
!       if (!t || !TYPE_P (t))
        continue;
  
        gimple_register_type (t);
*************** uniquify_nodes (struct data_in *data_in,
*** 540,552 ****
        /* First fixup the fields of T.  */
        lto_fixup_types (t);
  
!       if (!TYPE_P (t))
!       continue;
! 
!       /* Now try to find a canonical variant of T itself.  */
!       t = gimple_register_type (t);
  
!       if (t == oldt)
        {
          /* The following re-creates proper variant lists while fixing up
             the variant leaders.  We do not stream TYPE_NEXT_VARIANT so the
--- 622,634 ----
        /* First fixup the fields of T.  */
        lto_fixup_types (t);
  
!       if (TYPE_P (t))
!       /* Now try to find a canonical variant of T itself.  */
!       t = gimple_register_type (t);
!       else
!       t = uniquify_top (t);
  
!       if (t == oldt && TYPE_P (t))
        {
          /* The following re-creates proper variant lists while fixing up
             the variant leaders.  We do not stream TYPE_NEXT_VARIANT so the
*************** uniquify_nodes (struct data_in *data_in,
*** 609,619 ****
              TYPE_REFERENCE_TO (TREE_TYPE (t)) = t;
            }
        }
- 
        else
        {
          if (RECORD_OR_UNION_TYPE_P (t))
            {
              tree f1, f2;
              if (TYPE_FIELDS (t) != TYPE_FIELDS (oldt))
                for (f1 = TYPE_FIELDS (t), f2 = TYPE_FIELDS (oldt);
--- 691,705 ----
              TYPE_REFERENCE_TO (TREE_TYPE (t)) = t;
            }
        }
        else
        {
          if (RECORD_OR_UNION_TYPE_P (t))
            {
+             /* We could record FIELD_DECLs either in top_decls,
+                and uniquify them in uniquify_top, or we can simply replace
+                all FIELD_DECLs in one go if we found an already registered
+                type.  The latter is faster and doesn't waste space in the
+                hash table.  */
              tree f1, f2;
              if (TYPE_FIELDS (t) != TYPE_FIELDS (oldt))
                for (f1 = TYPE_FIELDS (t), f2 = TYPE_FIELDS (oldt);
*************** uniquify_nodes (struct data_in *data_in,
*** 656,663 ****
    for (i = len; i-- > from;)
      {
        tree t = VEC_index (tree, cache->nodes, i);
!       if (!t
!         || !TYPE_P (t))
        continue;
  
        if (!TYPE_CANONICAL (t))
--- 742,748 ----
    for (i = len; i-- > from;)
      {
        tree t = VEC_index (tree, cache->nodes, i);
!       if (!t || !TYPE_P (t))
        continue;
  
        if (!TYPE_CANONICAL (t))
*************** read_cgraph_and_symbols (unsigned nfiles
*** 2301,2306 ****
--- 2386,2392 ----
        gcc_assert (num_objects == nfiles);
      }
  
+   top_decls = htab_create_ggc (101, simple_tree_hash, simple_tree_eq, NULL);
    tree_with_vars = htab_create_ggc (101, htab_hash_pointer, htab_eq_pointer,
                                    NULL);
  
*************** read_cgraph_and_symbols (unsigned nfiles
*** 2340,2345 ****
--- 2426,2433 ----
    lto_stats.num_input_files = count;
    ggc_free(decl_data);
    real_file_decl_data = NULL;
+   htab_delete (top_decls);
+   top_decls = NULL;
  
    if (resolution_file_name)
      fclose (resolution);

Reply via email to