https://gcc.gnu.org/g:8152f1f549179b377634b7ec360e6907fdd528c1

commit 8152f1f549179b377634b7ec360e6907fdd528c1
Author: Alexandre Oliva <ol...@gnu.org>
Date:   Wed Aug 14 21:59:28 2024 -0300

    optimize initialization of small padded objects

Diff:
---
 gcc/expr.cc        | 20 +++++++++++++-------
 gcc/expr.h         |  3 ++-
 gcc/fold-const.cc  | 33 +++++++++++++++++++++++++++++++++
 gcc/gimple-fold.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/gimplify.cc    | 14 +++++++++++++-
 5 files changed, 111 insertions(+), 9 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 2089c2b86a98..a701c67b3485 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7096,7 +7096,7 @@ count_type_elements (const_tree type, bool for_ctor_p)
 static bool
 categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
                            HOST_WIDE_INT *p_unique_nz_elts,
-                           HOST_WIDE_INT *p_init_elts, bool *p_complete)
+                           HOST_WIDE_INT *p_init_elts, int *p_complete)
 {
   unsigned HOST_WIDE_INT idx;
   HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields;
@@ -7218,7 +7218,10 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
 
   if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor),
                                                num_fields, elt_type))
-    *p_complete = false;
+    *p_complete = 0;
+  else if (*p_complete > 0
+          && type_has_padding_at_level_p (TREE_TYPE (ctor)))
+    *p_complete = -1;
 
   *p_nz_elts += nz_elts;
   *p_unique_nz_elts += unique_nz_elts;
@@ -7239,7 +7242,10 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
      and place it in *P_ELT_COUNT.
    * whether the constructor is complete -- in the sense that every
      meaningful byte is explicitly given a value --
-     and place it in *P_COMPLETE.
+     and place it in *P_COMPLETE:
+     -  0 if any field is missing
+     -  1 if all fields are initialized, and there's no padding
+     - -1 if all fields are initialized, but there's padding
 
    Return whether or not CTOR is a valid static constant initializer, the same
    as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0".  */
@@ -7247,12 +7253,12 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
 bool
 categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
                          HOST_WIDE_INT *p_unique_nz_elts,
-                         HOST_WIDE_INT *p_init_elts, bool *p_complete)
+                         HOST_WIDE_INT *p_init_elts, int *p_complete)
 {
   *p_nz_elts = 0;
   *p_unique_nz_elts = 0;
   *p_init_elts = 0;
-  *p_complete = true;
+  *p_complete = 1;
 
   return categorize_ctor_elements_1 (ctor, p_nz_elts, p_unique_nz_elts,
                                     p_init_elts, p_complete);
@@ -7313,7 +7319,7 @@ mostly_zeros_p (const_tree exp)
   if (TREE_CODE (exp) == CONSTRUCTOR)
     {
       HOST_WIDE_INT nz_elts, unz_elts, init_elts;
-      bool complete_p;
+      int complete_p;
 
       categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
                                &complete_p);
@@ -7331,7 +7337,7 @@ all_zeros_p (const_tree exp)
   if (TREE_CODE (exp) == CONSTRUCTOR)
     {
       HOST_WIDE_INT nz_elts, unz_elts, init_elts;
-      bool complete_p;
+      int complete_p;
 
       categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
                                &complete_p);
diff --git a/gcc/expr.h b/gcc/expr.h
index 533ae0af3871..04782b15f192 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -361,7 +361,8 @@ extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
 
 extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
                                      HOST_WIDE_INT *, HOST_WIDE_INT *,
-                                     bool *);
+                                     int *);
+extern bool type_has_padding_at_level_p (tree);
 extern bool immediate_const_ctor_p (const_tree, unsigned int words = 1);
 extern void store_constructor (tree, rtx, int, poly_int64, bool);
 extern HOST_WIDE_INT int_expr_size (const_tree exp);
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 8908e7381e72..5e7fd6460c5d 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -8193,6 +8193,36 @@ native_encode_string (const_tree expr, unsigned char *ptr, int len, int off)
   return len;
 }
 
+/* Subroutine of native_encode_expr.  Encode the CONSTRUCTOR
+   specified by EXPR into the buffer PTR of length LEN bytes.
+   Return the number of bytes placed in the buffer, or zero
+   upon failure.  */
+
+static int
+native_encode_constructor (const_tree expr, unsigned char *ptr, int len, int off)
+{
+  /* We are only concerned with zero-initialization constructors here.  */
+  if (CONSTRUCTOR_NELTS (expr))
+    return 0;
+
+  /* Give up unless BITS_PER_UNIT matches CHAR_BIT and the size of
+     EXPR's type in bytes is a constant accepted by tree_fits_shwi_p.  */
+  if (BITS_PER_UNIT != CHAR_BIT
+      || !tree_fits_shwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))))
+    return 0;
+
+  HOST_WIDE_INT total_bytes = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
+  if ((off == -1 && total_bytes > len) || off >= total_bytes)
+    return 0;
+  if (off == -1)
+    off = 0;
+  len = MIN (total_bytes - off, len);
+  if (ptr == NULL)
+    /* Dry run.  */;
+  else
+    memset (ptr, 0, len);
+  return len;
+}
 
 /* Subroutine of fold_view_convert_expr.  Encode the INTEGER_CST, REAL_CST,
    FIXED_CST, COMPLEX_CST, STRING_CST, or VECTOR_CST specified by EXPR into
@@ -8229,6 +8259,9 @@ native_encode_expr (const_tree expr, unsigned char *ptr, int len, int off)
     case STRING_CST:
       return native_encode_string (expr, ptr, len, off);
 
+    case CONSTRUCTOR:
+      return native_encode_constructor (expr, ptr, len, off);
+
     default:
       return 0;
     }
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 18d7a6b176db..23ae40635a04 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -4661,6 +4661,56 @@ clear_padding_type_may_have_padding_p (tree type)
     }
 }
 
+/* Return true if TYPE has padding bits of its own, i.e. aside from any
+   inside its fields, elements, etc., which are not recursed into.  */
+
+bool
+type_has_padding_at_level_p (tree type)
+{
+  switch (TREE_CODE (type))
+    {
+    case RECORD_TYPE:
+      {
+       tree bitpos = size_zero_node;
+       /* Expect fields sorted by bit position; any gap counts as padding.  */
+       for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
+         if (TREE_CODE (f) == FIELD_DECL)
+           {
+             if (DECL_PADDING_P (f))
+               return true;
+             tree pos = bit_position (f);
+             if (simple_cst_equal (bitpos, pos) != 1)
+               return true;
+             if (!DECL_SIZE (f))
+               return true;
+             bitpos = int_const_binop (PLUS_EXPR, pos, DECL_SIZE (f));
+           }
+       if (simple_cst_equal (bitpos, TYPE_SIZE (type)) != 1)
+         return true;
+       return false;
+      }
+    case UNION_TYPE:
+      /* If any of the fields is smaller than the whole, there is padding.  */
+      for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
+       if (TREE_CODE (f) == FIELD_DECL)
+         if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (f)),
+                               TYPE_SIZE (type)) != 1)
+           return true;
+      return false;
+    case ARRAY_TYPE:
+    case COMPLEX_TYPE:
+    case VECTOR_TYPE:
+      /* No recursing here, no padding at this level.  */
+      return false;
+    case REAL_TYPE:
+      return clear_padding_real_needs_padding_p (CONST_CAST_TREE (type));
+    case BITINT_TYPE:
+      return clear_padding_bitint_needs_padding_p (CONST_CAST_TREE (type));
+    default:
+      return false;
+    }
+}
+
 /* Emit a runtime loop:
    for (; buf.base != end; buf.base += sz)
      __builtin_clear_padding (buf.base);  */
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 26a216e151d6..354a6f53a721 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -5564,7 +5564,8 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
        struct gimplify_init_ctor_preeval_data preeval_data;
        HOST_WIDE_INT num_ctor_elements, num_nonzero_elements;
        HOST_WIDE_INT num_unique_nonzero_elements;
-       bool complete_p, valid_const_initializer;
+       int complete_p;
+       bool valid_const_initializer;
 
        /* Aggregate types must lower constructors to initialization of
           individual elements.  The exception is that a CONSTRUCTOR node
@@ -5668,6 +5669,17 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
          /* If a single access to the target must be ensured and all elements
             are zero, then it's optimal to clear whatever their number.  */
          cleared = true;
+       /* If the object is small enough to go in registers, and it's
+          not required to be constructed in memory, clear it first.
+          That will avoid wasting cycles preserving any padding bits
+          that might be there, and if there aren't any, the compiler
+          is smart enough to optimize the clearing out.  */
+       else if (complete_p <= 0
+                && !TREE_ADDRESSABLE (ctor) && !TREE_THIS_VOLATILE (object)
+                && (TYPE_MODE (type) != BLKmode || TYPE_NO_FORCE_BLK (type))
+                && (opt_for_fn (cfun->decl, optimize)
+                    || opt_for_fn (cfun->decl, optimize_size)))
+         cleared = true;
        else
          cleared = false;

Reply via email to