https://gcc.gnu.org/g:8152f1f549179b377634b7ec360e6907fdd528c1
commit 8152f1f549179b377634b7ec360e6907fdd528c1 Author: Alexandre Oliva <ol...@gnu.org> Date: Wed Aug 14 21:59:28 2024 -0300 optimize initialization of small padded objects Diff: --- gcc/expr.cc | 20 +++++++++++++------- gcc/expr.h | 3 ++- gcc/fold-const.cc | 33 +++++++++++++++++++++++++++++++++ gcc/gimple-fold.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/gimplify.cc | 14 +++++++++++++- 5 files changed, 111 insertions(+), 9 deletions(-) diff --git a/gcc/expr.cc b/gcc/expr.cc index 2089c2b86a98..a701c67b3485 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -7096,7 +7096,7 @@ count_type_elements (const_tree type, bool for_ctor_p) static bool categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, HOST_WIDE_INT *p_unique_nz_elts, - HOST_WIDE_INT *p_init_elts, bool *p_complete) + HOST_WIDE_INT *p_init_elts, int *p_complete) { unsigned HOST_WIDE_INT idx; HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields; @@ -7218,7 +7218,10 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor), num_fields, elt_type)) - *p_complete = false; + *p_complete = 0; + else if (*p_complete > 0 + && type_has_padding_at_level_p (TREE_TYPE (ctor))) + *p_complete = -1; *p_nz_elts += nz_elts; *p_unique_nz_elts += unique_nz_elts; @@ -7239,7 +7242,10 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, and place it in *P_ELT_COUNT. * whether the constructor is complete -- in the sense that every meaningful byte is explicitly given a value -- - and place it in *P_COMPLETE. + and place it in *P_COMPLETE: + - 0 if any field is missing + - 1 if all fields are initialized, and there's no padding + - -1 if all fields are initialized, but there's padding Return whether or not CTOR is a valid static constant initializer, the same as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". 
*/ @@ -7247,12 +7253,12 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, bool categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts, HOST_WIDE_INT *p_unique_nz_elts, - HOST_WIDE_INT *p_init_elts, bool *p_complete) + HOST_WIDE_INT *p_init_elts, int *p_complete) { *p_nz_elts = 0; *p_unique_nz_elts = 0; *p_init_elts = 0; - *p_complete = true; + *p_complete = 1; return categorize_ctor_elements_1 (ctor, p_nz_elts, p_unique_nz_elts, p_init_elts, p_complete); @@ -7313,7 +7319,7 @@ mostly_zeros_p (const_tree exp) if (TREE_CODE (exp) == CONSTRUCTOR) { HOST_WIDE_INT nz_elts, unz_elts, init_elts; - bool complete_p; + int complete_p; categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts, &complete_p); @@ -7331,7 +7337,7 @@ all_zeros_p (const_tree exp) if (TREE_CODE (exp) == CONSTRUCTOR) { HOST_WIDE_INT nz_elts, unz_elts, init_elts; - bool complete_p; + int complete_p; categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts, &complete_p); diff --git a/gcc/expr.h b/gcc/expr.h index 533ae0af3871..04782b15f192 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -361,7 +361,8 @@ extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree); extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, HOST_WIDE_INT *, HOST_WIDE_INT *, - bool *); + int *); +extern bool type_has_padding_at_level_p (tree); extern bool immediate_const_ctor_p (const_tree, unsigned int words = 1); extern void store_constructor (tree, rtx, int, poly_int64, bool); extern HOST_WIDE_INT int_expr_size (const_tree exp); diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 8908e7381e72..5e7fd6460c5d 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -8193,6 +8193,36 @@ native_encode_string (const_tree expr, unsigned char *ptr, int len, int off) return len; } +/* subroutine of native_encode_expr. Encode the CONSTRUCTOR + specified by EXPR into the buffer PTR of length LEN bytes. 
+ Return the number of bytes placed in the buffer, or zero + upon failure. */ + + static int +native_encode_constructor (const_tree expr, unsigned char *ptr, int len, int off) +{ + /* We are only concerned with zero-initialization constructors here, + i.e. those that list no elements at all. */ + if (CONSTRUCTOR_NELTS (expr)) + return 0; + + /* Give up unless BITS_PER_UNIT matches CHAR_BIT and the size of the + type in bytes can be extracted with tree_to_shwi below. */ + if (BITS_PER_UNIT != CHAR_BIT + || !tree_fits_shwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr)))) + return 0; + + HOST_WIDE_INT total_bytes = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (expr))); + if ((off == -1 && total_bytes > len) || off >= total_bytes) + return 0; + if (off == -1) + off = 0; + len = MIN (total_bytes - off, len); + if (ptr == NULL) + /* Dry run: nothing is written, only the length is returned. */; + else + memset (ptr, 0, len); + return len; +} /* Subroutine of fold_view_convert_expr. Encode the INTEGER_CST, REAL_CST, FIXED_CST, COMPLEX_CST, STRING_CST, or VECTOR_CST specified by EXPR into @@ -8229,6 +8259,9 @@ native_encode_expr (const_tree expr, unsigned char *ptr, int len, int off) case STRING_CST: return native_encode_string (expr, ptr, len, off); + case CONSTRUCTOR: + return native_encode_constructor (expr, ptr, len, off); + default: return 0; } diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 18d7a6b176db..23ae40635a04 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -4661,6 +4661,56 @@ clear_padding_type_may_have_padding_p (tree type) } } +/* Return true if TYPE has padding bits aside from those in fields, + elements, etc. */ + +bool +type_has_padding_at_level_p (tree type) +{ + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + { + tree bitpos = size_zero_node; + /* Expect fields to be sorted by bit position. 
*/ + for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + { + if (DECL_PADDING_P (f)) + return true; + tree pos = bit_position (f); + if (simple_cst_equal (bitpos, pos) != 1) + return true; + if (!DECL_SIZE (f)) + return true; + bitpos = int_const_binop (PLUS_EXPR, pos, DECL_SIZE (f)); + } + if (simple_cst_equal (bitpos, TYPE_SIZE (type)) != 1) + return true; + return false; + } + case UNION_TYPE: + /* If any of the fields is smaller than the whole, there is padding. + Compare each field's size with the size of the union itself. */ + for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (f)), + TYPE_SIZE (type)) != 1) + return true; + return false; + case ARRAY_TYPE: + case COMPLEX_TYPE: + case VECTOR_TYPE: + /* No recursing here, no padding at this level. */ + return false; + case REAL_TYPE: + return clear_padding_real_needs_padding_p (CONST_CAST_TREE (type)); + case BITINT_TYPE: + return clear_padding_bitint_needs_padding_p (CONST_CAST_TREE (type)); + default: + return false; + } +} + /* Emit a runtime loop: for (; buf.base != end; buf.base += sz) __builtin_clear_padding (buf.base); */ diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 26a216e151d6..354a6f53a721 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -5564,7 +5564,8 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, struct gimplify_init_ctor_preeval_data preeval_data; HOST_WIDE_INT num_ctor_elements, num_nonzero_elements; HOST_WIDE_INT num_unique_nonzero_elements; - bool complete_p, valid_const_initializer; + int complete_p; + bool valid_const_initializer; /* Aggregate types must lower constructors to initialization of individual elements. 
The exception is that a CONSTRUCTOR node @@ -5668,6 +5669,17 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, /* If a single access to the target must be ensured and all elements are zero, then it's optimal to clear whatever their number. */ cleared = true; + /* If the object is small enough to go in registers, and it's + not required to be constructed in memory, clear it first. + That will avoid wasting cycles preserving any padding bits + that might be there, and if there aren't any, the compiler + is smart enough to optimize the clearing out. */ + else if (complete_p <= 0 + && !TREE_ADDRESSABLE (ctor) && !TREE_THIS_VOLATILE (object) + && (TYPE_MODE (type) != BLKmode || TYPE_NO_FORCE_BLK (type)) + && (opt_for_fn (cfun->decl, optimize) + || opt_for_fn (cfun->decl, optimize_size))) + cleared = true; else cleared = false;