On Wed, Nov 10, 2021 at 12:31:29AM +0530, Siddhesh Poyarekar wrote:
> * tree-object-size.h (compute_builtin_object_size): Return tree
> instead of HOST_WIDE_INT.
> * builtins.c (fold_builtin_object_size): Adjust.
> * gimple-fold.c (gimple_fold_builtin_strncat): Likewise.
> * ubsan.c (instrument_object_size): Likewise.
> * tree-object-size.c (object_sizes): Change type to vec<tree>.
> (initval): New function.
> (unknown): Use it.
> (size_unknown_p, size_initval, size_unknown): New functions.
> (object_sizes_unknown_p): Use it.
> (object_sizes_get): Return tree.
> (object_sizes_initialize): Rename from object_sizes_set_force
> and set VAL parameter type as tree.
> (object_sizes_set): Set VAL parameter type as tree and adjust
> implementation.
> (size_for_offset): New function.
> (addr_object_size): Change PSIZE parameter to tree and adjust
> implementation.
> (alloc_object_size): Return tree.
> (compute_builtin_object_size): Return tree in PSIZE.
> (expr_object_size, call_object_size, unknown_object_size):
> Adjust for object_sizes_set change.
> (merge_object_sizes): Set OFFSET type to tree and adjust
> implementation.
> (plus_stmt_object_size, cond_expr_object_size,
> collect_object_sizes_for, check_for_plus_in_loops_1): Adjust for
> change of type from HOST_WIDE_INT to tree.
> (object_sizes_execute): Adjust for type change to tree.
> --- a/gcc/builtins.c
> +++ b/gcc/builtins.c
> @@ -10226,7 +10226,7 @@ maybe_emit_sprintf_chk_warning (tree exp, enum
> built_in_function fcode)
> static tree
> fold_builtin_object_size (tree ptr, tree ost)
> {
> - unsigned HOST_WIDE_INT bytes;
> + tree bytes;
> int object_size_type;
>
> if (!validate_arg (ptr, POINTER_TYPE)
> @@ -10251,17 +10251,15 @@ fold_builtin_object_size (tree ptr, tree ost)
> if (TREE_CODE (ptr) == ADDR_EXPR)
> {
> compute_builtin_object_size (ptr, object_size_type, &bytes);
> - if (wi::fits_to_tree_p (bytes, size_type_node))
> - return build_int_cstu (size_type_node, bytes);
> + return fold_convert (size_type_node, bytes);
> }
> else if (TREE_CODE (ptr) == SSA_NAME)
> {
> /* If object size is not known yet, delay folding until
> later. Maybe subsequent passes will help determining
> it. */
> - if (compute_builtin_object_size (ptr, object_size_type, &bytes)
> - && wi::fits_to_tree_p (bytes, size_type_node))
> - return build_int_cstu (size_type_node, bytes);
> + if (compute_builtin_object_size (ptr, object_size_type, &bytes))
> + return fold_convert (size_type_node, bytes);
Neither of these is equivalent to what it used to do before.
If some target has e.g. pointers wider than size_t, then previously we could
compute bytes that doesn't fit into size_t and would return NULL which
eventually would result in object_sizes_execute or expand_builtin_object_size
resolving it to the unknown value. But fold_convert will perform modulo
arithmetics on it, so say if size_t is 24-bit and bytes is 0x1000001, it
will fold to (size_t) 1. I think we should still punt if it doesn't fit.
Or do we ensure that what it returns always has sizetype type?
> @@ -2486,13 +2486,14 @@ gimple_fold_builtin_strncat (gimple_stmt_iterator
> *gsi)
> if (cmpsrc < 0)
> return false;
>
> - unsigned HOST_WIDE_INT dstsize;
> + tree dstsize;
>
> bool nowarn = warning_suppressed_p (stmt, OPT_Wstringop_overflow_);
>
> - if (!nowarn && compute_builtin_object_size (dst, 1, &dstsize))
> + if (!nowarn && compute_builtin_object_size (dst, 1, &dstsize)
> + && tree_fits_uhwi_p (dstsize))
> {
> - int cmpdst = compare_tree_int (len, dstsize);
> + int cmpdst = compare_tree_int (len, tree_to_uhwi (dstsize));
Why jump from tree to UHWI and back?
Check that TREE_CODE (dstsize) == INTEGER_CST and do
tree_int_cst_compare instead?
>
> if (cmpdst >= 0)
> {
> @@ -2509,7 +2510,7 @@ gimple_fold_builtin_strncat (gimple_stmt_iterator *gsi)
> "destination size")
> : G_("%qD specified bound %E exceeds "
> "destination size %wu"),
> - fndecl, len, dstsize);
> + fndecl, len, tree_to_uhwi (dstsize));
Similarly, use %E and pass dstsize to it.
> +/* Initial value of object sizes; zero for maximum and SIZE_MAX for minimum
> + object size. */
> +
> +static inline unsigned HOST_WIDE_INT
> +initval (int object_size_type)
> +{
> + return (unsigned HOST_WIDE_INT) -popcount_hwi (object_size_type
> + & OST_MINIMUM);
This makes the code quite unreadable, and on some arches it will also be
much worse (popcount is a libcall on many, and even if not, it is an inline
function using a builtin inside of it). Can't you use a simple, readable
return (object_size_type & OST_MINIMUM) ? HOST_WIDE_INT_M1U : 0;
and let the compiler optimize as much as it wants?
> +/* Bytes at end of the object with SZ from offset OFFSET. */
> +
> +static tree
> +size_for_offset (tree sz, tree offset)
> +{
> + return size_binop (MINUS_EXPR, size_binop (MAX_EXPR, sz, offset), offset);
> +}
>
> @@ -314,27 +356,22 @@ addr_object_size (struct object_size_info *osi,
> const_tree ptr,
> SSA_NAME_VERSION (var)))
> sz = object_sizes_get (osi, SSA_NAME_VERSION (var));
> else
> - sz = unknown (object_size_type);
> + sz = size_unknown (object_size_type);
> }
> - if (sz != unknown (object_size_type))
> + if (!size_unknown_p (sz, object_size_type))
> {
> - offset_int mem_offset;
> - if (mem_ref_offset (pt_var).is_constant (&mem_offset))
> - {
> - offset_int dsz = wi::sub (sz, mem_offset);
> - if (wi::neg_p (dsz))
> - sz = 0;
> - else if (wi::fits_uhwi_p (dsz))
> - sz = dsz.to_uhwi ();
> - else
> - sz = unknown (object_size_type);
> - }
> + tree offset = TREE_OPERAND (pt_var, 1);
> + if (TREE_CODE (offset) != INTEGER_CST
> + || TREE_CODE (sz) != INTEGER_CST)
> + sz = size_unknown (object_size_type);
> else
> - sz = unknown (object_size_type);
> + sz = size_for_offset (sz, offset);
This doesn't match what the code did and I'm surprised if it works at all.
TREE_OPERAND (pt_var, 1), while it is an INTEGER_CST or POLY_INT_CST,
has in its type encoded the type for aliasing, so the type is some pointer
type. Performing size_binop etc. on such values can misbehave, the code
assumes that it is sizetype or at least some integral type compatible with
it.  Also, mem_ref_offset is signed and offset_int has bigger precision
than pointers on the target such that it can always be signed.  So
e.g. if MEM_REF's second operand is bigger or equal than half of the
address space, in the old code it would appear to be negative, wi::sub would
result in a value bigger than sz. Not really sure right now if that is
exactly how we want to treat it, would be nice to try some testcase.
In any case, size_for_offset should be called with the offset converted
to sizetype if it does size_binop on it.
> + reexamine |= merge_object_sizes (osi, var, then_, size_int (0));
size_zero_node ?
> else
> expr_object_size (osi, var, then_);
>
> @@ -952,7 +970,7 @@ cond_expr_object_size (struct object_size_info *osi, tree
> var, gimple *stmt)
> return reexamine;
>
> if (TREE_CODE (else_) == SSA_NAME)
> - reexamine |= merge_object_sizes (osi, var, else_, 0);
> + reexamine |= merge_object_sizes (osi, var, else_, size_int (0));
Likewise. Many times.
> - unsigned HOST_WIDE_INT size;
> - if (compute_builtin_object_size (base_addr, 0, &size))
> - sizet = build_int_cst (sizetype, size);
> - else if (optimize)
I'd prefer not to reindent it to avoid useless churn, just do
if (compute_builtin_object_size (base_addr, 0, &sizet))
;
else if (optimize)
...
Jakub