> Am 30.11.2024 um 10:53 schrieb Jakub Jelinek <ja...@redhat.com>:
>
> Hi!
>
> The following patch handles VECTOR_TYPE_P CONSTRUCTORs in
> count_nonzero_bytes, including handling them if they have some elements
> non-constant.
> If there are still some constant elements before it (in the range queried),
> we derive info at least from those bytes and consider the rest as unknown.
>
> The first 3 hunks just punt in IMHO problematic cases, the spaghetti code
> considers byte_size 0 as unknown size, determine yourself, so if offset
> is equal to exp size, there are 0 bytes to consider (so nothing useful
> to determine), but using byte_size 0 would mean use any size.
> Similarly, native_encode_expr uses int type for offset (and size), so
> padding it offset larger than INT_MAX could be silent miscompilation.
>
> I've guarded the test to just a couple of targets known to handle it,
> because e.g. on ia32 without -msse forwprop1 seems to lower the CONSTRUCTOR
> into 4 BIT_FIELD_REF stores and I haven't figured out on what exactly
> that depends on (e.g. powerpc* is fine on any CPUs, even with -mno-altivec
> -mno-vsx, even -m32).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok
Thanks,
Richard
> 2024-11-30 Jakub Jelinek <ja...@redhat.com>
>
> PR tree-optimization/117057
> * tree-ssa-strlen.cc (strlen_pass::count_nonzero_bytes): Punt also
> when byte_size is equal to offset or nchars. Punt if offset is bigger
> than INT_MAX. Handle vector CONSTRUCTOR with some elements constant,
> possibly followed by non-constant.
>
> * gcc.dg/strlenopt-32.c: Remove xfail and vect_slp_v2qi_store_unalign
> specific scan-tree-dump-times directive.
> * gcc.dg/strlenopt-96.c: New test.
>
> --- gcc/tree-ssa-strlen.cc.jj 2024-11-23 13:00:31.476982808 +0100
> +++ gcc/tree-ssa-strlen.cc 2024-11-29 22:53:17.465837084 +0100
> @@ -4629,7 +4629,7 @@ strlen_pass::count_nonzero_bytes (tree e
> return false;
>
> unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size);
> - if (byte_size < offset)
> + if (byte_size <= offset)
> return false;
>
> nbytes = byte_size - offset;
> @@ -4682,7 +4682,7 @@ strlen_pass::count_nonzero_bytes (tree e
> if (TREE_CODE (exp) == STRING_CST)
> {
> unsigned nchars = TREE_STRING_LENGTH (exp);
> - if (nchars < offset)
> + if (nchars <= offset)
> return false;
>
> if (!nbytes)
> @@ -4700,7 +4700,7 @@ strlen_pass::count_nonzero_bytes (tree e
> unsigned char buf[256];
> if (!prep)
> {
> - if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
> + if (CHAR_BIT != 8 || BITS_PER_UNIT != 8 || offset > INT_MAX)
> return false;
> /* If the pointer to representation hasn't been set above
> for STRING_CST point it at the buffer. */
> @@ -4710,11 +4710,75 @@ strlen_pass::count_nonzero_bytes (tree e
> unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
> if (repsize < nbytes)
> {
> - /* This should only happen when REPSIZE is zero because EXP
> - doesn't denote an object with a known initializer, except
> - perhaps when the reference reads past its end. */
> - lenrange[0] = 0;
> - prep = NULL;
> + /* Handle vector { 0x12345678, 0x23003412, x_1(D), y_2(D) }
> + and similar cases. Even when not all the elements are constant,
> + we can perhaps figure out something from the constant ones
> + and assume the others can be anything. */
> + if (TREE_CODE (exp) == CONSTRUCTOR
> + && CONSTRUCTOR_NELTS (exp)
> + && VECTOR_TYPE_P (TREE_TYPE (exp))
> + && nbytes <= sizeof buf)
> + {
> + tree v0 = CONSTRUCTOR_ELT (exp, 0)->value;
> + unsigned HOST_WIDE_INT elt_sz
> + = int_size_in_bytes (TREE_TYPE (v0));
> + unsigned int i, s = 0;
> + tree v, idx;
> + FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (exp), i, idx, v)
> + {
> + if (idx
> + && (VECTOR_TYPE_P (TREE_TYPE (v0))
> + || !tree_fits_uhwi_p (idx)
> + || tree_to_uhwi (idx) != i))
> + {
> + s = 0;
> + break;
> + }
> + if ((i + 1) * elt_sz <= offset)
> + continue;
> + unsigned int o = 0;
> + if (i * elt_sz < offset)
> + o = offset % elt_sz;
> + repsize = native_encode_expr (v, buf + s,
> + sizeof (buf) - s, o);
> + if (repsize != elt_sz - o)
> + break;
> + s += repsize;
> + }
> + if (s != 0 && s < nbytes)
> + {
> + unsigned HOST_WIDE_INT n = strnlen (prep, s);
> + if (n < lenrange[0])
> + lenrange[0] = n;
> + if (lenrange[1] < n && n != s)
> + lenrange[1] = n;
> + if (lenrange[2] < nbytes)
> + lenrange[2] = nbytes;
> + /* We haven't processed all bytes, the rest are unknown.
> + So, clear NULTERM if none of the initial bytes are
> + zero, and clear ALLNUL and ALLNONNULL because we don't
> + know about the remaining bytes. */
> + if (n == s)
> + *nulterm = false;
> + *allnul = false;
> + *allnonnul = false;
> + return true;
> + }
> + else if (s != nbytes)
> + {
> + /* See below. */
> + lenrange[0] = 0;
> + prep = NULL;
> + }
> + }
> + else
> + {
> + /* This should only happen when REPSIZE is zero because EXP
> + doesn't denote an object with a known initializer, except
> + perhaps when the reference reads past its end. */
> + lenrange[0] = 0;
> + prep = NULL;
> + }
> }
> else if (!nbytes)
> nbytes = repsize;
> --- gcc/testsuite/gcc.dg/strlenopt-32.c.jj 2024-10-11 11:30:38.865389752
> +0200
> +++ gcc/testsuite/gcc.dg/strlenopt-32.c 2024-11-29 15:52:20.296109588 +0100
> @@ -190,5 +190,4 @@ main ()
> return 0;
> }
>
> -/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" { xfail
> vect_slp_v2qi_store_unalign } } } */
> -/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen1" { target
> vect_slp_v2qi_store_unalign } } } */
> +/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" } } */
> --- gcc/testsuite/gcc.dg/strlenopt-96.c.jj 2024-11-29 15:50:37.233571779
> +0100
> +++ gcc/testsuite/gcc.dg/strlenopt-96.c 2024-11-30 01:45:29.638067059 +0100
> @@ -0,0 +1,42 @@
> +/* PR tree-optimization/117057 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-additional-options "-msse" { target { i?86-*-* x86_64-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "return \[0-9\]*;" 4 "optimized" {
> target i?86-*-* x86_64-*-* aarch64*-*-* powerpc*-*-* } } } */
> +
> +#include "strlenopt.h"
> +
> +typedef unsigned int V __attribute__((vector_size (2 * sizeof (int))));
> +typedef unsigned int W __attribute__((vector_size (4 * sizeof (int))));
> +
> +size_t
> +foo (void)
> +{
> + char a[64];
> + *(long long *) a = 0x12003456789abcdeULL;
> + return strlen (a);
> +}
> +
> +size_t
> +bar (void)
> +{
> + char a[64];
> + *(V *) a = (V) { 0x12345678U, 0x9a00bcdeU };
> + return strlen (a);
> +}
> +
> +size_t
> +baz (unsigned int x)
> +{
> + char a[64];
> + *(V *) a = (V) { 0x12005678U, x };
> + return strlen (a);
> +}
> +
> +size_t
> +qux (unsigned int x)
> +{
> + char a[64];
> + *(W *)a = (W) { 0x12345678U, 0x9abcdef0U, 0x12005678U, x };
> + return strlen (a);
> +}
>
> Jakub
>