On December 18, 2020 9:41:24 PM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >constant_byte_string now uses a convert_to_bytes function, which >doesn't >handle bitfields at all (it doesn't punt on them, it just puts them into the wrong >bits >or bytes). Furthermore, I don't see a reason why that function should >exist >at all; it duplicates native_encode_initializer functionality. >Except that native_encode_initializer punted on flexible array members >and 2 >tests in the testsuite relied on constant_byte_string handling those. >So, this patch throws away convert_to_bytes and uses >native_encode_initializer >instead, but teaches it to handle flexible array members (only in the >non-mask mode with off == -1 for now). Furthermore, it adds various >corner >case checks that the old implementation was missing (like that >STRING_CSTs >use int as length and therefore we shouldn't try to build strings larger than >that, >or that native_encode*/native_interpret* APIs require sane >host and target bytes (8-bit on both)). > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok. Richard. >2020-12-18 Jakub Jelinek <ja...@redhat.com> > > PR middle-end/98366 > * fold-const.c (native_encode_initializer): Don't try to > memset more than total_bytes with off == -1 even if len is large. > Handle flexible array member initializers if off == -1 and mask is > NULL. > * expr.c (convert_to_bytes): Remove. > (constant_byte_string): Use native_encode_initializer instead of > convert_to_bytes. Remove extraneous semicolon. Punt on various > corner-cases the APIs don't handle, like sizes > INT_MAX, > BITS_PER_UNIT != 8, CHAR_BIT != 8. > > * gcc.c-torture/execute/pr98366.c: New test. > >--- gcc/fold-const.c.jj 2020-12-09 09:36:05.017217418 +0100 >+++ gcc/fold-const.c 2020-12-18 16:25:52.053997090 +0100 >@@ -8197,7 +8197,7 @@ native_encode_initializer (tree init, un > > gcc_assert (TREE_CODE (type) == RECORD_TYPE || mask == NULL); > if (ptr != NULL) >- memset (ptr, '\0', MIN (total_bytes - off, len)); >+ memset (ptr, '\0', MIN (total_bytes - o, len)); > for (cnt = 0; ; cnt++) > { > tree val = NULL_TREE, field = NULL_TREE; >@@ -8266,11 +8266,33 @@ native_encode_initializer (tree init, un > if (TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE > && TYPE_DOMAIN (TREE_TYPE (field)) > && ! 
TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (field)))) >- return 0; >- if (DECL_SIZE_UNIT (field) == NULL_TREE >- || !tree_fits_shwi_p (DECL_SIZE_UNIT (field))) >- return 0; >- fieldsize = tree_to_shwi (DECL_SIZE_UNIT (field)); >+ { >+ if (mask || off != -1) >+ return 0; >+ if (val == NULL_TREE) >+ continue; >+ if (TREE_CODE (TREE_TYPE (val)) != ARRAY_TYPE) >+ return 0; >+ fieldsize = int_size_in_bytes (TREE_TYPE (val)); >+ if (fieldsize < 0 >+ || (int) fieldsize != fieldsize >+ || (pos + fieldsize) > INT_MAX) >+ return 0; >+ if (pos + fieldsize > total_bytes) >+ { >+ if (ptr != NULL && total_bytes - o < len) >+ memset (ptr + (total_bytes - o), >+ '\0', MIN (pos + fieldsize - o, len)); >+ total_bytes = pos + fieldsize; >+ } >+ } >+ else >+ { >+ if (DECL_SIZE_UNIT (field) == NULL_TREE >+ || !tree_fits_shwi_p (DECL_SIZE_UNIT (field))) >+ return 0; >+ fieldsize = tree_to_shwi (DECL_SIZE_UNIT (field)); >+ } > if (fieldsize == 0) > continue; > >@@ -8439,12 +8461,31 @@ native_encode_initializer (tree init, un > || (pos >= off > && (pos + fieldsize <= (HOST_WIDE_INT) off + len))) > { >- if (!native_encode_initializer (val, ptr ? ptr + pos - o >- : NULL, >- fieldsize, >- off == -1 ? -1 : 0, >- mask ? mask + pos : NULL)) >+ int fldsize = fieldsize; >+ if (off == -1) >+ { >+ tree fld = DECL_CHAIN (field); >+ while (fld) >+ { >+ if (TREE_CODE (fld) == FIELD_DECL) >+ break; >+ fld = DECL_CHAIN (fld); >+ } >+ if (fld == NULL_TREE) >+ fldsize = len - pos; >+ } >+ r = native_encode_initializer (val, ptr ? ptr + pos - o >+ : NULL, >+ fldsize, >+ off == -1 ? -1 : 0, >+ mask ? 
mask + pos : NULL); >+ if (!r) > return 0; >+ if (off == -1 >+ && fldsize != fieldsize >+ && r > fieldsize >+ && pos + r > total_bytes) >+ total_bytes = pos + r; > } > else > { >--- gcc/expr.c.jj 2020-12-11 11:10:08.520613827 +0100 >+++ gcc/expr.c 2020-12-18 16:31:09.841374954 +0100 >@@ -11631,111 +11631,6 @@ is_aligning_offset (const_tree offset, c >return TREE_CODE (offset) == ADDR_EXPR && TREE_OPERAND (offset, 0) == >exp; > } > >-/* If EXPR is a constant initializer (either an expression or >CONSTRUCTOR), >- attempt to obtain its native representation as an array of nonzero >BYTES. >- Return true on success and false on failure (the latter without >modifying >- BYTES). */ >- >-static bool >-convert_to_bytes (tree type, tree expr, vec<unsigned char> *bytes) >-{ >- if (TREE_CODE (expr) == CONSTRUCTOR) >- { >- /* Set to the size of the CONSTRUCTOR elements. */ >- unsigned HOST_WIDE_INT ctor_size = bytes->length (); >- >- if (TREE_CODE (type) == ARRAY_TYPE) >- { >- tree val, idx; >- tree eltype = TREE_TYPE (type); >- unsigned HOST_WIDE_INT elsize = >- tree_to_uhwi (TYPE_SIZE_UNIT (eltype)); >- >- /* Jump through hoops to determine the lower bound for languages >- like Ada that can set it to an (almost) arbitrary value. */ >- tree dom = TYPE_DOMAIN (type); >- if (!dom) >- return false; >- tree min = TYPE_MIN_VALUE (dom); >- if (!min || !tree_fits_uhwi_p (min)) >- return false; >- unsigned HOST_WIDE_INT i, last_idx = tree_to_uhwi (min) - 1; >- FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (expr), i, idx, val) >- { >- /* Append zeros for elements with no initializers. 
*/ >- if (!tree_fits_uhwi_p (idx)) >- return false; >- unsigned HOST_WIDE_INT cur_idx = tree_to_uhwi (idx); >- if (unsigned HOST_WIDE_INT size = cur_idx - (last_idx + 1)) >- { >- size = size * elsize + bytes->length (); >- bytes->safe_grow_cleared (size, true); >- } >- >- if (!convert_to_bytes (eltype, val, bytes)) >- return false; >- >- last_idx = cur_idx; >- } >- } >- else if (TREE_CODE (type) == RECORD_TYPE) >- { >- tree val, fld; >- unsigned HOST_WIDE_INT i; >- FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (expr), i, fld, val) >- { >- /* Append zeros for members with no initializers and >- any padding. */ >- unsigned HOST_WIDE_INT cur_off = int_byte_position (fld); >- if (bytes->length () < cur_off) >- bytes->safe_grow_cleared (cur_off, true); >- >- if (!convert_to_bytes (TREE_TYPE (val), val, bytes)) >- return false; >- } >- } >- else >- return false; >- >- /* Compute the size of the COSNTRUCTOR elements. */ >- ctor_size = bytes->length () - ctor_size; >- >- /* Append zeros to the byte vector to the full size of the type. >- The type size can be less than the size of the CONSTRUCTOR >- if the latter contains initializers for a flexible array >- member. */ >- tree size = TYPE_SIZE_UNIT (type); >- unsigned HOST_WIDE_INT type_size = tree_to_uhwi (size); >- if (ctor_size < type_size) >- if (unsigned HOST_WIDE_INT size_grow = type_size - ctor_size) >- bytes->safe_grow_cleared (bytes->length () + size_grow, true); >- >- return true; >- } >- >- /* Except for RECORD_TYPE which may have an initialized flexible >array >- member, the size of a type is the same as the size of the >initializer >- (including any implicitly zeroed out members and padding). >Allocate >- just enough for that many bytes. 
*/ >- tree expr_size = TYPE_SIZE_UNIT (TREE_TYPE (expr)); >- if (!expr_size || !tree_fits_uhwi_p (expr_size)) >- return false; >- const unsigned HOST_WIDE_INT expr_bytes = tree_to_uhwi (expr_size); >- const unsigned bytes_sofar = bytes->length (); >- /* native_encode_expr can convert at most INT_MAX bytes. vec is >limited >- to at most UINT_MAX. */ >- if (bytes_sofar + expr_bytes > INT_MAX) >- return false; >- >- /* Unlike for RECORD_TYPE, there is no need to clear the memory >since >- it's completely overwritten by native_encode_expr. */ >- bytes->safe_grow (bytes_sofar + expr_bytes, true); >- unsigned char *pnext = bytes->begin () + bytes_sofar; >- int nbytes = native_encode_expr (expr, pnext, expr_bytes, 0); >- /* NBYTES is zero on failure. Otherwise it should equal EXPR_BYTES. > */ >- return (unsigned HOST_WIDE_INT) nbytes == expr_bytes; >-} >- >/* Return a STRING_CST corresponding to ARG's constant initializer >either > if it's a string constant, or, when VALREP is set, any other constant, > or null otherwise. >@@ -11748,7 +11643,7 @@ static tree >constant_byte_string (tree arg, tree *ptr_offset, tree *mem_size, tree >*decl, > bool valrep = false) > { >- tree dummy = NULL_TREE;; >+ tree dummy = NULL_TREE; > if (!mem_size) > mem_size = &dummy; > >@@ -11903,18 +11798,42 @@ constant_byte_string (tree arg, tree *pt > if (!base_off.is_constant (&cstoff)) > return NULL_TREE; > >+ /* Check that the host and target are sane. 
*/ >+ if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) >+ return NULL_TREE; >+ >+ HOST_WIDE_INT typesz = int_size_in_bytes (TREE_TYPE (init)); >+ if (typesz <= 0 || (int) typesz != typesz) >+ return NULL_TREE; >+ >+ HOST_WIDE_INT size = typesz; >+ if (VAR_P (array) >+ && DECL_SIZE_UNIT (array) >+ && tree_fits_shwi_p (DECL_SIZE_UNIT (array))) >+ { >+ size = tree_to_shwi (DECL_SIZE_UNIT (array)); >+ gcc_checking_assert (size >= typesz); >+ } >+ > /* If value representation was requested convert the initializer > for the whole array or object into a string of bytes forming > its value representation and return it. */ >- auto_vec<unsigned char> bytes; >- if (!convert_to_bytes (TREE_TYPE (init), init, &bytes)) >- return NULL_TREE; >+ unsigned char *bytes = XNEWVEC (unsigned char, size); >+ int r = native_encode_initializer (init, bytes, size); >+ if (r < typesz) >+ { >+ XDELETEVEC (bytes); >+ return NULL_TREE; >+ } >+ >+ if (r < size) >+ memset (bytes + r, '\0', size - r); > >- unsigned n = bytes.length (); >- const char *p = reinterpret_cast<const char *>(bytes.address >()); >- init = build_string_literal (n, p, char_type_node); >+ const char *p = reinterpret_cast<const char *>(bytes); >+ init = build_string_literal (size, p, char_type_node); > init = TREE_OPERAND (init, 0); > init = TREE_OPERAND (init, 0); >+ XDELETE (bytes); > > *mem_size = size_int (TREE_STRING_LENGTH (init)); > *ptr_offset = wide_int_to_tree (ssizetype, base_off); >@@ -11965,6 +11884,10 @@ constant_byte_string (tree arg, tree *pt > && (TREE_CODE (TREE_TYPE (array)) == INTEGER_TYPE > || TYPE_MAIN_VARIANT (inittype) == char_type_node)) > { >+ /* Check that the host and target are sane. */ >+ if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) >+ return NULL_TREE; >+ > /* For a reference to (address of) a single constant character, > store the native representation of the character in CHARBUF. 
> If the reference is to an element of an array or a member >@@ -12007,6 +11930,9 @@ constant_byte_string (tree arg, tree *pt > initsize = integer_zero_node; > > unsigned HOST_WIDE_INT size = tree_to_uhwi (initsize); >+ if (size > (unsigned HOST_WIDE_INT) INT_MAX) >+ return NULL_TREE; >+ > init = build_string_literal (size, NULL, chartype, size); > init = TREE_OPERAND (init, 0); > init = TREE_OPERAND (init, 0); >--- gcc/testsuite/gcc.c-torture/execute/pr98366.c.jj 2020-12-18 >14:46:43.665716370 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr98366.c 2020-12-18 >14:50:12.732338089 +0100 >@@ -0,0 +1,13 @@ >+/* PR middle-end/98366 */ >+/* { dg-require-effective-target int32 } */ >+ >+typedef struct S { int a, b, c : 7, d : 8, e : 17; } S; >+const S f[] = { {0, 3, 4, 2, 0} }; >+ >+int >+main () >+{ >+ if (__builtin_memcmp (f, (S[]){{.b = 3, .c = 4, .d = 2, .e = 0}}, >sizeof (S))) >+ __builtin_abort (); >+ return 0; >+} > > Jakub