Hi! This fixes two wrong code bugs where string_constant returns over length string constants. Initializers like that are rejected in C++, but valid in C.
I have xfailed strlenopt-49.c, which tests this feature. Personally I don't think that it is worth the effort to optimize something that is per se invalid in C++. The hunk in builtins.c is unrelated, but I would like to use tree_to_shwi here, to be in line with the tree_fits_shwi_p check above, and the rest of that function which uses signed HWI throughout. Bootstrapped and reg-tested on x86_64-pc-linux-gnu. Is it OK for trunk? Thanks Bernd.
gcc: 2018-07-29 Bernd Edlinger <bernd.edlin...@hotmail.de> PR middle-end/86711 PR middle-end/86714 * builtins.c (c_strlen): Use tree_to_shwi because maxelts is signed. * expr.c (string_constant): Don't return truncated string literals. * fold-const.c (c_getstr): Fix function comment. Remove unused third argument. Fix range checks. * fold-const.c (c_getstr): Adjust protoype. testsuite: 2018-07-29 Bernd Edlinger <bernd.edlin...@hotmail.de> PR middle-end/86711 PR middle-end/86714 * gcc.c-torture/execute/pr86711.c: New test. * gcc.c-torture/execute/pr86714.c: New test. * gcc.dg/strlenopt-49.c: Add xfail.
Index: gcc/builtins.c =================================================================== --- gcc/builtins.c (revision 263045) +++ gcc/builtins.c (working copy) @@ -617,7 +617,7 @@ c_strlen (tree src, int only_value) if (tree size = TYPE_SIZE_UNIT (type)) if (tree_fits_shwi_p (size)) { - maxelts = tree_to_uhwi (size); + maxelts = tree_to_shwi (size); maxelts = maxelts / eltsize - 1; } Index: gcc/expr.c =================================================================== --- gcc/expr.c (revision 263045) +++ gcc/expr.c (working copy) @@ -11410,24 +11410,14 @@ string_constant (tree arg, tree *ptr_offset) if (!init || TREE_CODE (init) != STRING_CST) return NULL_TREE; - tree array_size = DECL_SIZE_UNIT (array); - if (!array_size || TREE_CODE (array_size) != INTEGER_CST) - return NULL_TREE; - /* Avoid returning a string that doesn't fit in the array - it is stored in, like - const char a[4] = "abcde"; - but do handle those that fit even if they have excess - initializers, such as in - const char a[4] = "abc\000\000"; - The excess elements contribute to TREE_STRING_LENGTH() - but not to strlen(). */ - unsigned HOST_WIDE_INT charsize - = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (init)))); - unsigned HOST_WIDE_INT length = TREE_STRING_LENGTH (init); - length = string_length (TREE_STRING_POINTER (init), charsize, - length / charsize); - if (compare_tree_int (array_size, length + 1) < 0) + it is stored in, like: + const char a[4] = "abcd"; + because callers expect to be able to access the string + up to the limit imposed by TREE_STRING_LENGTH which + always includes the terminating NUL char. */ + if (compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (init)), + TREE_STRING_LENGTH (init)) < 0) return NULL_TREE; *ptr_offset = offset; Index: gcc/fold-const.c =================================================================== --- gcc/fold-const.c (revision 263045) +++ gcc/fold-const.c (working copy) @@ -14577,16 +14577,12 @@ fold_build_pointer_plus_hwi_loc (location_t loc, t /* Return a pointer P to a NUL-terminated string representing the sequence of constant characters referred to by SRC (or a subsequence of such characters within it if SRC is a reference to a string plus some - constant offset). If STRLEN is non-null, store stgrlen(P) in *STRLEN. - If STRSIZE is non-null, store in *STRSIZE the size of the array - the string is stored in; in that case, even though P points to a NUL - terminated string, SRC need not refer to one. This can happen when - SRC refers to a constant character array initialized to all non-NUL - values, as in the C declaration: char a[4] = "1234"; */ + constant offset). If STRLEN is non-null, store the number of bytes + in the string constant including the terminating NUL char. *STRLEN is + typically strlen(P) + 1 in the absence of embedded NUL characters. */ const char * -c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */, - unsigned HOST_WIDE_INT *strsize /* = NULL */) +c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */) { tree offset_node; @@ -14613,40 +14609,24 @@ const char * unsigned HOST_WIDE_INT string_size = string_length; tree type = TREE_TYPE (src); if (tree size = TYPE_SIZE_UNIT (type)) - if (tree_fits_shwi_p (size)) + if (tree_fits_uhwi_p (size)) string_size = tree_to_uhwi (size); + if (offset >= string_size) + return NULL; + if (strlen) { /* Compute and store the length of the substring at OFFSET. All offsets past the initial length refer to null strings. */ - if (offset <= string_length) + if (offset < string_length) *strlen = string_length - offset; else - *strlen = 0; + *strlen = 1; } const char *string = TREE_STRING_POINTER (src); - - if (string_length == 0 - || offset >= string_size) - return NULL; - - if (strsize) - { - /* Support even constant character arrays that aren't proper - NUL-terminated strings. */ - *strsize = string_size; - } - else if (string[string_length - 1] != '\0') - { - /* Support only properly NUL-terminated strings but handle - consecutive strings within the same array, such as the six - substrings in "1\0002\0003". */ - return NULL; - } - - return offset <= string_length ? string + offset : ""; + return offset < string_length ? string + offset : ""; } /* Given a tree T, compute which bits in T may be nonzero. */ Index: gcc/fold-const.h =================================================================== --- gcc/fold-const.h (revision 263045) +++ gcc/fold-const.h (working copy) @@ -187,8 +187,7 @@ extern bool expr_not_equal_to (tree t, const wide_ extern tree const_unop (enum tree_code, tree, tree); extern tree const_binop (enum tree_code, tree, tree, tree); extern bool negate_mathfn_p (combined_fn); -extern const char *c_getstr (tree, unsigned HOST_WIDE_INT * = NULL, - unsigned HOST_WIDE_INT * = NULL); +extern const char *c_getstr (tree, unsigned HOST_WIDE_INT * = NULL); extern wide_int tree_nonzero_bits (const_tree); /* Return OFF converted to a pointer offset type suitable as offset for Index: gcc/testsuite/gcc.c-torture/execute/pr86711.c =================================================================== --- gcc/testsuite/gcc.c-torture/execute/pr86711.c (revision 0) +++ gcc/testsuite/gcc.c-torture/execute/pr86711.c (working copy) @@ -0,0 +1,11 @@ +/* PR middle-end/86711 */ + +static const char a[2][4] = { "1234", "5678" }; + +int main () +{ + void *p = __builtin_memchr (a, 0, 5); + + if (p) + __builtin_abort (); +} Index: gcc/testsuite/gcc.c-torture/execute/pr86714.c =================================================================== --- gcc/testsuite/gcc.c-torture/execute/pr86714.c (revision 0) +++ gcc/testsuite/gcc.c-torture/execute/pr86714.c (working copy) @@ -0,0 +1,12 @@ +/* PR middle-end/86714 */ + +const char a[2][3] = { "1234", "xyz" }; +char b[6]; + +int main () +{ + __builtin_memcpy (b, a, 4); + __builtin_memset (b + 4, 'a', 2); + if (__builtin_memcmp (b, "123xaa", 6)) + __builtin_abort (); +} Index: gcc/testsuite/gcc.dg/strlenopt-49.c =================================================================== --- gcc/testsuite/gcc.dg/strlenopt-49.c (revision 263045) +++ gcc/testsuite/gcc.dg/strlenopt-49.c (working copy) @@ -45,9 +45,9 @@ int cmp88 (void) return cmp88; } -/* { dg-final { scan-tree-dump-times "strlen" 0 "gimple" } } - { dg-final { scan-tree-dump-times "len0 = 0;" 1 "gimple" } } - { dg-final { scan-tree-dump-times "len = 18;" 1 "gimple" } } - { dg-final { scan-tree-dump-times "lenx = 8;" 1 "gimple" } } - { dg-final { scan-tree-dump-times "leny = 0;" 1 "gimple" } } - { dg-final { scan-tree-dump-times "cmp88 = 0;" 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "strlen" 0 "gimple" { xfail *-*-* } } } + { dg-final { scan-tree-dump-times "len0 = 0;" 1 "gimple" { xfail *-*-* } } } + { dg-final { scan-tree-dump-times "len = 18;" 1 "gimple" { xfail *-*-* } } } + { dg-final { scan-tree-dump-times "lenx = 8;" 1 "gimple" { xfail *-*-* } } } + { dg-final { scan-tree-dump-times "leny = 0;" 1 "gimple" { xfail *-*-* } } } + { dg-final { scan-tree-dump-times "cmp88 = 0;" 1 "gimple" { xfail *-*-* } } } */