Hi!
Now that the other patch has been applied, I have rebased this patch accordingly.
Apart from the mechanical changes, there are a few notable differences from the
previous version:
In string_constant, I added a similar check for STRING_CSTs,
because callers that don't use mem_size assume they are
able to read "TREE_STRING_LENGTH (array)" bytes, but that is
not always the case for languages that don't always use
zero-terminated strings, for instance Hollerith strings in Fortran.
--- gcc/expr.c 2018-08-17 05:32:57.332211963 +0200
+++ gcc/expr.c 2018-08-16 23:08:23.544940795 +0200
@@ -11372,6 +11372,9 @@ string_constant (tree arg, tree *ptr_off
*ptr_offset = fold_convert (sizetype, offset);
if (mem_size)
*mem_size = TYPE_SIZE_UNIT (TREE_TYPE (array));
+ else if (compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (array)),
+ TREE_STRING_LENGTH (array)) < 0)
+ return NULL_TREE;
return array;
}
The range check in c_getstr was refined as well:
I added the check below because variable-length arrays can be initialized with
string constants, especially since the 71625 patch was installed.
In this case we end up with a mem_size that is not constant.
@@ -14606,25 +14603,17 @@ c_getstr (tree src, unsigned HOST_WIDE_I
offset = tree_to_uhwi (offset_node);
}
+ if (!tree_fits_uhwi_p (mem_size))
+ return NULL;
+
/* STRING_LENGTH is the size of the string literal, including any
embedded NULs. STRING_SIZE is the size of the array the string
literal is stored in. */
The rest of the string length checks are refined as well: c_getstr now returns
only properly zero-terminated single-byte strings when strlen is not given,
and otherwise returns something that is not necessarily zero-terminated but is
suitable for the memxxx functions.
Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
Is it OK for trunk?
Thanks
Bernd.
gcc:
2018-08-17 Bernd Edlinger <bernd.edlin...@hotmail.de>
PR middle-end/86711
PR middle-end/86714
* expr.c (string_constant): Don't return truncated string literals.
* fold-const.c (c_getstr): Fix function comment. Remove unused third
argument. Fix range checks.
* fold-const.h (c_getstr): Adjust prototype.
testsuite:
2018-08-17 Bernd Edlinger <bernd.edlin...@hotmail.de>
PR middle-end/86711
PR middle-end/86714
* gcc.c-torture/execute/pr86711.c: New test.
* gcc.c-torture/execute/pr86714.c: New test.
diff -Npur gcc/expr.c gcc/expr.c
--- gcc/expr.c 2018-08-17 05:32:57.332211963 +0200
+++ gcc/expr.c 2018-08-16 23:08:23.544940795 +0200
@@ -11372,6 +11372,9 @@ string_constant (tree arg, tree *ptr_off
*ptr_offset = fold_convert (sizetype, offset);
if (mem_size)
*mem_size = TYPE_SIZE_UNIT (TREE_TYPE (array));
+ else if (compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (array)),
+ TREE_STRING_LENGTH (array)) < 0)
+ return NULL_TREE;
return array;
}
@@ -11414,26 +11417,10 @@ string_constant (tree arg, tree *ptr_off
if (!init || TREE_CODE (init) != STRING_CST)
return NULL_TREE;
- tree array_size = DECL_SIZE_UNIT (array);
- if (!array_size || TREE_CODE (array_size) != INTEGER_CST)
- return NULL_TREE;
-
- /* Avoid returning a string that doesn't fit in the array
- it is stored in, like
- const char a[4] = "abcde";
- but do handle those that fit even if they have excess
- initializers, such as in
- const char a[4] = "abc\000\000";
- The excess elements contribute to TREE_STRING_LENGTH()
- but not to strlen(). */
- unsigned HOST_WIDE_INT charsize
- = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (init))));
- unsigned HOST_WIDE_INT length = TREE_STRING_LENGTH (init);
- length = string_length (TREE_STRING_POINTER (init), charsize,
- length / charsize);
if (mem_size)
*mem_size = TYPE_SIZE_UNIT (TREE_TYPE (init));
- else if (compare_tree_int (array_size, length + 1) < 0)
+ else if (compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (init)),
+ TREE_STRING_LENGTH (init)) < 0)
return NULL_TREE;
*ptr_offset = offset;
diff -Npur gcc/fold-const.c gcc/fold-const.c
--- gcc/fold-const.c 2018-07-16 08:49:39.000000000 +0200
+++ gcc/fold-const.c 2018-08-16 23:31:11.490869136 +0200
@@ -14577,23 +14577,20 @@ fold_build_pointer_plus_hwi_loc (locatio
/* Return a pointer P to a NUL-terminated string representing the sequence
of constant characters referred to by SRC (or a subsequence of such
characters within it if SRC is a reference to a string plus some
- constant offset). If STRLEN is non-null, store stgrlen(P) in *STRLEN.
- If STRSIZE is non-null, store in *STRSIZE the size of the array
- the string is stored in; in that case, even though P points to a NUL
- terminated string, SRC need not refer to one. This can happen when
- SRC refers to a constant character array initialized to all non-NUL
- values, as in the C declaration: char a[4] = "1234"; */
+ constant offset). If STRLEN is non-null, store the number of bytes
+ in the string constant including the terminating NUL char. *STRLEN is
+ typically strlen(P) + 1 in the absence of embedded NUL characters. */
const char *
-c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */,
- unsigned HOST_WIDE_INT *strsize /* = NULL */)
+c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */)
{
tree offset_node;
+ tree mem_size;
if (strlen)
*strlen = 0;
- src = string_constant (src, &offset_node);
+ src = string_constant (src, &offset_node, &mem_size);
if (src == 0)
return NULL;
@@ -14606,25 +14603,17 @@ c_getstr (tree src, unsigned HOST_WIDE_I
offset = tree_to_uhwi (offset_node);
}
+ if (!tree_fits_uhwi_p (mem_size))
+ return NULL;
+
/* STRING_LENGTH is the size of the string literal, including any
embedded NULs. STRING_SIZE is the size of the array the string
literal is stored in. */
unsigned HOST_WIDE_INT string_length = TREE_STRING_LENGTH (src);
- unsigned HOST_WIDE_INT string_size = string_length;
- tree type = TREE_TYPE (src);
- if (tree size = TYPE_SIZE_UNIT (type))
- if (tree_fits_shwi_p (size))
- string_size = tree_to_uhwi (size);
+ unsigned HOST_WIDE_INT string_size = tree_to_uhwi (mem_size);
- if (strlen)
- {
- /* Compute and store the length of the substring at OFFSET.
- All offsets past the initial length refer to null strings. */
- if (offset <= string_length)
- *strlen = string_length - offset;
- else
- *strlen = 0;
- }
+ if (string_length > string_size)
+ string_length = string_size;
const char *string = TREE_STRING_POINTER (src);
@@ -14632,21 +14621,26 @@ c_getstr (tree src, unsigned HOST_WIDE_I
|| offset >= string_size)
return NULL;
- if (strsize)
+ if (strlen)
{
- /* Support even constant character arrays that aren't proper
- NUL-terminated strings. */
- *strsize = string_size;
+ /* Compute and store the length of the substring at OFFSET.
+ All offsets past the initial length refer to null strings. */
+ if (offset < string_length)
+ *strlen = string_length - offset;
+ else
+ *strlen = 1;
}
- else if (string[string_length - 1] != '\0')
+ else
{
- /* Support only properly NUL-terminated strings but handle
- consecutive strings within the same array, such as the six
- substrings in "1\0002\0003". */
- return NULL;
+ tree eltype = TREE_TYPE (TREE_TYPE (src));
+ /* Support only properly NUL-terminated single byte strings. */
+ if (tree_to_uhwi (TYPE_SIZE_UNIT (eltype)) != 1)
+ return NULL;
+ if (string[string_length - 1] != '\0')
+ return NULL;
}
- return offset <= string_length ? string + offset : "";
+ return offset < string_length ? string + offset : "";
}
/* Given a tree T, compute which bits in T may be nonzero. */
diff -Npur gcc/fold-const.h gcc/fold-const.h
--- gcc/fold-const.h 2018-07-16 08:49:39.000000000 +0200
+++ gcc/fold-const.h 2018-08-16 22:38:49.962205027 +0200
@@ -187,8 +187,7 @@ extern bool expr_not_equal_to (tree t, c
extern tree const_unop (enum tree_code, tree, tree);
extern tree const_binop (enum tree_code, tree, tree, tree);
extern bool negate_mathfn_p (combined_fn);
-extern const char *c_getstr (tree, unsigned HOST_WIDE_INT * = NULL,
- unsigned HOST_WIDE_INT * = NULL);
+extern const char *c_getstr (tree, unsigned HOST_WIDE_INT * = NULL);
extern wide_int tree_nonzero_bits (const_tree);
/* Return OFF converted to a pointer offset type suitable as offset for
diff -Npur gcc/testsuite/gcc.c-torture/execute/pr86711.c gcc/testsuite/gcc.c-torture/execute/pr86711.c
--- gcc/testsuite/gcc.c-torture/execute/pr86711.c 1970-01-01 01:00:00.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr86711.c 2018-08-16 22:38:49.963205014 +0200
@@ -0,0 +1,11 @@
+/* PR middle-end/86711 */
+
+static const char a[2][4] = { "1234", "5678" };
+
+int main ()
+{
+ void *p = __builtin_memchr (a, 0, 5);
+
+ if (p)
+ __builtin_abort ();
+}
diff -Npur gcc/testsuite/gcc.c-torture/execute/pr86714.c gcc/testsuite/gcc.c-torture/execute/pr86714.c
--- gcc/testsuite/gcc.c-torture/execute/pr86714.c 1970-01-01 01:00:00.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr86714.c 2018-08-16 22:38:49.963205014 +0200
@@ -0,0 +1,12 @@
+/* PR middle-end/86714 */
+
+const char a[2][3] = { "1234", "xyz" };
+char b[6];
+
+int main ()
+{
+ __builtin_memcpy (b, a, 4);
+ __builtin_memset (b + 4, 'a', 2);
+ if (__builtin_memcmp (b, "123xaa", 6))
+ __builtin_abort ();
+}