Hi!

After the other patch has been applied, I re-based this patch accordingly.

Except the mechanical changes, there are a few notable differences to the
previous version:

In string_constant, I added a similar check for the STRING_CSTs
because when callers don't use mem_size, they assume to be
able to read "TREE_STRING_LENGTH (array)" bytes, but that is
not always the case, for languages that don't always use
zero-terminated strings, for instance hollerith strings in fortran.

--- gcc/expr.c  2018-08-17 05:32:57.332211963 +0200
+++ gcc/expr.c  2018-08-16 23:08:23.544940795 +0200
@@ -11372,6 +11372,9 @@ string_constant (tree arg, tree *ptr_off
       *ptr_offset = fold_convert (sizetype, offset);
       if (mem_size)
        *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (array));
+      else if (compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (array)),
+                                TREE_STRING_LENGTH (array)) < 0)
+       return NULL_TREE;
       return array;
     }


The range check in c_getstr was refined as well:

This I added, because vla arrays can be initialized with string constants,
especially since the 71625 patch was installed:
In this case we end up with mem_size that fails to be constant.


@@ -14606,25 +14603,17 @@ c_getstr (tree src, unsigned HOST_WIDE_I
        offset = tree_to_uhwi (offset_node);
     }

+  if (!tree_fits_uhwi_p (mem_size))
+    return NULL;
+
   /* STRING_LENGTH is the size of the string literal, including any
      embedded NULs.  STRING_SIZE is the size of the array the string
      literal is stored in.  */

Also the rest of the string length checks are refined, to return
actually zero-terminated single byte strings when strlen is not given,
and return something not necessarily zero-terminated which is
suitable for memxxx-functions otherwise.


Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
Is it OK for trunk?


Thanks
Bernd.
gcc:
2018-08-17  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	PR middle-end/86711
	PR middle-end/86714
	* expr.c (string_constant): Don't return truncated string literals.
	* fold-const.c (c_getstr): Fix function comment.  Remove unused third
	argument.  Fix range checks.
	* fold-const.c (c_getstr): Adjust protoype.

testsuite:
2018-08-17  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	PR middle-end/86711
	PR middle-end/86714
	* gcc.c-torture/execute/pr86711.c: New test.
	* gcc.c-torture/execute/pr86714.c: New test.

diff -Npur gcc/expr.c gcc/expr.c
--- gcc/expr.c	2018-08-17 05:32:57.332211963 +0200
+++ gcc/expr.c	2018-08-16 23:08:23.544940795 +0200
@@ -11372,6 +11372,9 @@ string_constant (tree arg, tree *ptr_off
       *ptr_offset = fold_convert (sizetype, offset);
       if (mem_size)
 	*mem_size = TYPE_SIZE_UNIT (TREE_TYPE (array));
+      else if (compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (array)),
+				 TREE_STRING_LENGTH (array)) < 0)
+	return NULL_TREE;
       return array;
     }
 
@@ -11414,26 +11417,10 @@ string_constant (tree arg, tree *ptr_off
   if (!init || TREE_CODE (init) != STRING_CST)
     return NULL_TREE;
 
-  tree array_size = DECL_SIZE_UNIT (array);
-  if (!array_size || TREE_CODE (array_size) != INTEGER_CST)
-    return NULL_TREE;
-
-  /* Avoid returning a string that doesn't fit in the array
-     it is stored in, like
-     const char a[4] = "abcde";
-     but do handle those that fit even if they have excess
-     initializers, such as in
-     const char a[4] = "abc\000\000";
-     The excess elements contribute to TREE_STRING_LENGTH()
-     but not to strlen().  */
-  unsigned HOST_WIDE_INT charsize
-    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (init))));
-  unsigned HOST_WIDE_INT length = TREE_STRING_LENGTH (init);
-  length = string_length (TREE_STRING_POINTER (init), charsize,
-			  length / charsize);
   if (mem_size)
     *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (init));
-  else if (compare_tree_int (array_size, length + 1) < 0)
+  else if (compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (init)),
+			     TREE_STRING_LENGTH (init)) < 0)
     return NULL_TREE;
 
   *ptr_offset = offset;
diff -Npur gcc/fold-const.c gcc/fold-const.c
--- gcc/fold-const.c	2018-07-16 08:49:39.000000000 +0200
+++ gcc/fold-const.c	2018-08-16 23:31:11.490869136 +0200
@@ -14577,23 +14577,20 @@ fold_build_pointer_plus_hwi_loc (locatio
 /* Return a pointer P to a NUL-terminated string representing the sequence
    of constant characters referred to by SRC (or a subsequence of such
    characters within it if SRC is a reference to a string plus some
-   constant offset).  If STRLEN is non-null, store stgrlen(P) in *STRLEN.
-   If STRSIZE is non-null, store in *STRSIZE the size of the array
-   the string is stored in; in that case, even though P points to a NUL
-   terminated string, SRC need not refer to one.  This can happen when
-   SRC refers to a constant character array initialized to all non-NUL
-   values, as in the C declaration: char a[4] = "1234";  */
+   constant offset).  If STRLEN is non-null, store the number of bytes
+   in the string constant including the terminating NUL char.  *STRLEN is
+   typically strlen(P) + 1 in the absence of embedded NUL characters.  */
 
 const char *
-c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */,
-	  unsigned HOST_WIDE_INT *strsize /* = NULL */)
+c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */)
 {
   tree offset_node;
+  tree mem_size;
 
   if (strlen)
     *strlen = 0;
 
-  src = string_constant (src, &offset_node);
+  src = string_constant (src, &offset_node, &mem_size);
   if (src == 0)
     return NULL;
 
@@ -14606,25 +14603,17 @@ c_getstr (tree src, unsigned HOST_WIDE_I
 	offset = tree_to_uhwi (offset_node);
     }
 
+  if (!tree_fits_uhwi_p (mem_size))
+    return NULL;
+
   /* STRING_LENGTH is the size of the string literal, including any
      embedded NULs.  STRING_SIZE is the size of the array the string
      literal is stored in.  */
   unsigned HOST_WIDE_INT string_length = TREE_STRING_LENGTH (src);
-  unsigned HOST_WIDE_INT string_size = string_length;
-  tree type = TREE_TYPE (src);
-  if (tree size = TYPE_SIZE_UNIT (type))
-    if (tree_fits_shwi_p (size))
-      string_size = tree_to_uhwi (size);
+  unsigned HOST_WIDE_INT string_size = tree_to_uhwi (mem_size);
 
-  if (strlen)
-    {
-      /* Compute and store the length of the substring at OFFSET.
-	 All offsets past the initial length refer to null strings.  */
-      if (offset <= string_length)
-	*strlen = string_length - offset;
-      else
-	*strlen = 0;
-    }
+  if (string_length > string_size)
+    string_length = string_size;
 
   const char *string = TREE_STRING_POINTER (src);
 
@@ -14632,21 +14621,26 @@ c_getstr (tree src, unsigned HOST_WIDE_I
       || offset >= string_size)
     return NULL;
 
-  if (strsize)
+  if (strlen)
     {
-      /* Support even constant character arrays that aren't proper
-	 NUL-terminated strings.  */
-      *strsize = string_size;
+      /* Compute and store the length of the substring at OFFSET.
+	 All offsets past the initial length refer to null strings.  */
+      if (offset < string_length)
+	*strlen = string_length - offset;
+      else
+	*strlen = 1;
     }
-  else if (string[string_length - 1] != '\0')
+  else
     {
-      /* Support only properly NUL-terminated strings but handle
-	 consecutive strings within the same array, such as the six
-	 substrings in "1\0002\0003".  */
-      return NULL;
+      tree eltype = TREE_TYPE (TREE_TYPE (src));
+      /* Support only properly NUL-terminated single byte strings.  */
+      if (tree_to_uhwi (TYPE_SIZE_UNIT (eltype)) != 1)
+	return NULL;
+      if (string[string_length - 1] != '\0')
+	return NULL;
     }
 
-  return offset <= string_length ? string + offset : "";
+  return offset < string_length ? string + offset : "";
 }
 
 /* Given a tree T, compute which bits in T may be nonzero.  */
diff -Npur gcc/fold-const.h gcc/fold-const.h
--- gcc/fold-const.h	2018-07-16 08:49:39.000000000 +0200
+++ gcc/fold-const.h	2018-08-16 22:38:49.962205027 +0200
@@ -187,8 +187,7 @@ extern bool expr_not_equal_to (tree t, c
 extern tree const_unop (enum tree_code, tree, tree);
 extern tree const_binop (enum tree_code, tree, tree, tree);
 extern bool negate_mathfn_p (combined_fn);
-extern const char *c_getstr (tree, unsigned HOST_WIDE_INT * = NULL,
-			     unsigned HOST_WIDE_INT * = NULL);
+extern const char *c_getstr (tree, unsigned HOST_WIDE_INT * = NULL);
 extern wide_int tree_nonzero_bits (const_tree);
 
 /* Return OFF converted to a pointer offset type suitable as offset for
diff -Npur gcc/testsuite/gcc.c-torture/execute/pr86711.c gcc/testsuite/gcc.c-torture/execute/pr86711.c
--- gcc/testsuite/gcc.c-torture/execute/pr86711.c	1970-01-01 01:00:00.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr86711.c	2018-08-16 22:38:49.963205014 +0200
@@ -0,0 +1,11 @@
+/* PR middle-end/86711 */
+
+static const char a[2][4] = { "1234", "5678" };
+
+int main ()
+{
+  void *p = __builtin_memchr (a, 0, 5);
+
+  if (p)
+    __builtin_abort ();
+}
diff -Npur gcc/testsuite/gcc.c-torture/execute/pr86714.c gcc/testsuite/gcc.c-torture/execute/pr86714.c
--- gcc/testsuite/gcc.c-torture/execute/pr86714.c	1970-01-01 01:00:00.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr86714.c	2018-08-16 22:38:49.963205014 +0200
@@ -0,0 +1,12 @@
+/* PR middle-end/86714 */
+
+const char a[2][3] = { "1234", "xyz" };
+char b[6];
+
+int main ()
+{
+  __builtin_memcpy (b, a, 4);
+  __builtin_memset (b + 4, 'a', 2);
+  if (__builtin_memcmp (b, "123xaa", 6))
+    __builtin_abort ();
+}

Reply via email to