tree-ssa-strlen.c looks for cases in which a string is built up using
operations like:

    memcpy (a, "foo", 4);
    memcpy (a + 3, "bar", 4);
    int x = strlen (a);

As a side-effect, it optimises the non-final memcpys so that they don't
include the nul terminator.

However, after removing some "& ~0x1"s from tree-ssa-dse.c, the DSE pass
does this optimisation itself (because it can tell that later memcpys
overwrite the terminators).  The strlen pass wasn't able to handle these
pre-optimised calls in the same way as the unoptimised ones.

This patch adds support for tracking unterminated strings.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


[Based on commit branches/ARM/sve-branch@246236]

2017-05-05  Richard Sandiford  <richard.sandif...@linaro.org>

gcc/
        * tree-ssa-strlen.c (strinfo): Add a terminated field.
        (new_strinfo): Add a corresponding parameter and initialize the field.
        (get_string_length): Return null for unterminated strings.
        (unshare_strinfo): Update call to new_strinfo.
        (get_stridx_plus_constant): Likewise.
        (zero_length_string): Likewise.
        (handle_builtin_strchr): Likewise.
        (handle_builtin_strcat): Likewise.
        (handle_builtin_malloc): Likewise.
        (adjust_related_strinfos): Add a terminated parameter.
        (adjust_last_stmt): Update test for a zero-length terminated string.
        (handle_builtin_strlen): Assert that we can only know the length
        of terminated strings.  Update calls to new_strinfo.
        (handle_builtin_strcpy): Update calls to new_strinfo and set the
        terminated field when adjusting strinfos manually.
        (handle_builtin_memcpy): Handle unterminated strings.  Update calls
        to new_strinfo.
        (handle_builtin_memset): Initialize the terminated field.
        (handle_pointer_plus): Check for terminated strings.
        (handle_char_store): Handle unterminated strings.

gcc/testsuite/
        * gcc.dg/strlenopt-31.c: New testcase.

Index: gcc/tree-ssa-strlen.c
===================================================================
--- gcc/tree-ssa-strlen.c       2017-02-23 19:54:03.000000000 +0000
+++ gcc/tree-ssa-strlen.c       2017-05-05 12:53:08.764475923 +0100
@@ -99,6 +99,9 @@ struct strinfo
   /* A flag for the next maybe_invalidate that this strinfo shouldn't
      be invalidated.  Always cleared by maybe_invalidate.  */
   bool dont_invalidate;
+  /* True if the string is nul-terminated.  False is useful when
+     detecting strings that are built up via successive memcpys.  */
+  bool terminated;
 };
 
 /* Pool for allocating strinfo_struct entries.  */
@@ -400,7 +403,7 @@ new_addr_stridx (tree exp)
 /* Create a new strinfo.  */
 
 static strinfo *
-new_strinfo (tree ptr, int idx, tree length)
+new_strinfo (tree ptr, int idx, tree length, bool terminated)
 {
   strinfo *si = strinfo_pool.allocate ();
   si->length = length;
@@ -414,6 +417,7 @@ new_strinfo (tree ptr, int idx, tree len
   si->next = 0;
   si->writable = false;
   si->dont_invalidate = false;
+  si->terminated = terminated;
   return si;
 }
 
@@ -443,6 +447,9 @@ set_strinfo (int idx, strinfo *si)
 static tree
 get_string_length (strinfo *si)
 {
+  if (!si->terminated)
+    return NULL;
+
   if (si->length)
     return si->length;
 
@@ -595,7 +602,7 @@ unshare_strinfo (strinfo *si)
   if (si->refcount == 1 && !strinfo_shared ())
     return si;
 
-  nsi = new_strinfo (si->ptr, si->idx, si->length);
+  nsi = new_strinfo (si->ptr, si->idx, si->length, si->terminated);
   nsi->stmt = si->stmt;
   nsi->endptr = si->endptr;
   nsi->first = si->first;
@@ -694,7 +701,8 @@ get_stridx_plus_constant (strinfo *bases
   int idx = new_stridx (ptr);
   if (idx == 0)
     return 0;
-  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, len));
+  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, len),
+                   basesi->terminated);
   set_strinfo (idx, si);
   if (chainsi->next)
     {
@@ -778,7 +786,7 @@ zero_length_string (tree ptr, strinfo *c
   idx = new_stridx (ptr);
   if (idx == 0)
     return NULL;
-  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0));
+  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
   set_strinfo (idx, si);
   si->endptr = ptr;
   if (chainsi != NULL)
@@ -797,11 +805,12 @@ zero_length_string (tree ptr, strinfo *c
 }
 
 /* For strinfo ORIGSI whose length has been just updated
-   update also related strinfo lengths (add ADJ to each,
-   but don't adjust ORIGSI).  */
+   update also related strinfo lengths (add ADJ to each, and change
+   the terminated flag to TERMINATED, but don't adjust ORIGSI).  */
 
 static void
-adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
+adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj,
+                        bool terminated)
 {
   strinfo *si = verify_related_strinfos (origsi);
 
@@ -823,10 +832,11 @@ adjust_related_strinfos (location_t loc,
              si->length = fold_build2_loc (loc, PLUS_EXPR,
                                            TREE_TYPE (si->length), si->length,
                                            tem);
+             si->terminated = terminated;
            }
          else if (si->stmt != NULL)
            /* Delayed length computation is unaffected.  */
-           ;
+           si->terminated = terminated;
          else
            gcc_unreachable ();
 
@@ -1009,7 +1019,9 @@ adjust_last_stmt (strinfo *si, gimple *s
 
   if (!is_strcat)
     {
-      if (si->length == NULL_TREE || !integer_zerop (si->length))
+      if (si->length == NULL_TREE
+         || !integer_zerop (si->length)
+         || !si->terminated)
        return;
     }
 
@@ -1131,6 +1143,7 @@ handle_builtin_strlen (gimple_stmt_itera
            {
              si = unshare_strinfo (si);
              si->length = lhs;
+             gcc_assert (si->terminated);
            }
          return;
        }
@@ -1143,7 +1156,7 @@ handle_builtin_strlen (gimple_stmt_itera
     return;
   if (idx)
     {
-      strinfo *si = new_strinfo (src, idx, lhs);
+      strinfo *si = new_strinfo (src, idx, lhs, true);
       set_strinfo (idx, si);
       find_equal_ptrs (src, idx);
     }
@@ -1247,7 +1260,7 @@ handle_builtin_strchr (gimple_stmt_itera
          tree srcu = fold_convert_loc (loc, size_type_node, src);
          tree length = fold_build2_loc (loc, MINUS_EXPR,
                                         size_type_node, lhsu, srcu);
-         strinfo *si = new_strinfo (src, idx, length);
+         strinfo *si = new_strinfo (src, idx, length, true);
          si->endptr = lhs;
          set_strinfo (idx, si);
          find_equal_ptrs (src, idx);
@@ -1339,6 +1352,7 @@ handle_builtin_strcpy (enum built_in_fun
       oldlen = olddsi->length;
       dsi = unshare_strinfo (olddsi);
       dsi->length = srclen;
+      dsi->terminated = true;
       /* Break the chain, so adjust_related_strinfo on later pointers in
         the chain won't adjust this one anymore.  */
       dsi->next = 0;
@@ -1347,7 +1361,7 @@ handle_builtin_strcpy (enum built_in_fun
     }
   else
     {
-      dsi = new_strinfo (dst, didx, srclen);
+      dsi = new_strinfo (dst, didx, srclen, true);
       set_strinfo (didx, dsi);
       find_equal_ptrs (dst, didx);
     }
@@ -1376,6 +1390,7 @@ handle_builtin_strcpy (enum built_in_fun
              chainsi = unshare_strinfo (chainsi);
              chainsi->stmt = stmt;
              chainsi->length = NULL_TREE;
+             chainsi->terminated = true;
              chainsi->endptr = NULL_TREE;
              chainsi->dont_invalidate = true;
            }
@@ -1398,7 +1413,7 @@ handle_builtin_strcpy (enum built_in_fun
                               fold_convert_loc (loc, TREE_TYPE (srclen),
                                                 oldlen));
       if (adj != NULL_TREE)
-       adjust_related_strinfos (loc, dsi, adj);
+       adjust_related_strinfos (loc, dsi, adj, true);
       else
        dsi->prev = 0;
     }
@@ -1543,31 +1558,45 @@ handle_builtin_memcpy (enum built_in_fun
       && !integer_zerop (len))
     adjust_last_stmt (olddsi, stmt, false);
 
+  bool terminated;
   if (idx > 0)
     {
       gimple *def_stmt;
 
-      /* Handle memcpy (x, y, l) where l is strlen (y) + 1.  */
+      /* Handle memcpy (x, y, l) where l is strlen (y){, + 1}.  */
       si = get_strinfo (idx);
       if (si == NULL || si->length == NULL_TREE)
        return;
-      if (TREE_CODE (len) != SSA_NAME)
-       return;
-      def_stmt = SSA_NAME_DEF_STMT (len);
-      if (!is_gimple_assign (def_stmt)
-         || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
-         || gimple_assign_rhs1 (def_stmt) != si->length
-         || !integer_onep (gimple_assign_rhs2 (def_stmt)))
-       return;
+      if (len == si->length)
+       terminated = false;
+      else
+       {
+         if (!si->terminated)
+           return;
+         if (TREE_CODE (len) != SSA_NAME)
+           return;
+         def_stmt = SSA_NAME_DEF_STMT (len);
+         if (!is_gimple_assign (def_stmt)
+             || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
+             || gimple_assign_rhs1 (def_stmt) != si->length
+             || !integer_onep (gimple_assign_rhs2 (def_stmt)))
+           return;
+         terminated = true;
+       }
     }
   else
     {
       si = NULL;
       /* Handle memcpy (x, "abcd", 5) or
         memcpy (x, "abc\0uvw", 7).  */
-      if (!tree_fits_uhwi_p (len)
-         || tree_to_uhwi (len) <= (unsigned HOST_WIDE_INT) ~idx)
+      if (!tree_fits_uhwi_p (len))
+       return;
+
+      unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
+      if (clen < (unsigned HOST_WIDE_INT) ~idx)
        return;
+
+      terminated = (clen > (unsigned HOST_WIDE_INT) ~idx);
     }
 
   if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
@@ -1589,6 +1618,7 @@ handle_builtin_memcpy (enum built_in_fun
       dsi = unshare_strinfo (olddsi);
       oldlen = olddsi->length;
       dsi->length = newlen;
+      dsi->terminated = terminated;
       /* Break the chain, so adjust_related_strinfo on later pointers in
         the chain won't adjust this one anymore.  */
       dsi->next = 0;
@@ -1597,7 +1627,7 @@ handle_builtin_memcpy (enum built_in_fun
     }
   else
     {
-      dsi = new_strinfo (dst, didx, newlen);
+      dsi = new_strinfo (dst, didx, newlen, terminated);
       set_strinfo (didx, dsi);
       find_equal_ptrs (dst, didx);
     }
@@ -1618,7 +1648,7 @@ handle_builtin_memcpy (enum built_in_fun
                               fold_convert_loc (loc, TREE_TYPE (dsi->length),
                                                 oldlen));
       if (adj != NULL_TREE)
-       adjust_related_strinfos (loc, dsi, adj);
+       adjust_related_strinfos (loc, dsi, adj, terminated);
       else
        dsi->prev = 0;
     }
@@ -1627,27 +1657,30 @@ handle_builtin_memcpy (enum built_in_fun
   if (si != NULL)
     si->dont_invalidate = true;
 
-  lhs = gimple_call_lhs (stmt);
-  switch (bcode)
+  if (terminated)
     {
-    case BUILT_IN_MEMCPY:
-    case BUILT_IN_MEMCPY_CHK:
-    case BUILT_IN_MEMCPY_CHKP:
-    case BUILT_IN_MEMCPY_CHK_CHKP:
-      /* Allow adjust_last_stmt to decrease this memcpy's size.  */
-      laststmt.stmt = stmt;
-      laststmt.len = dsi->length;
-      laststmt.stridx = dsi->idx;
-      if (lhs)
-       ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
-      break;
-    case BUILT_IN_MEMPCPY:
-    case BUILT_IN_MEMPCPY_CHK:
-    case BUILT_IN_MEMPCPY_CHKP:
-    case BUILT_IN_MEMPCPY_CHK_CHKP:
-      break;
-    default:
-      gcc_unreachable ();
+      lhs = gimple_call_lhs (stmt);
+      switch (bcode)
+       {
+       case BUILT_IN_MEMCPY:
+       case BUILT_IN_MEMCPY_CHK:
+       case BUILT_IN_MEMCPY_CHKP:
+       case BUILT_IN_MEMCPY_CHK_CHKP:
+         /* Allow adjust_last_stmt to decrease this memcpy's size.  */
+         laststmt.stmt = stmt;
+         laststmt.len = dsi->length;
+         laststmt.stridx = dsi->idx;
+         if (lhs)
+           ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
+         break;
+       case BUILT_IN_MEMPCPY:
+       case BUILT_IN_MEMPCPY_CHK:
+       case BUILT_IN_MEMPCPY_CHKP:
+       case BUILT_IN_MEMPCPY_CHK_CHKP:
+         break;
+       default:
+         gcc_unreachable ();
+       }
     }
 }
 
@@ -1696,7 +1729,7 @@ handle_builtin_strcat (enum built_in_fun
            }
          if (dsi == NULL)
            {
-             dsi = new_strinfo (dst, didx, NULL_TREE);
+             dsi = new_strinfo (dst, didx, NULL_TREE, true);
              set_strinfo (didx, dsi);
              find_equal_ptrs (dst, didx);
            }
@@ -1704,6 +1737,7 @@ handle_builtin_strcat (enum built_in_fun
            {
              dsi = unshare_strinfo (dsi);
              dsi->length = NULL_TREE;
+             dsi->terminated = true;
              dsi->next = 0;
              dsi->endptr = NULL_TREE;
            }
@@ -1739,7 +1773,7 @@ handle_builtin_strcat (enum built_in_fun
     {
       dsi->length = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (dsi->length),
                                     dsi->length, srclen);
-      adjust_related_strinfos (loc, dsi, srclen);
+      adjust_related_strinfos (loc, dsi, srclen, true);
       dsi->dont_invalidate = true;
     }
   else
@@ -1877,7 +1911,7 @@ handle_builtin_malloc (enum built_in_fun
   tree length = NULL_TREE;
   if (bcode == BUILT_IN_CALLOC)
     length = build_int_cst (size_type_node, 0);
-  strinfo *si = new_strinfo (lhs, idx, length);
+  strinfo *si = new_strinfo (lhs, idx, length, bcode == BUILT_IN_CALLOC);
   if (bcode == BUILT_IN_CALLOC)
     si->endptr = lhs;
   set_strinfo (idx, si);
@@ -1920,6 +1954,7 @@ handle_builtin_memset (gimple_stmt_itera
       update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
                          size, build_one_cst (size_type_node));
       si1->length = build_int_cst (size_type_node, 0);
+      si1->terminated = true;
       si1->stmt = gsi_stmt (gsi1);
     }
   else
@@ -2056,12 +2091,13 @@ handle_pointer_plus (gimple_stmt_iterato
 
   off = gimple_assign_rhs2 (stmt);
   zsi = NULL;
-  if (operand_equal_p (si->length, off, 0))
+  if (si->terminated && operand_equal_p (si->length, off, 0))
     zsi = zero_length_string (lhs, si);
   else if (TREE_CODE (off) == SSA_NAME)
     {
       gimple *def_stmt = SSA_NAME_DEF_STMT (off);
       if (gimple_assign_single_p (def_stmt)
+         && si->terminated
          && operand_equal_p (si->length, gimple_assign_rhs1 (def_stmt), 0))
        zsi = zero_length_string (lhs, si);
     }
@@ -2088,6 +2124,8 @@ handle_char_store (gimple_stmt_iterator
   strinfo *si = NULL;
   gimple *stmt = gsi_stmt (*gsi);
   tree ssaname = NULL_TREE, lhs = gimple_assign_lhs (stmt);
+  tree rhs = gimple_assign_rhs1 (stmt);
+  int terminated = -1;
 
   if (TREE_CODE (lhs) == MEM_REF
       && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
@@ -2101,12 +2139,18 @@ handle_char_store (gimple_stmt_iterator
   else
     idx = get_addr_stridx (lhs, NULL_TREE);
 
+  if (initializer_zerop (rhs))
+    terminated = 1;
+  else if (TREE_CODE (rhs) == INTEGER_CST && integer_nonzerop (rhs))
+    terminated = 0;
+
   if (idx > 0)
     {
       si = get_strinfo (idx);
       if (si != NULL && si->length != NULL_TREE && integer_zerop (si->length))
        {
-         if (initializer_zerop (gimple_assign_rhs1 (stmt)))
+         /* We're writing to the end of a previous string.  */
+         if (si->terminated && terminated == 1)
            {
              /* When storing '\0', the store can be removed
                 if we know it has been stored in the current function.  */
@@ -2125,15 +2169,20 @@ handle_char_store (gimple_stmt_iterator
                }
            }
          else
-           /* Otherwise this statement overwrites the '\0' with
-              something, if the previous stmt was a memcpy,
-              its length may be decreased.  */
-           adjust_last_stmt (si, stmt, false);
+           {
+             /* Otherwise leave the entry alone and allow it to be
+                invalidated.  */
+             if (si->terminated)
+               /* This statement overwrites the '\0' with something, if the
+                  previous stmt was a memcpy, its length may be decreased.  */
+               adjust_last_stmt (si, stmt, false);
+           }
        }
-      else if (si != NULL && integer_zerop (gimple_assign_rhs1 (stmt)))
+      else if (si != NULL && terminated == 1)
        {
          si = unshare_strinfo (si);
          si->length = build_int_cst (size_type_node, 0);
+         si->terminated = true;
          si->endptr = NULL;
          si->prev = 0;
          si->next = 0;
@@ -2164,35 +2213,31 @@ handle_char_store (gimple_stmt_iterator
           bar (len, len2, len3, len4);
         }
        */ 
-      else if (si != NULL && si->length != NULL_TREE
+      else if (si != NULL
+              && si->length != NULL_TREE
               && TREE_CODE (si->length) == INTEGER_CST
-              && integer_nonzerop (gimple_assign_rhs1 (stmt)))
+              && terminated == 0)
        {
          gsi_next (gsi);
          return false;
        }
     }
-  else if (idx == 0 && initializer_zerop (gimple_assign_rhs1 (stmt)))
+  else if (idx == 0 && terminated >= 0)
     {
       if (ssaname)
-       {
-         si = zero_length_string (ssaname, NULL);
-         if (si != NULL)
-           si->dont_invalidate = true;
-       }
+       idx = new_stridx (ssaname);
       else
+       idx = new_addr_stridx (lhs);
+      if (idx != 0)
        {
-         int idx = new_addr_stridx (lhs);
-         if (idx != 0)
-           {
-             si = new_strinfo (build_fold_addr_expr (lhs), idx,
-                               build_int_cst (size_type_node, 0));
-             set_strinfo (idx, si);
-             si->dont_invalidate = true;
-           }
+         tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
+         si = new_strinfo (ptr, idx, size_int (!terminated), terminated);
+         set_strinfo (idx, si);
+         if (ssaname && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
+           si->endptr = ssaname;
+         si->dont_invalidate = true;
+         si->writable = true;
        }
-      if (si != NULL)
-       si->writable = true;
     }
   else if (idx == 0
           && TREE_CODE (gimple_assign_rhs1 (stmt)) == STRING_CST
@@ -2207,14 +2252,14 @@ handle_char_store (gimple_stmt_iterator
          if (idx != 0)
            {
              si = new_strinfo (build_fold_addr_expr (lhs), idx,
-                               build_int_cst (size_type_node, l));
+                               build_int_cst (size_type_node, l), true);
              set_strinfo (idx, si);
              si->dont_invalidate = true;
            }
        }
     }
 
-  if (si != NULL && initializer_zerop (gimple_assign_rhs1 (stmt)))
+  if (si != NULL && terminated == 1)
     {
       /* Allow adjust_last_stmt to remove it if the stored '\0'
         is immediately overwritten.  */
Index: gcc/testsuite/gcc.dg/strlenopt-31.c
===================================================================
--- /dev/null   2017-05-05 07:30:08.141451281 +0100
+++ gcc/testsuite/gcc.dg/strlenopt-31.c 2017-05-05 12:53:08.763475920 +0100
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+#include "strlenopt.h"
+
+volatile int v;
+
+size_t
+f1 (void)
+{
+  char a[30];
+  v += 1;
+  memcpy (a, "1234567", 7);
+  memcpy (a + 7, "89abcdefg", 9);
+  memcpy (a + 16, "h", 2);
+  return strlen (a);   // This strlen should be optimized into 17.
+}
+
+size_t
+f2 (char *a)
+{
+  v += 2;
+  memcpy (a, "1234567", 7);
+  memcpy (a + 7, "89abcdefg", 9);
+  memcpy (a + 16, "h", 2);
+  return strlen (a);   // This strlen should be optimized into 17.
+}
+
+size_t
+f3 (void)
+{
+  char a[30];
+  v += 3;
+  a[0] = '1';
+  memcpy (a + 1, "2345678", 8);
+  return strlen (a);   // This strlen should be optimized into 8.
+}
+
+size_t
+f4 (char *a)
+{
+  v += 4;
+  a[0] = '1';
+  memcpy (a + 1, "2345678", 8);
+  return strlen (a);   // This strlen should be optimized into 8.
+}
+
+int
+main ()
+{
+  char a[30];
+  if (f1 () != 17 || f2 (a) != 17 || f3 () != 8 || f4 (a) != 8)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" } } */

Reply via email to