Ping*3

Richard Sandiford <richard.sandif...@linaro.org> writes:
> Ping*2
>
> Richard Sandiford <richard.sandif...@linaro.org> writes:
>> In this testcase, we (correctly) record after:
>>
>>   strcpy (p1, "abcde");
>>   char *p2 = strchr (p1, '\0');
>>   strcpy (p2, q);
>>
>> that the length of p1 and p2 can be calculated by converting the
>> second strcpy to:
>>
>>   tmp = stpcpy (p2, q)
>>
>> and then doing tmp - p1 for p1 and tmp - p2 for p2.  This is delayed
>> until we know whether we actually need it.  Then:
>>
>>   char *p3 = strchr (p2, '\0');
>>
>> forces us to calculate the length of p2 in this way.  At this point
>> we had three related strinfos:
>>
>>   p1: delayed length, calculated from tmp = stpcpy (p2, q)
>>   p2: known length, tmp - p2
>>   p3: known length, 0
>>
>> After:
>>
>>   memcpy (p3, "x", 2);
>>
>> we use adjust_related_strinfos to add 1 to each length.  However,
>> that didn't do anything for delayed lengths because:
>>
>>        else if (si->stmt != NULL)
>>          /* Delayed length computation is unaffected.  */
>>          ;
>>
>> So after the memcpy we had:
>>
>>   p1: delayed length, calculated from tmp = stpcpy (p2, q)
>>   p2: known length, tmp - p2 + 1
>>   p3: known length, 1
>>
>> where the length of p1 was no longer correct.
>>
>> I thought about three fixes:
>>
>>   (1) Make adjust_related_strinfos drop strinfos with a delayed length.
>>
>>   (2) Make adjust_related_strinfos compute the length itself
>>       (via get_string_length).
>>
>>   (3) Make get_string_length update all related strinfos.  We can then
>>       maintain the invariant that all lengths in a chain are delayed or
>>       none are.
>>
>> (3) seemed like the best.  get_string_length has already made the
>> invasive step of changing the code and computing the length for one
>> strinfo.  Updating the related strinfos is just a couple of fold_builds,
>> of the kind that the pass already does in several places.
>>
>> The point is that the code above only triggers if one of the delayed
>> lengths has been computed.  That makes (1) unnecessarily pessimistic.
>> It also wasn't obvious (to me) from first glance, so (2) might look
>> more intrusive than it is.  I think it becomes easier to reason about
>> if we do (3) and can assume that all lengths are delayed or none are.
>> It also makes the min-length/maybe-not-terminated patch easier.
>>
>> [ I can go into more detail about why this should be enough to
>>   maintain the invariant, and why the asserts should be valid,
>>   but the message is already pretty long. ]
>>
>> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
>>
>> Thanks,
>> Richard
>>
>>
>> 2017-05-16  Richard Sandiford  <richard.sandif...@linaro.org>
>>
>> gcc/
>>      PR tree-optimization/80769
>>      * tree-ssa-strlen.c (strinfo): Document that "stmt" is also used
>>      for malloc and calloc.  Document the new invariant that all related
>>      strinfos have delayed lengths or none do.
>>      (verify_related_strinfos): Move earlier in file.
>>      (set_endptr_and_length): New function, split out from...
>>      (get_string_length): ...here.  Also set the lengths of related
>>      strinfos.
>>      (zero_length_string): Assert that chainsi has known (rather than
>>      delayed) lengths.
>>      (adjust_related_strinfos): Likewise.
>>
>> gcc/testsuite/
>>      PR tree-optimization/80769
>>      * gcc.dg/strlenopt-31.c: New test.
>>      * gcc.dg/strlenopt-31g.c: Likewise.
>>
>> Index: gcc/tree-ssa-strlen.c
>> ===================================================================
>> --- gcc/tree-ssa-strlen.c    2017-05-16 08:00:03.808133862 +0100
>> +++ gcc/tree-ssa-strlen.c    2017-05-16 08:20:51.408572143 +0100
>> @@ -61,7 +61,13 @@ struct strinfo
>>    tree length;
>>    /* Any of the corresponding pointers for querying alias oracle.  */
>>    tree ptr;
>> -  /* Statement for delayed length computation.  */
>> +  /* This is used for two things:
>> +
>> +     - To record the statement that should be used for delayed length
>> +       computations.  We maintain the invariant that all related strinfos
>> +       have delayed lengths or none do.
>> +
>> +     - To record the malloc or calloc call that produced this result.  */
>>    gimple *stmt;
>>    /* Pointer to '\0' if known, if NULL, it can be computed as
>>       ptr + length.  */
>> @@ -451,6 +457,45 @@ set_strinfo (int idx, strinfo *si)
>>    (*stridx_to_strinfo)[idx] = si;
>>  }
>>  
>> +/* Return the first strinfo in the related strinfo chain
>> +   if all strinfos in between belong to the chain, otherwise NULL.  */
>> +
>> +static strinfo *
>> +verify_related_strinfos (strinfo *origsi)
>> +{
>> +  strinfo *si = origsi, *psi;
>> +
>> +  if (origsi->first == 0)
>> +    return NULL;
>> +  for (; si->prev; si = psi)
>> +    {
>> +      if (si->first != origsi->first)
>> +    return NULL;
>> +      psi = get_strinfo (si->prev);
>> +      if (psi == NULL)
>> +    return NULL;
>> +      if (psi->next != si->idx)
>> +    return NULL;
>> +    }
>> +  if (si->idx != si->first)
>> +    return NULL;
>> +  return si;
>> +}
>> +
>> +/* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
>> +   Use LOC for folding.  */
>> +
>> +static void
>> +set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
>> +{
>> +  si->endptr = endptr;
>> +  si->stmt = NULL;
>> +  tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
>> +  tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
>> +  si->length = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
>> +                            end_as_size, start_as_size);
>> +}
>> +
>>  /* Return string length, or NULL if it can't be computed.  */
>>  
>>  static tree
>> @@ -546,12 +591,12 @@ get_string_length (strinfo *si)
>>      case BUILT_IN_STPCPY_CHK_CHKP:
>>        gcc_assert (lhs != NULL_TREE);
>>        loc = gimple_location (stmt);
>> -      si->endptr = lhs;
>> -      si->stmt = NULL;
>> -      lhs = fold_convert_loc (loc, size_type_node, lhs);
>> -      si->length = fold_convert_loc (loc, size_type_node, si->ptr);
>> -      si->length = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
>> -                                    lhs, si->length);
>> +      set_endptr_and_length (loc, si, lhs);
>> +      for (strinfo *chainsi = verify_related_strinfos (si);
>> +           chainsi != NULL;
>> +           chainsi = get_next_strinfo (chainsi))
>> +        if (chainsi->length == NULL)
>> +          set_endptr_and_length (loc, chainsi, lhs);
>>        break;
>>      case BUILT_IN_MALLOC:
>>        break;
>> @@ -620,32 +665,6 @@ unshare_strinfo (strinfo *si)
>>    return nsi;
>>  }
>>  
>> -/* Return first strinfo in the related strinfo chain
>> -   if all strinfos in between belong to the chain, otherwise
>> -   NULL.  */
>> -
>> -static strinfo *
>> -verify_related_strinfos (strinfo *origsi)
>> -{
>> -  strinfo *si = origsi, *psi;
>> -
>> -  if (origsi->first == 0)
>> -    return NULL;
>> -  for (; si->prev; si = psi)
>> -    {
>> -      if (si->first != origsi->first)
>> -    return NULL;
>> -      psi = get_strinfo (si->prev);
>> -      if (psi == NULL)
>> -    return NULL;
>> -      if (psi->next != si->idx)
>> -    return NULL;
>> -    }
>> -  if (si->idx != si->first)
>> -    return NULL;
>> -  return si;
>> -}
>> -
>>  /* Attempt to create a new strinfo for BASESI + OFF, or find existing
>>     strinfo if there is any.  Return it's idx, or 0 if no strinfo has
>>     been created.  */
>> @@ -749,7 +768,8 @@ zero_length_string (tree ptr, strinfo *c
>>      {
>>        do
>>          {
>> -          gcc_assert (si->length || si->stmt);
>> +          /* We shouldn't mix delayed and non-delayed lengths.  */
>> +          gcc_assert (si->length);
>>            if (si->endptr == NULL_TREE)
>>              {
>>                si = unshare_strinfo (si);
>> @@ -770,12 +790,17 @@ zero_length_string (tree ptr, strinfo *c
>>            return chainsi;
>>          }
>>      }
>> -      else if (chainsi->first || chainsi->prev || chainsi->next)
>> +      else
>>      {
>> -      chainsi = unshare_strinfo (chainsi);
>> -      chainsi->first = 0;
>> -      chainsi->prev = 0;
>> -      chainsi->next = 0;
>> +      /* We shouldn't mix delayed and non-delayed lengths.  */
>> +      gcc_assert (chainsi->length);
>> +      if (chainsi->first || chainsi->prev || chainsi->next)
>> +        {
>> +          chainsi = unshare_strinfo (chainsi);
>> +          chainsi->first = 0;
>> +          chainsi->prev = 0;
>> +          chainsi->next = 0;
>> +        }
>>      }
>>      }
>>    idx = new_stridx (ptr);
>> @@ -820,18 +845,13 @@ adjust_related_strinfos (location_t loc,
>>        tree tem;
>>  
>>        si = unshare_strinfo (si);
>> -      if (si->length)
>> -        {
>> -          tem = fold_convert_loc (loc, TREE_TYPE (si->length), adj);
>> -          si->length = fold_build2_loc (loc, PLUS_EXPR,
>> -                                        TREE_TYPE (si->length), si->length,
>> -                                        tem);
>> -        }
>> -      else if (si->stmt != NULL)
>> -        /* Delayed length computation is unaffected.  */
>> -        ;
>> -      else
>> -        gcc_unreachable ();
>> +      /* We shouldn't see delayed lengths here; the caller must have
>> +         calculated the old length in order to calculate the
>> +         adjustment.  */
>> +      gcc_assert (si->length);
>> +      tem = fold_convert_loc (loc, TREE_TYPE (si->length), adj);
>> +      si->length = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (si->length),
>> +                                    si->length, tem);
>>  
>>        si->endptr = NULL_TREE;
>>        si->dont_invalidate = true;
>> Index: gcc/testsuite/gcc.dg/strlenopt-31.c
>> ===================================================================
>> --- /dev/null        2017-05-11 19:11:40.989165404 +0100
>> +++ gcc/testsuite/gcc.dg/strlenopt-31.c      2017-05-16 08:20:26.780371709 
>> +0100
>> @@ -0,0 +1,25 @@
>> +/* { dg-do run } */
>> +/* { dg-options "-O2" } */
>> +
>> +#include "strlenopt.h"
>> +
>> +__attribute__((noinline, noclone)) int
>> +bar (char *p1, const char *q)
>> +{
>> +  strcpy (p1, "abcde");
>> +  char *p2 = strchr (p1, '\0');
>> +  strcpy (p2, q);
>> +  char *p3 = strchr (p2, '\0');
>> +  memcpy (p3, "x", 2);
>> +  return strlen (p1);
>> +}
>> +
>> +int
>> +main (void)
>> +{
>> +  char buffer[10];
>> +  int res = bar (buffer, "foo");
>> +  if (strcmp (buffer, "abcdefoox") != 0 || res != 9)
>> +    abort ();
>> +  return 0;
>> +}
>> Index: gcc/testsuite/gcc.dg/strlenopt-31g.c
>> ===================================================================
>> --- /dev/null        2017-05-11 19:11:40.989165404 +0100
>> +++ gcc/testsuite/gcc.dg/strlenopt-31g.c     2017-05-16 08:20:26.780371709 
>> +0100
>> @@ -0,0 +1,9 @@
>> +/* { dg-do run { target *-*-linux* *-*-gnu* } } */
>> +/* { dg-options "-O2 -fdump-tree-strlen" } */
>> +
>> +#define USE_GNU
>> +#include "strlenopt-31.c"
>> +
>> +/* { dg-final { scan-tree-dump-times "stpcpy \\(" 1 "strlen" } } */
>> +/* { dg-final { scan-tree-dump-times "memcpy \\(" 2 "strlen" } } */
>> +/* { dg-final { scan-tree-dump-not "strlen \\(" "strlen" } } */

Reply via email to