On Fri, Feb 28, 2014 at 11:48 PM, Marc Glisse <marc.gli...@inria.fr> wrote:
> Hello,
>
> this is a stage 1 patch, and I'll ping it then, but if you have comments
> now...
>
> Passes bootstrap+testsuite on x86_64-linux-gnu.
>
> 2014-02-28  Marc Glisse  <marc.gli...@inria.fr>
>
>         PR tree-optimization/57742
> gcc/
>         * tree-ssa-forwprop.c (simplify_malloc_memset): New function.
>         (simplify_builtin_call): Call it.
> gcc/testsuite/
>         * g++.dg/tree-ssa/calloc.C: New testcase.
>         * gcc.dg/tree-ssa/calloc.c: Likewise.
>
> --
> Marc Glisse
> Index: gcc/testsuite/g++.dg/tree-ssa/calloc.C
> ===================================================================
> --- gcc/testsuite/g++.dg/tree-ssa/calloc.C      (revision 0)
> +++ gcc/testsuite/g++.dg/tree-ssa/calloc.C      (working copy)
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-std=gnu++11 -O3 -fdump-tree-optimized" } */
> +
> +#include <new>
> +#include <vector>
> +#include <cstdlib>
> +
> +void g(void*);
> +inline void* operator new(std::size_t sz) _GLIBCXX_THROW (std::bad_alloc)
> +{
> +  void *p;
> +
> +  if (sz == 0)
> +    sz = 1;
> +
> +  // Slightly modified from the libsupc++ version, that one has 2 calls
> +  // to malloc which makes it too hard to optimize.
> +  while ((p = std::malloc (sz)) == 0)
> +    {
> +      std::new_handler handler = std::get_new_handler ();
> +      if (! handler)
> +        _GLIBCXX_THROW_OR_ABORT(std::bad_alloc());
> +      handler ();
> +    }
> +  return p;
> +}
> +
> +void f(void*p,int n){
> +  new(p)std::vector<int>(n);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "calloc" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>
> Property changes on: gcc/testsuite/g++.dg/tree-ssa/calloc.C
> ___________________________________________________________________
> Added: svn:eol-style
> ## -0,0 +1 ##
> +native
> \ No newline at end of property
> Added: svn:keywords
> ## -0,0 +1 ##
> +Author Date Id Revision URL
> \ No newline at end of property
> Index: gcc/testsuite/gcc.dg/tree-ssa/calloc.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/calloc.c      (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/calloc.c      (working copy)
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +#include <stdlib.h>
> +#include <string.h>
> +
> +extern int a;
> +extern int* b;
> +int n;
> +void* f(long*q){
> +  int*p=malloc(n);
> +  ++*q;
> +  if(p){
> +    ++*q;
> +    a=2;
> +    memset(p,0,n);
> +    *b=3;
> +  }
> +  return p;
> +}
> +void* g(void){
> +  float*p=calloc(8,4);
> +  return memset(p,0,32);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "calloc" 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>
> Property changes on: gcc/testsuite/gcc.dg/tree-ssa/calloc.c
> ___________________________________________________________________
> Added: svn:keywords
> ## -0,0 +1 ##
> +Author Date Id Revision URL
> \ No newline at end of property
> Added: svn:eol-style
> ## -0,0 +1 ##
> +native
> \ No newline at end of property
> Index: gcc/tree-ssa-forwprop.c
> ===================================================================
> --- gcc/tree-ssa-forwprop.c     (revision 208224)
> +++ gcc/tree-ssa-forwprop.c     (working copy)
> @@ -1487,20 +1487,149 @@ constant_pointer_difference (tree p1, tr
>      }
>
>    for (i = 0; i < cnt[0]; i++)
>      for (j = 0; j < cnt[1]; j++)
>        if (exps[0][i] == exps[1][j])
>         return size_binop (MINUS_EXPR, offs[0][i], offs[1][j]);
>
>    return NULL_TREE;
>  }
>
> +/* Optimize
> +   ptr = malloc (n);
> +   memset (ptr, 0, n);
> +   into
> +   ptr = calloc (n);
> +   gsi_p is known to point to a call to __builtin_memset.  */
> +static bool
> +simplify_malloc_memset (gimple_stmt_iterator *gsi_p)
> +{
> +  /* First make sure we have:
> +     ptr = malloc (n);
> +     memset (ptr, 0, n);  */
> +  gimple stmt2 = gsi_stmt (*gsi_p);
> +  if (!integer_zerop (gimple_call_arg (stmt2, 1)))
> +    return false;
> +  tree ptr1, ptr2 = gimple_call_arg (stmt2, 0);
> +  tree size = gimple_call_arg (stmt2, 2);
> +  if (TREE_CODE (ptr2) != SSA_NAME)
> +    return false;
> +  gimple stmt1 = SSA_NAME_DEF_STMT (ptr2);
> +  tree callee1;
> +  /* Handle the case where STMT1 is a unary PHI, which happends
> +     for instance with:
> +     while (!(p = malloc (n))) { ... }
> +     memset (p, 0, n);  */
> +  if (!stmt1)
> +    return false;
> +  if (gimple_code (stmt1) == GIMPLE_PHI
> +      && gimple_phi_num_args (stmt1) == 1)
> +    {
> +      ptr1 = gimple_phi_arg_def (stmt1, 0);
> +      if (TREE_CODE (ptr1) != SSA_NAME)
> +       return false;
> +      stmt1 = SSA_NAME_DEF_STMT (ptr1);
> +    }
> +  else
> +    ptr1 = ptr2;
> +  if (!stmt1
> +      || !is_gimple_call (stmt1)
> +      || !(callee1 = gimple_call_fndecl (stmt1)))
> +    return false;

That's a bit much of ad-hoc pattern-matching ... wouldn't be
p = malloc (n);
memset (p, 0, n);

transform better suited to the strlen opt pass?  After all that tracks
what 'string' is associated with a SSA name pointer through
arbitrary satements using a lattice.

> +  bool is_calloc;
> +  if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MALLOC)
> +    {
> +      is_calloc = false;
> +      if (!operand_equal_p (gimple_call_arg (stmt1, 0), size, 0))
> +       return false;
> +    }
> +  else if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_CALLOC)
> +    {
> +      is_calloc = true;
> +      tree arg1 = gimple_call_arg (stmt1, 0);
> +      tree arg2 = gimple_call_arg (stmt1, 1);
> +      tree size1 = fold_build2 (MULT_EXPR, TREE_TYPE (size), arg1, arg2);
> +      if (!operand_equal_p (size1, size, 0))
> +       return false;
> +      /* We could look at SSA_NAME_DEF_STMT (size), but there can be many
> casts
> +        mixed with the MULT_EXPR which makes it hard to match with size1.
> */

The same probably applies to calloc(); memset (, 0,); though here you
could even match points-to info (after all even only clearing a portion of
the calloc()ed memory is dead code).  If points-to conservatively computes
that the memset pointer only points to null or the memory tag the
calloc return value points to then you can discard it without further
checking ...

> +    }
> +  else
> +    return false;
> +
> +  /* We only allow two simple CFG forms for now. Either stmt1 and stmt2 are
> +     in the same BB (in this order), or BB1 (malloc) ends with:
> +     if (ptr) goto BB2 (memset)  */
> +  basic_block bb1 = gimple_bb (stmt1);
> +  basic_block bb2 = gimple_bb (stmt2);
> +  if (bb1 != bb2)
> +    {
> +      if (!single_pred_p (bb2) || single_pred (bb2) != bb1)
> +       return false;
> +      gimple cond = last_stmt (bb1);
> +      if (gimple_code (cond) != GIMPLE_COND
> +         || !integer_zerop (gimple_cond_rhs (cond))
> +         || gimple_cond_lhs (cond) != ptr1)
> +       return false;
> +      int branch;
> +      tree_code comp = gimple_cond_code (cond);
> +      if (comp == NE_EXPR)
> +       branch = EDGE_TRUE_VALUE;
> +      else if (comp == EQ_EXPR)
> +       branch = EDGE_FALSE_VALUE;
> +      else
> +       return false;
> +      edge e;
> +      edge_iterator ei;
> +      FOR_EACH_EDGE (e, ei, bb1->succs)
> +       if ((e->flags & branch) && e->dest != bb2)
> +         return false;
> +    }

CFG matching in forwprop ... ehhh.  This really asks for integrating
this with a propagation engine.

> +  /* Finally, make sure the memory is not used before stmt2.  */
> +  ao_ref ref;
> +  ao_ref_init_from_ptr_and_size (&ref, ptr1, size);
> +  tree vdef = gimple_vuse (stmt2);
> +  if (vdef == NULL)
> +    return false;
> +  while (true)
> +    {
> +      gimple cur = SSA_NAME_DEF_STMT (vdef);
> +      if (cur == stmt1) break;
> +      if (stmt_may_clobber_ref_p_1 (cur, &ref))
> +       return false;
> +      vdef = gimple_vuse (cur);
> +    }

We have walk_aliased_vdefs() for this.

That said, please try to integrate this kind of transforms with
the strlen opt pass (even if it requires making its lattice more generic).
The memset removal even looks like a candidate for DSE.

Thanks,
Richard.

> +  /* Replace malloc with calloc and remove memset.  */
> +  if (!is_calloc)
> +    {
> +      gimple_stmt_iterator gsi = gsi_for_stmt (stmt1);
> +      update_gimple_call (&gsi, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
> +                         size, build_one_cst (size_type_node));
> +    }
> +  tree lhs = gimple_call_lhs (stmt2);
> +  unlink_stmt_vdef (stmt2);
> +  if (lhs)
> +    {
> +      gimple assign = gimple_build_assign (lhs, ptr2);
> +      gsi_replace (gsi_p, assign, false);
> +    }
> +  else
> +    {
> +      gsi_remove (gsi_p, true);
> +      release_defs (stmt2);
> +    }
> +  return true;
> +}
> +
>  /* *GSI_P is a GIMPLE_CALL to a builtin function.
>     Optimize
>     memcpy (p, "abcd", 4);
>     memset (p + 4, ' ', 3);
>     into
>     memcpy (p, "abcd   ", 7);
>     call if the latter can be stored by pieces during expansion.  */
>
>  static bool
>  simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
> @@ -1508,38 +1637,44 @@ simplify_builtin_call (gimple_stmt_itera
>    gimple stmt1, stmt2 = gsi_stmt (*gsi_p);
>    tree vuse = gimple_vuse (stmt2);
>    if (vuse == NULL)
>      return false;
>    stmt1 = SSA_NAME_DEF_STMT (vuse);
>
>    switch (DECL_FUNCTION_CODE (callee2))
>      {
>      case BUILT_IN_MEMSET:
>        if (gimple_call_num_args (stmt2) != 3
> -         || gimple_call_lhs (stmt2)
>           || CHAR_BIT != 8
>           || BITS_PER_UNIT != 8)
>         break;
>        else
>         {
> +         if (simplify_malloc_memset (gsi_p))
> +           return true;
> +
>           tree callee1;
>           tree ptr1, src1, str1, off1, len1, lhs1;
>           tree ptr2 = gimple_call_arg (stmt2, 0);
>           tree val2 = gimple_call_arg (stmt2, 1);
>           tree len2 = gimple_call_arg (stmt2, 2);
>           tree diff, vdef, new_str_cst;
>           gimple use_stmt;
>           unsigned int ptr1_align;
>           unsigned HOST_WIDE_INT src_len;
>           char *src_buf;
>           use_operand_p use_p;
>
> +         /* Not implemented yet.  */
> +         if (gimple_call_lhs (stmt2))
> +           break;
> +
>           if (!tree_fits_shwi_p (val2)
>               || !tree_fits_uhwi_p (len2))
>             break;
>           if (is_gimple_call (stmt1))
>             {
>               /* If first stmt is a call, it needs to be memcpy
>                  or mempcpy, with string literal as second argument and
>                  constant length.  */
>               callee1 = gimple_call_fndecl (stmt1);
>               if (callee1 == NULL_TREE
>

Reply via email to