Hello,

this is a stage 1 patch, so I'll ping it again when stage 1 opens, but comments now are of course welcome.

Passes bootstrap+testsuite on x86_64-linux-gnu.
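
For reference, here is a minimal sketch of the kind of source the new
simplify_malloc_memset transform is meant to catch (the same shape as the
new gcc.dg testcase; the function name is only illustrative):

  #include <stdlib.h>
  #include <string.h>

  void *
  make_zeroed (size_t n)
  {
    /* An allocation immediately cleared with a memset of the same size,
       possibly behind a null-pointer guard.  */
    void *p = malloc (n);
    if (p)
      memset (p, 0, n);
    return p;
  }

After forwprop this should in effect become a single calloc (n, 1) with the
memset gone; both the null-guard form above and the
while (!(p = malloc (n))) form (seen in the new g++ testcase, handled via a
unary PHI) are recognized.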

2014-02-28  Marc Glisse  <marc.gli...@inria.fr>

        PR tree-optimization/57742
gcc/
        * tree-ssa-forwprop.c (simplify_malloc_memset): New function.
        (simplify_builtin_call): Call it.
gcc/testsuite/
        * g++.dg/tree-ssa/calloc.C: New testcase.
        * gcc.dg/tree-ssa/calloc.c: Likewise.

--
Marc Glisse
Index: gcc/testsuite/g++.dg/tree-ssa/calloc.C
===================================================================
--- gcc/testsuite/g++.dg/tree-ssa/calloc.C      (revision 0)
+++ gcc/testsuite/g++.dg/tree-ssa/calloc.C      (working copy)
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu++11 -O3 -fdump-tree-optimized" } */
+
+#include <new>
+#include <vector>
+#include <cstdlib>
+
+void g(void*);
+inline void* operator new(std::size_t sz) _GLIBCXX_THROW (std::bad_alloc)
+{
+  void *p;
+
+  if (sz == 0)
+    sz = 1;
+
+  // Slightly modified from the libsupc++ version; that one has two calls
+  // to malloc, which makes it too hard to optimize.
+  while ((p = std::malloc (sz)) == 0)
+    {
+      std::new_handler handler = std::get_new_handler ();
+      if (! handler)
+        _GLIBCXX_THROW_OR_ABORT(std::bad_alloc());
+      handler ();
+    }
+  return p;
+}
+
+void f(void*p,int n){
+  new(p)std::vector<int>(n);
+}
+
+/* { dg-final { scan-tree-dump-times "calloc" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: gcc/testsuite/g++.dg/tree-ssa/calloc.C
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Index: gcc/testsuite/gcc.dg/tree-ssa/calloc.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/calloc.c      (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/calloc.c      (working copy)
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include <stdlib.h>
+#include <string.h>
+
+extern int a;
+extern int* b;
+int n;
+void* f(long*q){
+  int*p=malloc(n);
+  ++*q;
+  if(p){
+    ++*q;
+    a=2;
+    memset(p,0,n);
+    *b=3;
+  }
+  return p;
+}
+void* g(void){
+  float*p=calloc(8,4);
+  return memset(p,0,32);
+}
+
+/* { dg-final { scan-tree-dump-times "calloc" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: gcc/testsuite/gcc.dg/tree-ssa/calloc.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: gcc/tree-ssa-forwprop.c
===================================================================
--- gcc/tree-ssa-forwprop.c     (revision 208224)
+++ gcc/tree-ssa-forwprop.c     (working copy)
@@ -1487,20 +1487,149 @@ constant_pointer_difference (tree p1, tr
     }
 
   for (i = 0; i < cnt[0]; i++)
     for (j = 0; j < cnt[1]; j++)
       if (exps[0][i] == exps[1][j])
        return size_binop (MINUS_EXPR, offs[0][i], offs[1][j]);
 
   return NULL_TREE;
 }
 
+/* Optimize
+   ptr = malloc (n);
+   memset (ptr, 0, n);
+   into
+   ptr = calloc (n, 1);
+   GSI_P is known to point to a call to __builtin_memset.  */
+static bool
+simplify_malloc_memset (gimple_stmt_iterator *gsi_p)
+{
+  /* First make sure we have:
+     ptr = malloc (n);
+     memset (ptr, 0, n);  */
+  gimple stmt2 = gsi_stmt (*gsi_p);
+  if (!integer_zerop (gimple_call_arg (stmt2, 1)))
+    return false;
+  tree ptr1, ptr2 = gimple_call_arg (stmt2, 0);
+  tree size = gimple_call_arg (stmt2, 2);
+  if (TREE_CODE (ptr2) != SSA_NAME)
+    return false;
+  gimple stmt1 = SSA_NAME_DEF_STMT (ptr2);
+  tree callee1;
+  /* Handle the case where STMT1 is a unary PHI, which happens
+     for instance with:
+     while (!(p = malloc (n))) { ... }
+     memset (p, 0, n);  */
+  if (!stmt1)
+    return false;
+  if (gimple_code (stmt1) == GIMPLE_PHI
+      && gimple_phi_num_args (stmt1) == 1)
+    {
+      ptr1 = gimple_phi_arg_def (stmt1, 0);
+      if (TREE_CODE (ptr1) != SSA_NAME)
+       return false;
+      stmt1 = SSA_NAME_DEF_STMT (ptr1);
+    }
+  else
+    ptr1 = ptr2;
+  if (!stmt1
+      || !is_gimple_call (stmt1)
+      || !(callee1 = gimple_call_fndecl (stmt1)))
+    return false;
+
+  bool is_calloc;
+  if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MALLOC)
+    {
+      is_calloc = false;
+      if (!operand_equal_p (gimple_call_arg (stmt1, 0), size, 0))
+       return false;
+    }
+  else if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_CALLOC)
+    {
+      is_calloc = true;
+      tree arg1 = gimple_call_arg (stmt1, 0);
+      tree arg2 = gimple_call_arg (stmt1, 1);
+      tree size1 = fold_build2 (MULT_EXPR, TREE_TYPE (size), arg1, arg2);
+      if (!operand_equal_p (size1, size, 0))
+       return false;
+      /* We could look at SSA_NAME_DEF_STMT (size), but there can be many casts
+        mixed with the MULT_EXPR, which makes it hard to match with size1.  */
+    }
+  else
+    return false;
+
+  /* We only allow two simple CFG forms for now.  Either STMT1 and STMT2 are
+     in the same BB (in this order), or BB1 (malloc) ends with:
+     if (ptr) goto BB2 (memset).  */
+  basic_block bb1 = gimple_bb (stmt1);
+  basic_block bb2 = gimple_bb (stmt2);
+  if (bb1 != bb2)
+    {
+      if (!single_pred_p (bb2) || single_pred (bb2) != bb1)
+       return false;
+      gimple cond = last_stmt (bb1);
+      if (gimple_code (cond) != GIMPLE_COND
+         || !integer_zerop (gimple_cond_rhs (cond))
+         || gimple_cond_lhs (cond) != ptr1)
+       return false;
+      int branch;
+      tree_code comp = gimple_cond_code (cond);
+      if (comp == NE_EXPR)
+       branch = EDGE_TRUE_VALUE;
+      else if (comp == EQ_EXPR)
+       branch = EDGE_FALSE_VALUE;
+      else
+       return false;
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, bb1->succs)
+       if ((e->flags & branch) && e->dest != bb2)
+         return false;
+    }
+
+  /* Finally, make sure the memory is not clobbered before STMT2.  */
+  ao_ref ref;
+  ao_ref_init_from_ptr_and_size (&ref, ptr1, size);
+  tree vdef = gimple_vuse (stmt2);
+  if (vdef == NULL)
+    return false;
+  while (true)
+    {
+      gimple cur = SSA_NAME_DEF_STMT (vdef);
+      if (cur == stmt1) break;
+      if (stmt_may_clobber_ref_p_1 (cur, &ref))
+       return false;
+      vdef = gimple_vuse (cur);
+    }
+
+  /* Replace malloc with calloc and remove memset.  */
+  if (!is_calloc)
+    {
+      gimple_stmt_iterator gsi = gsi_for_stmt (stmt1);
+      update_gimple_call (&gsi, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
+                         size, build_one_cst (size_type_node));
+    }
+  tree lhs = gimple_call_lhs (stmt2);
+  unlink_stmt_vdef (stmt2);
+  if (lhs)
+    {
+      gimple assign = gimple_build_assign (lhs, ptr2);
+      gsi_replace (gsi_p, assign, false);
+    }
+  else
+    {
+      gsi_remove (gsi_p, true);
+      release_defs (stmt2);
+    }
+  return true;
+}
+
 /* *GSI_P is a GIMPLE_CALL to a builtin function.
    Optimize
    memcpy (p, "abcd", 4);
    memset (p + 4, ' ', 3);
    into
    memcpy (p, "abcd   ", 7);
    call if the latter can be stored by pieces during expansion.  */
 
 static bool
 simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
@@ -1508,38 +1637,44 @@ simplify_builtin_call (gimple_stmt_itera
   gimple stmt1, stmt2 = gsi_stmt (*gsi_p);
   tree vuse = gimple_vuse (stmt2);
   if (vuse == NULL)
     return false;
   stmt1 = SSA_NAME_DEF_STMT (vuse);
 
   switch (DECL_FUNCTION_CODE (callee2))
     {
     case BUILT_IN_MEMSET:
       if (gimple_call_num_args (stmt2) != 3
-         || gimple_call_lhs (stmt2)
          || CHAR_BIT != 8
          || BITS_PER_UNIT != 8)
        break;
       else
        {
+         if (simplify_malloc_memset (gsi_p))
+           return true;
+
          tree callee1;
          tree ptr1, src1, str1, off1, len1, lhs1;
          tree ptr2 = gimple_call_arg (stmt2, 0);
          tree val2 = gimple_call_arg (stmt2, 1);
          tree len2 = gimple_call_arg (stmt2, 2);
          tree diff, vdef, new_str_cst;
          gimple use_stmt;
          unsigned int ptr1_align;
          unsigned HOST_WIDE_INT src_len;
          char *src_buf;
          use_operand_p use_p;
 
+         /* Not implemented yet.  */
+         if (gimple_call_lhs (stmt2))
+           break;
+
          if (!tree_fits_shwi_p (val2)
              || !tree_fits_uhwi_p (len2))
            break;
          if (is_gimple_call (stmt1))
            {
              /* If first stmt is a call, it needs to be memcpy
                 or mempcpy, with string literal as second argument and
                 constant length.  */
              callee1 = gimple_call_fndecl (stmt1);
              if (callee1 == NULL_TREE
