On Tue, Jul 22, 2025 at 7:16 PM Andrew Pinski <pins...@gmail.com> wrote: > > On Fri, Apr 18, 2025 at 5:08 PM Andrew Pinski <quic_apin...@quicinc.com> > wrote: > > > > This fixes a long standing (since GCC 5) issue where the > > malloc+memset->calloc > > optimization would happen even if the memset was not always executed. > > This is a varient of Nathan's patch: > > https://inbox.sourceware.org/gcc-patches/f4b5d106-8176-b7bd-709b-d43518878...@acm.org/ > > Jeff Law had suggested to look at probabilities of the basic blocks to see > > if it is profitable or not; I am not totally convinced that is a good idea. > > Though this is an extended version of Nathan's patch as it uses post > > domination to see > > if the memset is always called after the condition of null-ness. > > Ping? > https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681439.html
Ping? > > Thanks, > Andrew Pinski > > > > > PR tree-optimization/83022 > > > > gcc/ChangeLog: > > > > * tree-ssa-strlen.cc (last_stmt_ptr_check): New function. > > (allow_memset_malloc_to_calloc): New function. > > (strlen_pass::handle_builtin_memset): Check to see if it is a good > > idea to do the malloc+memset->calloc optimization. > > (printf_strlen_execute): Free post dom info. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/tree-ssa/calloc-6.c: New test. > > * gcc.dg/tree-ssa/calloc-7.c: New test. > > * gcc.dg/tree-ssa/calloc-8.c: New test. > > * gcc.dg/tree-ssa/calloc-9.c: New test. > > > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > > --- > > gcc/testsuite/gcc.dg/tree-ssa/calloc-6.c | 31 +++++++++++ > > gcc/testsuite/gcc.dg/tree-ssa/calloc-7.c | 30 +++++++++++ > > gcc/testsuite/gcc.dg/tree-ssa/calloc-8.c | 21 ++++++++ > > gcc/testsuite/gcc.dg/tree-ssa/calloc-9.c | 20 +++++++ > > gcc/tree-ssa-strlen.cc | 67 ++++++++++++++++++++++++ > > 5 files changed, 169 insertions(+) > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/calloc-6.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/calloc-7.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/calloc-8.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/calloc-9.c > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/calloc-6.c > > b/gcc/testsuite/gcc.dg/tree-ssa/calloc-6.c > > new file mode 100644 > > index 00000000000..ca5968707a8 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/calloc-6.c > > @@ -0,0 +1,31 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > > + > > +/* PR tree-optimization/83022 */ > > + > > +void* f(int n, int *q) > > +{ > > + int *p = __builtin_malloc (n); > > + for(int i = 0; i < n; i++) > > + q[i] = i; > > + if (p) > > + __builtin_memset (p, 0, n); > > + return p; > > +} > > + > > +void* g(int n, int *q) > > +{ > > + int *p = __builtin_malloc (n); > > + for(int i = 0; i < n; i++) > > + q[i] = i; > > + __builtin_memset (p, 0, n); > > + return p; > > +} > > + > > +/* These 2 should be converted to calloc as the memset is > > + always executed or is conditional on the ptr nullness. */ > > + > > +/* { dg-final { scan-tree-dump-times "calloc" 2 "optimized" } } */ > > +/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */ > > +/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */ > > + > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/calloc-7.c > > b/gcc/testsuite/gcc.dg/tree-ssa/calloc-7.c > > new file mode 100644 > > index 00000000000..665ebe2a049 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/calloc-7.c > > @@ -0,0 +1,30 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > > + > > +/* PR tree-optimization/83022 */ > > + > > +void* f(int n, int *q, int t) > > +{ > > + int *p = __builtin_malloc (n); > > + for(int i = 0; i < n; i++) > > + q[i] = i; > > + if (t) > > + __builtin_memset (p, 0, n); > > + return p; > > +} > > + > > +void* g(int n, int *q, int t) > > +{ > > + int *p = __builtin_malloc (n); > > + if (t) > > + __builtin_memset (p, 0, n); > > + return p; > > +} > > + > > +/* These 2 should not be converted into calloc as the memset is not > > + conditionalized on the pointer being checked for nullness. */ > > + > > +/* { dg-final { scan-tree-dump-not "calloc" "optimized" } } */ > > +/* { dg-final { scan-tree-dump-times "malloc" 2 "optimized" } } */ > > +/* { dg-final { scan-tree-dump-times "memset" 2 "optimized" } } */ > > + > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/calloc-8.c > > b/gcc/testsuite/gcc.dg/tree-ssa/calloc-8.c > > new file mode 100644 > > index 00000000000..7d7d504ff9e > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/calloc-8.c > > @@ -0,0 +1,21 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > > + > > +/* PR tree-optimization/83022 */ > > + > > +void *combine_alt (int s, bool c, bool *a) > > +{ > > + void *r = __builtin_malloc (s); > > + if (!r) return 0; > > + if (a) *a = r != 0; > > + __builtin_memset (r, 0, s); > > + return r; > > +} > > + > > +/* These one should be converted to calloc as the memset is > > + is conditional on the ptr nullness. */ > > + > > +/* { dg-final { scan-tree-dump-times "calloc" 1 "optimized" } } */ > > +/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */ > > +/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */ > > + > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/calloc-9.c > > b/gcc/testsuite/gcc.dg/tree-ssa/calloc-9.c > > new file mode 100644 > > index 00000000000..e43dc359082 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/calloc-9.c > > @@ -0,0 +1,20 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > > + > > +/* PR tree-optimization/83022 */ > > + > > +void *m (int s, int c) > > +{ > > + void *r = __builtin_malloc (s); > > + if (r && c) > > + __builtin_memset (r, 0, s); > > + return r; > > +} > > + > > +/* This one should not be converted into calloc as the memset is not > > + conditionalized on the pointer being checked for nullness. */ > > + > > +/* { dg-final { scan-tree-dump-not "calloc" "optimized" } } */ > > +/* { dg-final { scan-tree-dump-times "malloc" 1 "optimized" } } */ > > +/* { dg-final { scan-tree-dump-times "memset" 1 "optimized" } } */ > > + > > diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc > > index c4d64132e53..e69ceeffb03 100644 > > --- a/gcc/tree-ssa-strlen.cc > > +++ b/gcc/tree-ssa-strlen.cc > > @@ -3785,6 +3785,70 @@ strlen_pass::handle_alloc_call (built_in_function > > bcode) > > si->dont_invalidate = true; > > } > > > > +/* Returns true of the last statement of the bb is a conditional > > + that checks ptr for null-ness. */ > > +static bool > > +last_stmt_ptr_check (tree ptr, basic_block bb) > > +{ > > + gimple_stmt_iterator gsi = gsi_last_bb (bb); > > + gcond *cstmt = dyn_cast <gcond *>(gsi_stmt (gsi)); > > + if (!cstmt) > > + return false; > > + if (gimple_cond_code (cstmt) != EQ_EXPR && gimple_cond_code (cstmt) != > > NE_EXPR) > > + return false; > > + if (!integer_zerop (gimple_cond_rhs (cstmt))) > > + return false; > > + if (!operand_equal_p (gimple_cond_lhs (cstmt), ptr)) > > + return false; > > + return true; > > +} > > + > > +/* Check if doing a malloc+memset to calloc is a good idea. PTR is the > > + return value of the malloc/where the memset happens. MALLOC_BB is > > + the basic block of the malloc. MEMSET_BB is basic block of the memset. > > */ > > + > > +static bool > > +allow_memset_malloc_to_calloc (tree ptr, basic_block malloc_bb, > > + basic_block memset_bb) > > +{ > > + /* If the malloc and memset are in the same block, then always > > + allow the transformation. Don't need post dominator calculation. */ > > + if (malloc_bb == memset_bb) > > + return true; > > + > > + if (!dom_info_available_p (cfun, CDI_POST_DOMINATORS)) > > + calculate_dominance_info (CDI_POST_DOMINATORS); > > + > > + /* If the memset is always executed after the malloc, then allow > > + to optimize to calloc. */ > > + if (dominated_by_p (CDI_POST_DOMINATORS, malloc_bb, memset_bb)) > > + return true; > > + > > + /* If the malloc bb ends in a ptr check, then we need to check if > > + either successor is post dominated by the memset bb. */ > > + if (last_stmt_ptr_check (ptr, malloc_bb)) > > + { > > + if (dominated_by_p (CDI_POST_DOMINATORS, EDGE_SUCC (malloc_bb, > > 0)->dest, memset_bb)) > > + return true; > > + if (dominated_by_p (CDI_POST_DOMINATORS, EDGE_SUCC (malloc_bb, > > 1)->dest, memset_bb)) > > + return true; > > + } > > + > > + /* At this point we want to only handle: > > + malloc(); > > + ... > > + if (ptr) goto memset_bb; */ > > + if (!single_pred_p (memset_bb)) > > + return false; > > + > > + /* If the predecessor of the memset bb is not post dominated by malloc, > > then the memset is > > + conditionalized by something more than just the checking if ptr is > > non-null. */ > > + if (!dominated_by_p (CDI_POST_DOMINATORS, malloc_bb, single_pred_edge > > (memset_bb)->src)) > > + return false; > > + > > + return last_stmt_ptr_check (ptr, single_pred_edge (memset_bb)->src); > > +} > > + > > /* Handle a call to memset. > > After a call to calloc, memset(,0,) is unnecessary. > > memset(malloc(n),0,n) is calloc(n,1). > > @@ -3870,6 +3934,8 @@ strlen_pass::handle_builtin_memset (bool *zero_write) > > enum built_in_function code1 = DECL_FUNCTION_CODE (callee1); > > if (code1 == BUILT_IN_CALLOC) > > /* Not touching alloc_stmt */ ; > > + else if (!allow_memset_malloc_to_calloc (ptr, gimple_bb (si1->stmt), > > gimple_bb (memset_stmt))) > > + return false; > > else if (code1 == BUILT_IN_MALLOC > > && operand_equal_p (memset_size, alloc_size, 0)) > > { > > @@ -6005,6 +6071,7 @@ printf_strlen_execute (function *fun, bool warn_only) > > disable_ranger (fun); > > scev_finalize (); > > loop_optimizer_finalize (); > > + free_dominance_info (CDI_POST_DOMINATORS); > > > > return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0; > > } > > -- > > 2.43.0 > >