On Fri, Dec 16, 2016 at 2:54 AM, Jeff Law <[email protected]> wrote:
> This is the 3rd patch in the kit to improve our DSE implementation.
>
> This patch supports trimming of the head or tail of a memset, memcpy or
> memmove call. It's conceptually similar to trimming CONSTRUCTORS (and was
> in fact developed first).
>
> Try as I might, I couldn't find a BZ in our database that would be resolved
> by this patch. There's BZs in the LLVM database in this space, but I didn't
> actually test those. With that in mind, I don't think I can strictly call
> this a bugfix. It does represent closing a deficiency when compared to
> LLVM. So while I'd like to see it go onto the trunk, I won't lose sleep
> if we defer to gcc-8.
>
> Note this patch relies on the alignment tweak I mentioned in the discussion
> of patch #2 to avoid creating code that the strlen folding optimization
> can't optimize. The code is still correct/valid, it's just in a form that
> the strlen folders don't grok.
>
> This includes a trivial test that I used for development purposes. It hits
> fairly often building GCC itself. If we wanted more coverage i the
> testsuite, I could extract some tests from GCC and reduce them.
>
> This patch has (of course) been bootstrapped and regression tested on
> x86_64-linux-gnu. OK for the trunk or defer to gcc-8?
Didn't see a re-post of this one so reviewing the old.
>
> * tree-ssa-dse.c (need_ssa_update): New file scoped boolean.
> (decrement_count): New function.
> (increment_start_addr, trim_memstar_call): Likewise.
> (trim_partially_dead_store): Call trim_memstar_call.
> (pass_dse::execute): Initialize need_ssa_update. If set, then
> return TODO_ssa_update.
>
> * gcc.dg/tree-ssa/ssa-dse-25.c: New test.
>
> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
> index 1482c7f..b21b9b5 100644
> --- a/gcc/tree-ssa-dse.c
> +++ b/gcc/tree-ssa-dse.c
> @@ -79,6 +80,10 @@ static bitmap need_eh_cleanup;
> It is always safe to return FALSE. But typically better optimziation
> can be achieved by analyzing more statements. */
>
> +/* If trimming stores requires insertion of new statements, then we
> + will need an SSA update. */
> +static bool need_ssa_update;
> +
huh? You set this to true after inserting a POINTER_PLUS_EXPR, I don't see
how you need an SSA update for this.
> static bool
> initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write)
> {
> @@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live,
> gimple *stmt)
> }
> }
>
> +/* STMT is a memcpy, memmove or memset. Decrement the number of bytes
> + copied/set by DECREMENT. */
> +static void
> +decrement_count (gimple *stmt, int decrement)
> +{
> + tree *countp = gimple_call_arg_ptr (stmt, 2);
> + gcc_assert (TREE_CODE (*countp) == INTEGER_CST);
> + tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp,
> + build_int_cst (TREE_TYPE (*countp), decrement));
> + *countp = x;
thanks to wide-int the following should work
*countp = wide_int_to_tree (TREE_TYPE (*countp), *countp - decrement);
(if not please use int_const_binop rather than fold_build2 here and
below as well)
> +}
> +
> +static void
> +increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int
> increment)
> +{
> + /* If the address wasn't initially a MEM_REF, make it a MEM_REF. */
> + if (TREE_CODE (*where) == ADDR_EXPR
> + && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF)
> + {
> + tree t = TREE_OPERAND (*where, 0);
> + t = build_ref_for_offset (EXPR_LOCATION (t), t,
> + increment * BITS_PER_UNIT, false,
> + ptr_type_node, NULL, false);
please don't use build_ref_for_offset for this. Simply only handle the SSA_NAME
case here and below ...
> + *where = build_fold_addr_expr (t);
> + return;
> + }
> + else if (TREE_CODE (*where) == SSA_NAME)
> + {
> + tree tem = make_ssa_name (TREE_TYPE (*where));
> + gassign *newop
> + = gimple_build_assign (tem, POINTER_PLUS_EXPR, *where,
> + build_int_cst (sizetype, increment));
> + gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
> + gsi_insert_before (&gsi, newop, GSI_SAME_STMT);
> + need_ssa_update = true;
> + *where = tem;
> + update_stmt (gsi_stmt (gsi));
> + return;
> + }
> +
> + /* We can just adjust the offset in the MEM_REF expression. */
> + tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1);
> + tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1,
> + build_int_cst (TREE_TYPE (x1), increment));
> + TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x;
...
re-fold the thing as MEM_REF which will do all the magic for you:
*where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node,
*where, build_int_cst (ptr_type_node, increment)));
that handles &MEM[] and &foo.bar just fine and avoids adding magic here.
Otherwise looks ok. I think I'd like to see this in GCC 7 given it's
so much similar to the
constructor pruning.
Richard.
> +
> +}
> +
> +/* STMT is builtin call that writes bytes in bitmap ORIG, some bytes are
> dead
> + (ORIG & ~NEW) and need not be stored. Try to rewrite STMT to reduce
> + the amount of data it actually writes.
> +
> + Right now we only support trimming from the head or the tail of the
> + memory region. In theory we could split the mem* call, but it's
> + likely of marginal value. */
> +
> +static void
> +trim_memstar_call (bitmap orig, bitmap live, gimple *stmt)
> +{
> + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
> + {
> + case BUILT_IN_MEMCPY:
> + case BUILT_IN_MEMMOVE:
> + {
> + int head_trim, tail_trim;
> + compute_trims (orig, live, &head_trim, &tail_trim);
> +
> + /* Tail trimming is easy, we can just reduce the count. */
> + if (tail_trim)
> + decrement_count (stmt, tail_trim);
> +
> + /* Head trimming requires adjusting all the arguments. */
> + if (head_trim)
> + {
> + tree *dst = gimple_call_arg_ptr (stmt, 0);
> + increment_start_addr (stmt, dst, head_trim);
> + tree *src = gimple_call_arg_ptr (stmt, 1);
> + increment_start_addr (stmt, src, head_trim);
> + decrement_count (stmt, head_trim);
> + }
> + break;
> + }
> +
> + case BUILT_IN_MEMSET:
> + {
> + int head_trim, tail_trim;
> + compute_trims (orig, live, &head_trim, &tail_trim);
> +
> + /* Tail trimming is easy, we can just reduce the count. */
> + if (tail_trim)
> + decrement_count (stmt, tail_trim);
> +
> + /* Head trimming requires adjusting all the arguments. */
> + if (head_trim)
> + {
> + tree *dst = gimple_call_arg_ptr (stmt, 0);
> + increment_start_addr (stmt, dst, head_trim);
> + decrement_count (stmt, head_trim);
> + }
> + break;
> + }
> +
> + default:
> + break;
> + }
> +}
> +
> /* STMT is a memory write where one or more bytes written are dead
> stores. ORIG is the bitmap of bytes stored by STMT. LIVE is the
> bitmap of stores that are actually live.
> @@ -321,7 +433,9 @@ trim_constructor_store (bitmap orig, bitmap live, gimple
> *stmt)
> static void
> trim_partially_dead_store (bitmap orig, bitmap live, gimple *stmt)
> {
> - if (is_gimple_assign (stmt))
> + if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
> + trim_memstar_call (orig, live, stmt);
> + else if (is_gimple_assign (stmt))
> {
> switch (gimple_assign_rhs_code (stmt))
> {
> @@ -712,6 +826,7 @@ unsigned int
> pass_dse::execute (function *fun)
> {
> need_eh_cleanup = BITMAP_ALLOC (NULL);
> + need_ssa_update = false;
>
> renumber_gimple_stmt_uids ();
>
> @@ -738,7 +853,7 @@ pass_dse::execute (function *fun)
>
> /* For now, just wipe the post-dominator information. */
> free_dominance_info (CDI_POST_DOMINATORS);
> - return 0;
> + return (need_ssa_update ? TODO_update_ssa : 0);
> }
>
> } // anon namespace
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c
> new file mode 100644
> index 0000000..8b7db3a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse1-details -w" } */
> +
> +char z[32];
> +
> +
> +int
> +foo(void)
> +{
> + memset (z, 0, 16);
> + memset (z+8, 0, 24);
> +}
> +
> +/* { dg-final { scan-tree-dump "memset .&z, 0, 8." "dse1" } } */
> +
> +
>