On Mon, 6 Nov 2017, Jakub Jelinek wrote:
> Hi!
>
> The following patch contains 2 changes:
> 1) BIT_NOT_EXPR on a load from memory is handled, including when one
> or both BIT_{AND,IOR,XOR}_EXPR operands is BIT_NOT_EXPR of a memory load
> 2) it changes the aliasing handling, because the old
> ao_ref_init_from_ptr_and_size caused way too many unnecessary chain
> terminations
> For 2), I initially wrote a completely different patch, which computed
> base_ref for the store chain and minimum and maximum offsets from it which
> then were used in the ref_maybe_used_by_stmt_p/stmt_may_clobber_ref_p_1
> calls. But in the end this seems to be far simpler, it checks the aliasing
> the way it was checking it for the chain_info chain before, I see no reason
> to handle other chains differently. All that is needed is that even a
> successful addition of a store into a chain needs to terminate other chains
> that could possibly alias, just not the current chain, so the chain_info
> argument is repurposed for this - terminate chains that alias except for
> the one referenced by chain_info if non-NULL.
> To show in more detail what it does on store_merging_2.c:
> struct bar { int a; unsigned char b, c; short d; unsigned char e, f, g; };
> __attribute__ ((noinline)) void
> foo2 (struct bar *p, struct bar *p2)
> {
> p->b = 0xff;
> p2->b = 0xa;
> p->a = 0xfffff;
> p2->c = 0xc;
> p->c = 0xff;
> p2->d = 0xbf;
> p->d = 0xfff;
> }
> Previously the p2->* stores would terminate the p based chains and
> vice versa, no store merging would occur. With this patch, p2->b
> may alias p->b in the other chain, so we terminate that (and do nothing
> for p->b, because it has just a single store). Then we process p->a
> store, but that doesn't alias with p2->b, so we have 2 different chains
> with 1 store in it each. Then we process p2->c store, which again doesn't
> alias with p->a, so we have 2 stores in p2 based chain, 1 store in p based.
> Next, p->c aliases with p2->c in the other chain, so we terminate that
> chain, and optimize the p2->{b,c} stores into one combined one.
> So we have just one chain with p->{a,c} in it. Then we process p2->d,
> not aliasing with anything, create p2 based chain for it. Finally,
> p->d aliases with p2->d, so terminate that chain (no useful merging),
> and finally the p->{a,c,d} chain is not merged, because there is a gap
> between a and c, can't modify b, and while {c,d} is adjacent, it is 3 bytes
> and we'd need 2 stores like before.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok.
Thanks,
Richard.
> 2017-11-06 Jakub Jelinek <[email protected]>
>
> PR tree-optimization/78821
> * gimple-ssa-store-merging.c (struct store_operand_info): Add bit_not_p
> data member.
> (store_operand_info::store_operand_info): Initialize it to false.
> (pass_store_merging::terminate_all_aliasing_chains): Rewritten to use
> ref_maybe_used_by_stmt_p and stmt_may_clobber_ref_p on lhs of each
> store in the group, and if chain_info is non-NULL, to ignore altogether
> that chain.
> (compatible_load_p): Fail if bit_not_p does not match.
> (imm_store_chain_info::output_merged_store): Handle bit_not_p loads.
> (handled_load): Fill in bit_not_p. Handle BIT_NOT_EXPR.
> (pass_store_merging::process_store): Adjust
> terminate_all_aliasing_chains calls to pass NULL in all current spots,
> call terminate_all_aliasing_chains newly when adding a store into
> a chain with non-NULL chain_info.
>
> * gcc.dg/store_merging_2.c: Expect 3 store mergings instead of 2.
> * gcc.dg/store_merging_13.c (f7, f8, f9, f10, f11, f12, f13): New
> functions.
> (main): Test also those. Expect 13 store mergings instead of 6.
> * gcc.dg/store_merging_14.c (f7, f8, f9): New functions.
> (main): Test also those. Expect 9 store mergings instead of 6.
>
> --- gcc/gimple-ssa-store-merging.c.jj 2017-11-06 08:51:15.313990289 +0100
> +++ gcc/gimple-ssa-store-merging.c 2017-11-06 08:57:07.621523803 +0100
> @@ -183,12 +183,13 @@ struct store_operand_info
> unsigned HOST_WIDE_INT bitregion_start;
> unsigned HOST_WIDE_INT bitregion_end;
> gimple *stmt;
> + bool bit_not_p;
> store_operand_info ();
> };
>
> store_operand_info::store_operand_info ()
> : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
> - bitregion_start (0), bitregion_end (0), stmt (NULL)
> + bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
> {
> }
>
> @@ -910,8 +911,7 @@ private:
>
> void process_store (gimple *);
> bool terminate_and_process_all_chains ();
> - bool terminate_all_aliasing_chains (imm_store_chain_info **,
> - gimple *);
> + bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
> bool terminate_and_release_chain (imm_store_chain_info *);
> }; // class pass_store_merging
>
> @@ -930,13 +930,9 @@ pass_store_merging::terminate_and_proces
> return ret;
> }
>
> -/* Terminate all chains that are affected by the assignment to DEST,
> appearing
> - in statement STMT and ultimately points to the object BASE. Return true
> if
> - at least one aliasing chain was terminated. BASE and DEST are allowed to
> - be NULL_TREE. In that case the aliasing checks are performed on the whole
> - statement rather than a particular operand in it. VAR_OFFSET_P signifies
> - whether STMT represents a store to BASE offset by a variable amount.
> - If that is the case we have to terminate any chain anchored at BASE. */
> +/* Terminate all chains that are affected by the statement STMT.
> + CHAIN_INFO is the chain we should ignore from the checks if
> + non-NULL. */
>
> bool
> pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
> @@ -949,14 +945,18 @@ pass_store_merging::terminate_all_aliasi
> if (!gimple_vuse (stmt))
> return false;
>
> - /* Check if the assignment destination (BASE) is part of a store chain.
> - This is to catch non-constant stores to destinations that may be part
> - of a chain. */
> - if (chain_info)
> + for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur =
> next)
> {
> + next = cur->next;
> +
> + /* We already checked all the stores in chain_info and terminated the
> + chain if necessary. Skip it here. */
> + if (chain_info && *chain_info == cur)
> + continue;
> +
> store_immediate_info *info;
> unsigned int i;
> - FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info)
> + FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
> {
> if (ref_maybe_used_by_stmt_p (stmt, gimple_assign_lhs (info->stmt))
> || stmt_may_clobber_ref_p (stmt, gimple_assign_lhs (info->stmt)))
> @@ -966,37 +966,13 @@ pass_store_merging::terminate_all_aliasi
> fprintf (dump_file, "stmt causes chain termination:\n");
> print_gimple_stmt (dump_file, stmt, 0);
> }
> - terminate_and_release_chain (*chain_info);
> + terminate_and_release_chain (cur);
> ret = true;
> break;
> }
> }
> }
>
> - /* Check for aliasing with all other store chains. */
> - for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur =
> next)
> - {
> - next = cur->next;
> -
> - /* We already checked all the stores in chain_info and terminated the
> - chain if necessary. Skip it here. */
> - if (chain_info && (*chain_info) == cur)
> - continue;
> -
> - /* We can't use the base object here as that does not reliably exist.
> - Build a ao_ref from the base object address (if we know the
> - minimum and maximum offset and the maximum size we could improve
> - things here). */
> - ao_ref chain_ref;
> - ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE);
> - if (ref_maybe_used_by_stmt_p (stmt, &chain_ref)
> - || stmt_may_clobber_ref_p_1 (stmt, &chain_ref))
> - {
> - terminate_and_release_chain (cur);
> - ret = true;
> - }
> - }
> -
> return ret;
> }
>
> @@ -1053,6 +1029,7 @@ compatible_load_p (merged_store_group *m
> {
> store_immediate_info *infof = merged_store->stores[0];
> if (!info->ops[idx].base_addr
> + || info->ops[idx].bit_not_p != infof->ops[idx].bit_not_p
> || (info->ops[idx].bitpos - infof->ops[idx].bitpos
> != info->bitpos - infof->bitpos)
> || !operand_equal_p (info->ops[idx].base_addr,
> @@ -1755,6 +1732,19 @@ imm_store_chain_info::output_merged_stor
> gimple_seq_add_stmt_without_update (&seq, stmt);
> }
> ops[j] = gimple_assign_lhs (stmt);
> + if (op.bit_not_p)
> + {
> + stmt = gimple_build_assign (make_ssa_name (int_type),
> + BIT_NOT_EXPR, ops[j]);
> + gimple_set_location (stmt, load_loc);
> + ops[j] = gimple_assign_lhs (stmt);
> +
> + if (gsi_bb (load_gsi[j]))
> + gimple_seq_add_stmt_without_update (&load_seq[j],
> + stmt);
> + else
> + gimple_seq_add_stmt_without_update (&seq, stmt);
> + }
> }
> else
> ops[j] = native_interpret_expr (int_type,
> @@ -2100,9 +2090,23 @@ handled_load (gimple *stmt, store_operan
> unsigned HOST_WIDE_INT bitregion_start,
> unsigned HOST_WIDE_INT bitregion_end)
> {
> - if (!is_gimple_assign (stmt) || !gimple_vuse (stmt))
> + if (!is_gimple_assign (stmt))
> return false;
> - if (gimple_assign_load_p (stmt)
> + if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
> + {
> + tree rhs1 = gimple_assign_rhs1 (stmt);
> + if (TREE_CODE (rhs1) == SSA_NAME
> + && has_single_use (rhs1)
> + && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
> + bitregion_start, bitregion_end))
> + {
> + op->bit_not_p = !op->bit_not_p;
> + return true;
> + }
> + return false;
> + }
> + if (gimple_vuse (stmt)
> + && gimple_assign_load_p (stmt)
> && !stmt_can_throw_internal (stmt)
> && !gimple_has_volatile_ops (stmt))
> {
> @@ -2119,6 +2123,7 @@ handled_load (gimple *stmt, store_operan
> {
> op->stmt = stmt;
> op->val = mem;
> + op->bit_not_p = false;
> return true;
> }
> }
> @@ -2202,16 +2207,16 @@ pass_store_merging::process_store (gimpl
> }
> }
>
> - struct imm_store_chain_info **chain_info = NULL;
> - if (base_addr)
> - chain_info = m_stores.get (base_addr);
> -
> if (invalid)
> {
> - terminate_all_aliasing_chains (chain_info, stmt);
> + terminate_all_aliasing_chains (NULL, stmt);
> return;
> }
>
> + struct imm_store_chain_info **chain_info = NULL;
> + if (base_addr)
> + chain_info = m_stores.get (base_addr);
> +
> store_immediate_info *info;
> if (chain_info)
> {
> @@ -2225,6 +2230,7 @@ pass_store_merging::process_store (gimpl
> print_gimple_stmt (dump_file, stmt, 0);
> }
> (*chain_info)->m_store_info.safe_push (info);
> + terminate_all_aliasing_chains (chain_info, stmt);
> /* If we reach the limit of stores to merge in a chain terminate and
> process the chain now. */
> if ((*chain_info)->m_store_info.length ()
> @@ -2239,7 +2245,7 @@ pass_store_merging::process_store (gimpl
> }
>
> /* Store aliases any existing chain? */
> - terminate_all_aliasing_chains (chain_info, stmt);
> + terminate_all_aliasing_chains (NULL, stmt);
> /* Start a new chain. */
> struct imm_store_chain_info *new_chain
> = new imm_store_chain_info (m_stores_head, base_addr);
> --- gcc/testsuite/gcc.dg/store_merging_2.c.jj 2017-11-06 08:46:29.996607515
> +0100
> +++ gcc/testsuite/gcc.dg/store_merging_2.c 2017-11-06 08:51:20.347926470
> +0100
> @@ -77,4 +77,4 @@ main (void)
> return 0;
> }
>
> -/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging"
> } } */
> +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging"
> } } */
> --- gcc/testsuite/gcc.dg/store_merging_13.c.jj 2017-11-06
> 08:46:29.962607947 +0100
> +++ gcc/testsuite/gcc.dg/store_merging_13.c 2017-11-06 08:51:20.347926470
> +0100
> @@ -104,6 +104,90 @@ f6 (struct S *p, struct S *q)
> p->g = pg;
> }
>
> +__attribute__((noipa)) void
> +f7 (struct S *__restrict p, struct S *__restrict q)
> +{
> + p->a |= q->a;
> + p->b |= q->b;
> + p->c |= q->c;
> + p->d |= q->d;
> + p->e |= q->e;
> + p->f |= q->f;
> + p->g |= q->g;
> +}
> +
> +__attribute__((noipa)) void
> +f8 (struct S *__restrict p, struct S *__restrict q)
> +{
> + p->a &= q->a;
> + p->b &= q->b;
> + p->c &= q->c;
> + p->d &= q->d;
> + p->e &= q->e;
> + p->f &= q->f;
> + p->g &= q->g;
> +}
> +
> +__attribute__((noipa)) void
> +f9 (struct S *__restrict p, struct S *__restrict q)
> +{
> + p->a ^= q->a;
> + p->b ^= q->b;
> + p->c ^= q->c;
> + p->d ^= q->d;
> + p->e ^= q->e;
> + p->f ^= q->f;
> + p->g ^= q->g;
> +}
> +
> +__attribute__((noipa)) void
> +f10 (struct S *__restrict p, struct S *__restrict q)
> +{
> + p->a = ~q->a;
> + p->b = ~q->b;
> + p->c = ~q->c;
> + p->d = ~q->d;
> + p->e = ~q->e;
> + p->f = ~q->f;
> + p->g = ~q->g;
> +}
> +
> +__attribute__((noipa)) void
> +f11 (struct S *__restrict p, struct S *__restrict q)
> +{
> + p->a = p->a | (unsigned char) ~q->a;
> + p->b = p->b | (unsigned char) ~q->b;
> + p->c = p->c | (unsigned short) ~q->c;
> + p->d = p->d | (unsigned char) ~q->d;
> + p->e = p->e | (unsigned char) ~q->e;
> + p->f = p->f | (unsigned char) ~q->f;
> + p->g = p->g | (unsigned char) ~q->g;
> +}
> +
> +__attribute__((noipa)) void
> +f12 (struct S *__restrict p, struct S *__restrict q)
> +{
> + p->a = p->a & (unsigned char) ~q->a;
> + p->b = p->b & (unsigned char) ~q->b;
> + p->c = p->c & (unsigned short) ~q->c;
> + p->d = p->d & (unsigned char) ~q->d;
> + p->e = p->e & (unsigned char) ~q->e;
> + p->f = p->f & (unsigned char) ~q->f;
> + p->g = p->g & (unsigned char) ~q->g;
> +}
> +
> +__attribute__((noipa)) void
> +f13 (struct S *__restrict p, struct S *__restrict q)
> +{
> + p->a = p->a ^ (unsigned char) ~q->a;
> + p->b = p->b ^ (unsigned char) ~q->b;
> + p->c = p->c ^ (unsigned short) ~q->c;
> + p->d = p->d ^ (unsigned char) ~q->d;
> + p->e = p->e ^ (unsigned char) ~q->e;
> + p->f = p->f ^ (unsigned char) ~q->f;
> + p->g = p->g ^ (unsigned char) ~q->g;
> +}
> +
> struct S s = { 20, 21, 22, 23, 24, 25, 26, 27 };
> struct S t = { 0x71, 0x72, 0x7f04, 0x78, 0x31, 0x32, 0x34,
> 0xf1f2f3f4f5f6f7f8ULL };
> struct S u = { 28, 29, 30, 31, 32, 33, 34, 35 };
> @@ -151,7 +235,62 @@ main ()
> || s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
> || s.g != (42 ^ 0x34) || s.h != 27)
> __builtin_abort ();
> + f3 (&s, &v);
> + f7 (&s, &t);
> + asm volatile ("" : : : "memory");
> + if (s.a != (36 | 0x71) || s.b != (37 | 0x72)
> + || s.c != (38 | 0x7f04) || s.d != (39 | 0x78)
> + || s.e != (40 | 0x31) || s.f != (41 | 0x32)
> + || s.g != (42 | 0x34) || s.h != 27)
> + __builtin_abort ();
> + f3 (&s, &u);
> + f8 (&s, &t);
> + asm volatile ("" : : : "memory");
> + if (s.a != (28 & 0x71) || s.b != (29 & 0x72)
> + || s.c != (30 & 0x7f04) || s.d != (31 & 0x78)
> + || s.e != (32 & 0x31) || s.f != (33 & 0x32)
> + || s.g != (34 & 0x34) || s.h != 27)
> + __builtin_abort ();
> + f2 (&s, &v);
> + f9 (&s, &t);
> + asm volatile ("" : : : "memory");
> + if (s.a != (36 ^ 0x71) || s.b != (37 ^ 0x72)
> + || s.c != (38 ^ 0x7f04) || s.d != (39 ^ 0x78)
> + || s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
> + || s.g != (42 ^ 0x34) || s.h != 27)
> + __builtin_abort ();
> + f10 (&s, &u);
> + asm volatile ("" : : : "memory");
> + if (s.a != (unsigned char) ~28 || s.b != (unsigned char) ~29
> + || s.c != (unsigned short) ~30 || s.d != (unsigned char) ~31
> + || s.e != (unsigned char) ~32 || s.f != (unsigned char) ~33
> + || s.g != (unsigned char) ~34 || s.h != 27)
> + __builtin_abort ();
> + f3 (&s, &v);
> + f11 (&s, &t);
> + asm volatile ("" : : : "memory");
> + if (s.a != (36 | (unsigned char) ~0x71) || s.b != (37 | (unsigned char)
> ~0x72)
> + || s.c != (38 | (unsigned short) ~0x7f04) || s.d != (39 | (unsigned
> char) ~0x78)
> + || s.e != (40 | (unsigned char) ~0x31) || s.f != (41 | (unsigned char)
> ~0x32)
> + || s.g != (42 | (unsigned char) ~0x34) || s.h != 27)
> + __builtin_abort ();
> + f3 (&s, &u);
> + f12 (&s, &t);
> + asm volatile ("" : : : "memory");
> + if (s.a != (28 & (unsigned char) ~0x71) || s.b != (29 & (unsigned char)
> ~0x72)
> + || s.c != (30 & (unsigned short) ~0x7f04) || s.d != (31 & (unsigned
> char) ~0x78)
> + || s.e != (32 & (unsigned char) ~0x31) || s.f != (33 & (unsigned char)
> ~0x32)
> + || s.g != (34 & (unsigned char) ~0x34) || s.h != 27)
> + __builtin_abort ();
> + f2 (&s, &v);
> + f13 (&s, &t);
> + asm volatile ("" : : : "memory");
> + if (s.a != (36 ^ (unsigned char) ~0x71) || s.b != (37 ^ (unsigned char)
> ~0x72)
> + || s.c != (38 ^ (unsigned short) ~0x7f04) || s.d != (39 ^ (unsigned
> char) ~0x78)
> + || s.e != (40 ^ (unsigned char) ~0x31) || s.f != (41 ^ (unsigned char)
> ~0x32)
> + || s.g != (42 ^ (unsigned char) ~0x34) || s.h != 27)
> + __builtin_abort ();
> return 0;
> }
>
> -/* { dg-final { scan-tree-dump-times "Merging successful" 6 "store-merging"
> } } */
> +/* { dg-final { scan-tree-dump-times "Merging successful" 13 "store-merging"
> } } */
> --- gcc/testsuite/gcc.dg/store_merging_14.c.jj 2017-11-06
> 08:46:29.919608492 +0100
> +++ gcc/testsuite/gcc.dg/store_merging_14.c 2017-11-06 08:51:20.347926470
> +0100
> @@ -104,6 +104,42 @@ f6 (struct S *p, struct S *q)
> p->g = pg;
> }
>
> +__attribute__((noipa)) void
> +f7 (struct S *__restrict p, struct S *__restrict q)
> +{
> + p->a |= q->a;
> + p->b |= q->b;
> + p->c |= q->c;
> + p->d |= q->d;
> + p->e |= q->e;
> + p->f |= q->f;
> + p->g |= q->g;
> +}
> +
> +__attribute__((noipa)) void
> +f8 (struct S *__restrict p, struct S *__restrict q)
> +{
> + p->a &= q->a;
> + p->b &= q->b;
> + p->c &= q->c;
> + p->d &= q->d;
> + p->e &= q->e;
> + p->f &= q->f;
> + p->g &= q->g;
> +}
> +
> +__attribute__((noipa)) void
> +f9 (struct S *__restrict p, struct S *__restrict q)
> +{
> + p->a ^= q->a;
> + p->b ^= q->b;
> + p->c ^= q->c;
> + p->d ^= q->d;
> + p->e ^= q->e;
> + p->f ^= q->f;
> + p->g ^= q->g;
> +}
> +
> struct S s = { 72, 20, 21, 73, 22, 23, 24, 25, 26, 74, 27 };
> struct S t = { 75, 0x71, 0x72, 76, 0x7f04, 0x78, 0x31, 0x32, 0x34, 77,
> 0xf1f2f3f4f5f6f7f8ULL };
> struct S u = { 78, 28, 29, 79, 30, 31, 32, 33, 34, 80, 35 };
> @@ -151,7 +187,31 @@ main ()
> || s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
> || s.g != (42 ^ 0x34) || s.k != 74 || s.h != 27)
> __builtin_abort ();
> + f3 (&s, &v);
> + f7 (&s, &t);
> + asm volatile ("" : : : "memory");
> + if (s.i != 72 || s.a != (36 | 0x71) || s.b != (37 | 0x72) || s.j != 73
> + || s.c != (38 | 0x7f04) || s.d != (39 | 0x78)
> + || s.e != (40 | 0x31) || s.f != (41 | 0x32)
> + || s.g != (42 | 0x34) || s.k != 74 || s.h != 27)
> + __builtin_abort ();
> + f3 (&s, &u);
> + f8 (&s, &t);
> + asm volatile ("" : : : "memory");
> + if (s.i != 72 || s.a != (28 & 0x71) || s.b != (29 & 0x72) || s.j != 73
> + || s.c != (30 & 0x7f04) || s.d != (31 & 0x78)
> + || s.e != (32 & 0x31) || s.f != (33 & 0x32)
> + || s.g != (34 & 0x34) || s.k != 74 || s.h != 27)
> + __builtin_abort ();
> + f2 (&s, &v);
> + f9 (&s, &t);
> + asm volatile ("" : : : "memory");
> + if (s.i != 72 || s.a != (36 ^ 0x71) || s.b != (37 ^ 0x72) || s.j != 73
> + || s.c != (38 ^ 0x7f04) || s.d != (39 ^ 0x78)
> + || s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
> + || s.g != (42 ^ 0x34) || s.k != 74 || s.h != 27)
> + __builtin_abort ();
> return 0;
> }
>
> -/* { dg-final { scan-tree-dump-times "Merging successful" 6 "store-merging"
> } } */
> +/* { dg-final { scan-tree-dump-times "Merging successful" 9 "store-merging"
> } } */
>
> Jakub
>
>
--
Richard Biener <[email protected]>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB
21284 (AG Nuernberg)