Hi!

As mentioned earlier, the !has_single_use checks disable store merging in
many cases: a single multiple-use SSA_NAME anywhere in a group is enough to
break up the whole group.
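To make that concrete, consider a sketch like the following (a made-up
testcase, not from the patch or its testsuite; it assumes earlier passes
CSE the two t.a loads into one SSA_NAME):

struct S { char a, b; } s, t;

char
f1 (void)
{
  /* The load from t.a feeds both x and the s.a store, so the loaded
     SSA_NAME has two uses and the old has_single_use check rejected
     merging the s.a and s.b stores.  */
  char x = t.a;
  s.a = t.a;
  s.b = t.b;
  return x;
}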
The following patch replaces those checks with a heuristic.  It is based on
GIMPLE statement counts, but I think it should work reasonably well in
general.  The code counts how many statements the group needs without store
merging (i.e. the stores and, when not storing a constant, the associated
loads and bitwise stmts), and then how many statements are needed if store
merging is performed and the expected DCE gets rid of the dead stmts; that
is, we count what we'll emit in the store merging sequences for the group
(without the masking for bitfields with padding bits in between, as even
normal stores to bitfields have that implicitly and only expansion makes it
explicit in the IL), plus whatever original statements had multiple uses
(and the stmts they use, if any).

So, e.g. if we have

  _1 = t.a; s.a = _1; _2 = t.b; s.b = _2; use (_1);

we can still store merge it, as there are 4 original stmts and we'd replace
them with 3 new stmts:

  _1 = t.a; _3 = [t.a;t.b]; [s.a;s.b] = _3; use (_1);

while if we have

  _1 = t.a; s.a = _1; _2 = t.b; s.b = _2; use (_1, _2);

we don't replace it anymore, as that would be trading 4 original stmts for
4 new ones.
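In source terms that second, unprofitable case could look like this (again
a made-up sketch rather than anything from the patch):

struct S { char a, b; } s, t;

int
f2 (void)
{
  /* Both loads survive for x and y, so merging would only add a merged
     load and a merged store on top of them, trading 4 stmts for 4 new
     ones; with this patch we punt on such a group.  */
  char x = t.a, y = t.b;
  s.a = t.a;
  s.b = t.b;
  return x + y;
}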
During the combined {x86_64,i686}-linux bootstraps/regtests, without this
patch and the one posted today the counts were:

  integer_cst   215621  442752
  mem_ref        12115   24919
  bit_and_expr      37      88
  bit_ior_expr      19      46
  bit_xor_expr      27      58

and with the two patches:

  integer_cst   215605  442817
  mem_ref        17320   36133
  bit_and_expr      93     224
  bit_ior_expr      66     153
  bit_xor_expr      56     153

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-11-06  Jakub Jelinek  <ja...@redhat.com>

	* gimple-ssa-store-merging.c (count_multiple_uses): New function.
	(split_group): Add total_orig and total_new arguments, estimate the
	number of statements related to the store group without store
	merging and with store merging.
	(imm_store_chain_info::output_merged_store): Adjust split_group
	callers, punt if estimated number of statements with store merging
	is not smaller than estimated number of statements without it.
	Formatting fix.
	(handled_load): Remove has_single_use checks.
	(pass_store_merging::process_store): Likewise.

--- gcc/gimple-ssa-store-merging.c.jj	2017-11-06 08:57:07.000000000 +0100
+++ gcc/gimple-ssa-store-merging.c	2017-11-06 16:35:38.122437951 +0100
@@ -1370,6 +1370,65 @@ find_constituent_stores (struct merged_s
   return ret;
 }
 
+/* Return how many SSA_NAMEs used to compute value to store in the INFO
+   store have multiple uses.  If any SSA_NAME has multiple uses, also
+   count statements needed to compute it.  */
+
+static unsigned
+count_multiple_uses (store_immediate_info *info)
+{
+  gimple *stmt = info->stmt;
+  unsigned ret = 0;
+  switch (info->rhs_code)
+    {
+    case INTEGER_CST:
+      return 0;
+    case BIT_AND_EXPR:
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+      if (!has_single_use (gimple_assign_rhs1 (stmt)))
+        {
+          ret += 1 + info->ops[0].bit_not_p;
+          if (info->ops[1].base_addr)
+            ret += 1 + info->ops[1].bit_not_p;
+          return ret + 1;
+        }
+      stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
+      /* stmt is now the BIT_*_EXPR.  */
+      if (!has_single_use (gimple_assign_rhs1 (stmt)))
+        ret += 1 + info->ops[0].bit_not_p;
+      else if (info->ops[0].bit_not_p)
+        {
+          gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
+          if (!has_single_use (gimple_assign_rhs1 (stmt2)))
+            ++ret;
+        }
+      if (info->ops[1].base_addr == NULL_TREE)
+        return ret;
+      if (!has_single_use (gimple_assign_rhs2 (stmt)))
+        ret += 1 + info->ops[1].bit_not_p;
+      else if (info->ops[1].bit_not_p)
+        {
+          gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
+          if (!has_single_use (gimple_assign_rhs1 (stmt2)))
+            ++ret;
+        }
+      return ret;
+    case MEM_REF:
+      if (!has_single_use (gimple_assign_rhs1 (stmt)))
+        return 1 + info->ops[0].bit_not_p;
+      else if (info->ops[0].bit_not_p)
+        {
+          stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
+          if (!has_single_use (gimple_assign_rhs1 (stmt)))
+            return 1;
+        }
+      return 0;
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Split a merged store described by GROUP by populating the SPLIT_STORES
    vector (if non-NULL) with split_store structs describing the byte offset
    (from the base), the bit size and alignment of each store as well as the
@@ -1385,7 +1444,9 @@ find_constituent_stores (struct merged_s
 static unsigned int
 split_group (merged_store_group *group, bool allow_unaligned_store,
              bool allow_unaligned_load,
-             vec<struct split_store *> *split_stores)
+             vec<struct split_store *> *split_stores,
+             unsigned *total_orig,
+             unsigned *total_new)
 {
   unsigned HOST_WIDE_INT pos = group->bitregion_start;
   unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
@@ -1393,6 +1454,7 @@ split_group (merged_store_group *group,
   unsigned HOST_WIDE_INT group_align = group->align;
   unsigned HOST_WIDE_INT align_base = group->align_base;
   unsigned HOST_WIDE_INT group_load_align = group_align;
+  bool any_orig = false;
 
   gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
 
@@ -1400,6 +1462,34 @@ split_group (merged_store_group *group,
   unsigned HOST_WIDE_INT try_pos = bytepos;
   group->stores.qsort (sort_by_bitpos);
 
+  if (total_orig)
+    {
+      unsigned int i;
+      store_immediate_info *info = group->stores[0];
+
+      total_new[0] = 0;
+      total_orig[0] = 1; /* The orig store.  */
+      info = group->stores[0];
+      if (info->ops[0].base_addr)
+        total_orig[0] += 1 + info->ops[0].bit_not_p;
+      if (info->ops[1].base_addr)
+        total_orig[0] += 1 + info->ops[1].bit_not_p;
+      switch (info->rhs_code)
+        {
+        case BIT_AND_EXPR:
+        case BIT_IOR_EXPR:
+        case BIT_XOR_EXPR:
+          total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
+          break;
+        default:
+          break;
+        }
+      total_orig[0] *= group->stores.length ();
+
+      FOR_EACH_VEC_ELT (group->stores, i, info)
+        total_new[0] += count_multiple_uses (info);
+    }
+
   if (!allow_unaligned_load)
     for (int i = 0; i < 2; ++i)
       if (group->load_align[i])
@@ -1524,7 +1614,10 @@ split_group (merged_store_group *group,
       if (info
          && info->bitpos >= try_bitpos
          && info->bitpos + info->bitsize <= try_bitpos + try_size)
-       store->orig = true;
+       {
+         store->orig = true;
+         any_orig = true;
+       }
       split_stores->safe_push (store);
     }
 
@@ -1532,6 +1625,37 @@ split_group (merged_store_group *group,
       size -= try_size;
     }
 
+  if (total_orig)
+    {
+      /* If we are reusing some original stores and any of the
+         original SSA_NAMEs had multiple uses, we need to subtract
+         those now before we add the new ones.  */
+      if (total_new[0] && any_orig)
+        {
+          unsigned int i;
+          struct split_store *store;
+          FOR_EACH_VEC_ELT (*split_stores, i, store)
+            if (store->orig)
+              total_new[0] -= count_multiple_uses (store->orig_stores[0]);
+        }
+      total_new[0] += ret; /* The new store.  */
+      store_immediate_info *info = group->stores[0];
+      if (info->ops[0].base_addr)
+        total_new[0] += ret * (1 + info->ops[0].bit_not_p);
+      if (info->ops[1].base_addr)
+        total_new[0] += ret * (1 + info->ops[1].bit_not_p);
+      switch (info->rhs_code)
+        {
+        case BIT_AND_EXPR:
+        case BIT_IOR_EXPR:
+        case BIT_XOR_EXPR:
+          total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
+          break;
+        default:
+          break;
+        }
+    }
+
   return ret;
 }
 
@@ -1564,26 +1688,36 @@ imm_store_chain_info::output_merged_stor
         for unaligned and how many stores we'd emit for aligned stores.
         Only use unaligned stores if it allows fewer stores than aligned.  */
       unsigned aligned_cnt
-       = split_group (group, false, allow_unaligned_load, NULL);
+       = split_group (group, false, allow_unaligned_load, NULL, NULL, NULL);
       unsigned unaligned_cnt
-       = split_group (group, true, allow_unaligned_load, NULL);
+       = split_group (group, true, allow_unaligned_load, NULL, NULL, NULL);
       if (aligned_cnt <= unaligned_cnt)
        allow_unaligned_store = false;
     }
 
+  unsigned total_orig, total_new;
   split_group (group, allow_unaligned_store, allow_unaligned_load,
-              &split_stores);
+              &split_stores, &total_orig, &total_new);
   if (split_stores.length () >= orig_num_stmts)
     {
       /* We didn't manage to reduce the number of statements.  Bail out.  */
       if (dump_file && (dump_flags & TDF_DETAILS))
-       {
-         fprintf (dump_file, "Exceeded original number of stmts (%u)."
-                             "  Not profitable to emit new sequence.\n",
-                  orig_num_stmts);
-       }
+       fprintf (dump_file, "Exceeded original number of stmts (%u)."
+                           "  Not profitable to emit new sequence.\n",
+                orig_num_stmts);
       return false;
     }
+  if (total_orig <= total_new)
+    {
+      /* If number of estimated new statements is above estimated original
+         statements, bail out too.  */
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        fprintf (dump_file, "Estimated number of original stmts (%u)"
+                            " not larger than estimated number of new"
+                            " stmts (%u).\n",
+                 total_orig, total_new);
+      return false;
+    }
 
   gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
   gimple_seq seq = NULL;
@@ -2096,7 +2229,6 @@ handled_load (gimple *stmt, store_operan
     {
       tree rhs1 = gimple_assign_rhs1 (stmt);
       if (TREE_CODE (rhs1) == SSA_NAME
-         && has_single_use (rhs1)
          && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
                           bitregion_start, bitregion_end))
        {
@@ -2159,7 +2291,7 @@ pass_store_merging::process_store (gimpl
      rhs_code = INTEGER_CST;
      ops[0].val = rhs;
    }
-  else if (TREE_CODE (rhs) != SSA_NAME || !has_single_use (rhs))
+  else if (TREE_CODE (rhs) != SSA_NAME)
    invalid = true;
  else
    {
@@ -2179,7 +2311,7 @@ pass_store_merging::process_store (gimpl
          rhs1 = gimple_assign_rhs1 (def_stmt);
          rhs2 = gimple_assign_rhs2 (def_stmt);
          invalid = true;
-         if (TREE_CODE (rhs1) != SSA_NAME || !has_single_use (rhs1))
+         if (TREE_CODE (rhs1) != SSA_NAME)
            break;
          def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
          if (!is_gimple_assign (def_stmt1)
@@ -2188,7 +2320,7 @@ pass_store_merging::process_store (gimpl
            break;
          if (rhs_valid_for_store_merging_p (rhs2))
            ops[1].val = rhs2;
-         else if (TREE_CODE (rhs2) != SSA_NAME || !has_single_use (rhs2))
+         else if (TREE_CODE (rhs2) != SSA_NAME)
            break;
          else
            {

	Jakub