Robin Dapp <[email protected]> writes:
> A swap-style idiom like
> tmp = a
> a = b
> b = tmp
> would be transformed like
> tmp_tmp = cond ? a : tmp
> tmp_a = cond ? b : a
> tmp_b = cond ? tmp_tmp : b
> [...]
> including rewiring the first source operand to previous writes (e.g. tmp ->
> tmp_tmp).
>
> The code would recognize this, though, and change the last line to
> tmp_b = cond ? a : b.
>
> Without additional temporaries we can now emit the following sequence:
> tmp = a // (no condition here)
> a = cond ? b : a
> b = cond ? tmp : b
> avoiding any rewiring which would break things now.
>
> check_need_cmovs () finds swap-style idioms and marks the first of the
> three instructions as not needing a cmove.
Looks like a nice optimisation, but could we just test whether the
destination of a set isn't live on exit from the then block? I think
we could do that on the fly during the main noce_convert_multiple_sets
loop.
Thanks,
Richard
> ---
> gcc/ifcvt.c | 97 ++++++++++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 78 insertions(+), 19 deletions(-)
>
> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index 955f9541f60..09bf443656c 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -103,6 +103,7 @@ static rtx_insn *block_has_only_trap (basic_block);
> static void check_need_temps (basic_block bb,
> hash_map<rtx, bool> *need_temp,
> rtx cond);
> +static void check_need_cmovs (basic_block bb, hash_map<rtx, bool>
> *need_cmov);
>
>
> /* Count the number of non-jump active insns in BB. */
> @@ -3207,6 +3208,7 @@ noce_convert_multiple_sets (struct noce_if_info
> *if_info)
> int count = 0;
>
> hash_map<rtx, bool> need_temps;
> + hash_map<rtx, bool> need_no_cmovs;
>
> /* If we newly set a CC before a cmov, we might need a temporary
> even though the compare will be removed by a later pass. Costing
> @@ -3214,6 +3216,9 @@ noce_convert_multiple_sets (struct noce_if_info
> *if_info)
>
> check_need_temps (then_bb, &need_temps, cond);
>
> + /* Identify swap-style idioms. */
> + check_need_cmovs (then_bb, &need_no_cmovs);
> +
> hash_map<rtx, bool> temps_created;
>
> FOR_BB_INSNS (then_bb, insn)
> @@ -3229,10 +3234,8 @@ noce_convert_multiple_sets (struct noce_if_info
> *if_info)
> rtx new_val = SET_SRC (set);
> rtx old_val = target;
>
> - rtx dest = SET_DEST (set);
> -
> rtx temp;
> - if (need_temps.get (dest))
> + if (need_temps.get (target))
> {
> temp = gen_reg_rtx (GET_MODE (target));
> temps_created.put (target, true);
> @@ -3241,18 +3244,11 @@ noce_convert_multiple_sets (struct noce_if_info
> *if_info)
> temp = target;
>
> /* If we were supposed to read from an earlier write in this block,
> - we've changed the register allocation. Rewire the read. While
> - we are looking, also try to catch a swap idiom. */
> + we've changed the register allocation. Rewire the read. */
> for (int i = count - 1; i >= 0; --i)
> if (reg_overlap_mentioned_p (new_val, targets[i]))
> {
> - /* Catch a "swap" style idiom. */
> - if (find_reg_note (insn, REG_DEAD, new_val) != NULL_RTX)
> - /* The write to targets[i] is only live until the read
> - here. As the condition codes match, we can propagate
> - the set to here. */
> - new_val = SET_SRC (single_set (unmodified_insns[i]));
> - else
> + if (find_reg_note (insn, REG_DEAD, new_val) == NULL_RTX)
> new_val = temporaries[i];
> break;
> }
> @@ -3324,8 +3320,11 @@ noce_convert_multiple_sets (struct noce_if_info
> *if_info)
>
> {
> start_sequence ();
> - temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
> - x, y, new_val, old_val, NULL_RTX);
> + if (!need_no_cmovs.get (insn))
> + temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
> + x, y, new_val, old_val, NULL_RTX);
> + else
> + noce_emit_move_insn (target, new_val);
>
> /* If we failed to expand the conditional move, drop out and don't
> try to continue. */
> @@ -3346,13 +3345,16 @@ noce_convert_multiple_sets (struct noce_if_info
> *if_info)
> end_sequence ();
> }
>
> - /* Now try emitting one passing a non-canonicalized cc comparison.
> - This allows the backend to emit a cmov directly without additional
> + /* Now try emitting a cmov passing a non-canonicalized cc comparison.
> + This allows backends to emit a cmov directly without additional
> compare. */
> {
> start_sequence ();
> - temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
> - x, y, new_val, old_val, cc_cmp);
> + if (!need_no_cmovs.get (insn))
> + temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
> + x, y, new_val, old_val, cc_cmp);
> + else
> + noce_emit_move_insn (target, new_val);
>
> /* If we failed to expand the conditional move, drop out and don't
> try to continue. */
> @@ -3931,6 +3933,7 @@ check_cond_move_block (basic_block bb,
>
> /* Check for which sets we need to emit temporaries to hold the destination
> of
> a conditional move. */
> +
> static void
> check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
> {
> @@ -3938,7 +3941,7 @@ check_need_temps (basic_block bb, hash_map<rtx, bool>
> *need_temp, rtx cond)
>
> FOR_BB_INSNS (bb, insn)
> {
> - rtx set, dest;
> + rtx set, src, dest;
>
> if (!active_insn_p (insn))
> continue;
> @@ -3947,12 +3950,68 @@ check_need_temps (basic_block bb, hash_map<rtx, bool>
> *need_temp, rtx cond)
> if (set == NULL_RTX)
> continue;
>
> + src = SET_SRC (set);
> dest = SET_DEST (set);
>
> /* noce_emit_cmove will emit the condition check every time it is
> called
> so we need a temp register if the destination is modified. */
> if (reg_overlap_mentioned_p (dest, cond))
> need_temp->put (dest, true);
> +
> + }
> +}
> +
> +/* Find local swap-style idioms in BB and mark the first insn (1)
> + that is only a temporary as not needing a conditional move as
> + it is going to be dead afterwards anyway.
> +
> + (1) int tmp = a;
> + a = b;
> + b = tmp;
> +
> +
> + ifcvt
> + -->
> +
> + load tmp,a
> + cmov a,b
> + cmov b,tmp */
> +
> +static void
> +check_need_cmovs (basic_block bb, hash_map<rtx, bool> *need_cmov)
> +{
> + rtx_insn *insn;
> + int count = 0;
> + auto_vec<rtx> insns;
> + auto_vec<rtx> dests;
> +
> + FOR_BB_INSNS (bb, insn)
> + {
> + rtx set, src, dest;
> +
> + if (!active_insn_p (insn))
> + continue;
> +
> + set = single_set (insn);
> + if (set == NULL_RTX)
> + continue;
> +
> + src = SET_SRC (set);
> + dest = SET_DEST (set);
> +
> + for (int i = count - 1; i >= 0; --i)
> + {
> + if (reg_overlap_mentioned_p (src, dests[i])
> + && find_reg_note (insn, REG_DEAD, src) != NULL_RTX)
> + {
> + need_cmov->put (insns[i], false);
> + }
> + }
> +
> + insns.safe_push (insn);
> + dests.safe_push (dest);
> +
> + count++;
> }
> }