Insn combine may come up with superfluous reg-reg moves, where the combine people say that these are no problem since reg-alloc is supposed to optimize them. The issue is that the lower-subreg pass sitting between combine and reg-alloc may split such moves, coming up with a zoo of subregs which are only handled poorly by the register allocator.
This patch adds a new avr mini-pass that handles such cases. As an example, take int f_ffssi (long x) { return __builtin_ffsl (x); } where the two functions have the same interface, i.e. there are no extra moves required for the argument or for the return value. However, $ avr-gcc -S -Os -dp -mno-fuse-move ... f_ffssi: mov r20,r22 ; 29 [c=4 l=1] movqi_insn/0 mov r21,r23 ; 30 [c=4 l=1] movqi_insn/0 mov r22,r24 ; 31 [c=4 l=1] movqi_insn/0 mov r23,r25 ; 32 [c=4 l=1] movqi_insn/0 mov r25,r23 ; 33 [c=4 l=4] *movsi/0 mov r24,r22 mov r23,r21 mov r22,r20 rcall __ffssi2 ; 34 [c=16 l=1] *ffssihi2.libgcc ret ; 37 [c=0 l=1] return where all the moves add up to a no-op. The -mno-fuse-move option stops any attempts by the avr backend to clean up that mess. gcc/ * config/avr/avr-passes.def (avr_pass_2moves): Insert after combine. * config/avr/avr-passes.cc (make_avr_pass_2moves): New function. (pass_data avr_pass_data_2moves): New static variable. (avr_pass_2moves): New rtl_opt_pass. * config/avr/avr-protos.h (make_avr_pass_2moves): New proto. Ok for trunk? Johann --
AVR: New mini-pass to undo superfluous moves from insn combine. Insn combine may come up with superfluous reg-reg moves, where the combine people say that these are no problem since reg-alloc is supposed to optimize them. The issue is that the lower-subreg pass sitting between combine and reg-alloc may split such moves, coming up with a zoo of subregs which are only handled poorly by the register allocator. This patch adds a new avr mini-pass that handles such cases. As an example, take int f_ffssi (long x) { return __builtin_ffsl (x); } where the two functions have the same interface, i.e. there are no extra moves required for the argument or for the return value. However, $ avr-gcc -S -Os -dp -mno-fuse-move ... f_ffssi: mov r20,r22 ; 29 [c=4 l=1] movqi_insn/0 mov r21,r23 ; 30 [c=4 l=1] movqi_insn/0 mov r22,r24 ; 31 [c=4 l=1] movqi_insn/0 mov r23,r25 ; 32 [c=4 l=1] movqi_insn/0 mov r25,r23 ; 33 [c=4 l=4] *movsi/0 mov r24,r22 mov r23,r21 mov r22,r20 rcall __ffssi2 ; 34 [c=16 l=1] *ffssihi2.libgcc ret ; 37 [c=0 l=1] return where all the moves add up to a no-op. The -mno-fuse-move option stops any attempts by the avr backend to clean up that mess. gcc/ * config/avr/avr-passes.def (avr_pass_2moves): Insert after combine. * config/avr/avr-passes.cc (make_avr_pass_2moves): New function. (pass_data avr_pass_data_2moves): New static variable. (avr_pass_2moves): New rtl_opt_pass. * config/avr/avr-protos.h (make_avr_pass_2moves): New proto. diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc index 6a88a2740bc..1bcf211358e 100644 --- a/gcc/config/avr/avr-passes.cc +++ b/gcc/config/avr/avr-passes.cc @@ -4841,6 +4841,137 @@ avr_pass_fuse_add::execute1 (function *func) } + +////////////////////////////////////////////////////////////////////////////// +// Fuse 2 move insns after combine. + +static const pass_data avr_pass_data_2moves = +{ + RTL_PASS, // type + "", // name (will be patched) + OPTGROUP_NONE, // optinfo_flags + TV_DF_SCAN, // tv_id + 0, // properties_required + 0, // properties_provided + 0, // properties_destroyed + 0, // todo_flags_start + 0 // todo_flags_finish +}; + +class avr_pass_2moves : public rtl_opt_pass +{ +public: + avr_pass_2moves (gcc::context *ctxt, const char *name) + : rtl_opt_pass (avr_pass_data_2moves, ctxt) + { + this->name = name; + } + + unsigned int execute (function *func) final override + { + if (optimize > 0 && avropt_fuse_move > 0) + { + bool changed = false; + basic_block bb; + + FOR_EACH_BB_FN (bb, func) + { + changed |= optimize_2moves_bb (bb); + } + + if (changed) + { + df_note_add_problem (); + df_analyze (); + } + } + + return 0; + } + + bool optimize_2moves (rtx_insn *, rtx_insn *); + bool optimize_2moves_bb (basic_block); +}; // avr_pass_2moves + +bool +avr_pass_2moves::optimize_2moves_bb (basic_block bb) +{ + bool changed = false; + rtx_insn *insn1 = nullptr; + rtx_insn *insn2 = nullptr; + rtx_insn *curr; + + FOR_BB_INSNS (bb, curr) + { + if (insn1 && INSN_P (insn1) + && insn2 && INSN_P (insn2)) + changed |= optimize_2moves (insn1, insn2); + + insn1 = insn2; + insn2 = curr; + } + + return changed; +} + +bool +avr_pass_2moves::optimize_2moves (rtx_insn *insn1, rtx_insn *insn2) +{ + bool good = false; + bool bad = false; + rtx set1, dest1, src1; + rtx set2, dest2, src2; + + if ((set1 = single_set (insn1)) + && (set2 = single_set (insn2)) + && (src1 = SET_SRC (set1)) + && REG_P (src2 = SET_SRC (set2)) + && REG_P (dest1 = SET_DEST (set1)) + && REG_P (dest2 = SET_DEST (set2)) + && rtx_equal_p (dest1, src2) + // Now we have: + // insn1: dest1 = src1 + // insn2: dest2 = dest1 + && REGNO (dest1) >= FIRST_PSEUDO_REGISTER + // Paranoia. + && GET_CODE (PATTERN (insn1)) != PARALLEL + && GET_CODE (PATTERN (insn2)) != PARALLEL + && (rtx_equal_p (dest2, src1) + || !reg_overlap_mentioned_p (dest2, src1))) + { + avr_dump ("\n;; Found 2moves:\n%r\n%r\n", insn1, insn2); + avr_dump (";; reg %d: insn uses uids:", REGNO (dest1)); + + // Go check that dest1 is used exactly once, namely by insn2. + + df_ref use = DF_REG_USE_CHAIN (REGNO (dest1)); + for (; use; use = DF_REF_NEXT_REG (use)) + { + rtx_insn *user = DF_REF_INSN (use); + avr_dump (" %d", INSN_UID (user)); + good |= INSN_UID (user) == INSN_UID (insn2); + bad |= INSN_UID (user) != INSN_UID (insn2); + } + avr_dump (".\n"); + + if (good && !bad + // Propagate src1 to insn2: + // insn1: # Deleted + // insn2: dest2 = src1 + && validate_change (insn2, &SET_SRC (set2), src1, false)) + { + SET_INSN_DELETED (insn1); + return true; + } + } + + if (good && !bad) + avr_dump (";; Failed\n"); + + return false; +} + + ////////////////////////////////////////////////////////////////////////////// // Split insns with nonzero_bits() after combine. @@ -5704,6 +5835,14 @@ make_avr_pass_casesi (gcc::context *ctxt) return new avr_pass_casesi (ctxt, "avr-casesi"); } +// Optimize 2 consecutive moves after combine. + +rtl_opt_pass * +make_avr_pass_2moves (gcc::context *ctxt) +{ + return new avr_pass_2moves (ctxt, "avr-2moves"); +} + rtl_opt_pass * make_avr_pass_split_nzb (gcc::context *ctxt) { diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def index eb60a93eeeb..d668c7fadfc 100644 --- a/gcc/config/avr/avr-passes.def +++ b/gcc/config/avr/avr-passes.def @@ -74,6 +74,14 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, avr_pass_recompute_notes); INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi); +/* Insn combine may come up with superfluous reg-reg moves, where the combine + people say that these are no problem since reg-alloc is supposed to optimize + them. The issue is that the lower-subreg pass sitting between combine and + reg-alloc may split such moves, coming up with a zoo of subregs which are + only handled poorly by the register allocator. */ + +INSERT_PASS_AFTER (pass_combine, 1, avr_pass_2moves); + /* Some combine insns have nonzero_bits() in their condition, though insns should not use such stuff in their condition. Therefore, we split such insn into something without nonzero_bits() in their condition right after diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index ca30136797d..37911e7d275 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -208,6 +208,7 @@ extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *); extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *); extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *); extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *); +extern rtl_opt_pass *make_avr_pass_2moves (gcc::context *); #ifdef RTX_CODE extern bool avr_casei_sequence_check_operands (rtx *xop); extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);