Insn combine may come up with superfluous reg-reg moves, where the
combine people say that these are no problem since reg-alloc is supposed
to optimize them.  The issue is that the lower-subreg pass sitting
between combine and reg-alloc may split such moves, coming up with a zoo
of subregs which are only handled poorly by the register allocator.

This patch adds a new avr mini-pass that handles such cases.

As an example, take

int f_ffssi (long x)
{
    return __builtin_ffsl (x);
}

where the two functions have the same interface, i.e. there are no extra
moves required for the argument or for the return value. However,

$ avr-gcc -S -Os -dp -mno-fuse-move ...

f_ffssi:
        mov r20,r22      ;  29  [c=4 l=1]  movqi_insn/0
        mov r21,r23      ;  30  [c=4 l=1]  movqi_insn/0
        mov r22,r24      ;  31  [c=4 l=1]  movqi_insn/0
        mov r23,r25      ;  32  [c=4 l=1]  movqi_insn/0
        mov r25,r23      ;  33  [c=4 l=4]  *movsi/0
        mov r24,r22
        mov r23,r21
        mov r22,r20
        rcall __ffssi2   ;  34  [c=16 l=1]  *ffssihi2.libgcc
        ret              ;  37  [c=0 l=1]  return

where all the moves add up to a no-op.  The -mno-fuse-move option
stops any attempts by the avr backend to clean up that mess.

gcc/
        * config/avr/avr-passes.def (avr_pass_2moves): Insert after combine.
        * config/avr/avr-passes.cc (make_avr_pass_2moves): New function.
        (pass_data avr_pass_data_2moves): New static variable.
        (avr_pass_2moves): New rtl_opt_pass.
        * config/avr/avr-protos.h (make_avr_pass_2moves): New proto.

Ok for trunk?

Johann

--
    AVR: New mini-pass to undo superfluous moves from insn combine.
    
    Insn combine may come up with superfluous reg-reg moves, where the combine
    people say that these are no problem since reg-alloc is supposed to optimize
    them.  The issue is that the lower-subreg pass sitting between combine and
    reg-alloc may split such moves, coming up with a zoo of subregs which are
    only handled poorly by the register allocator.
    
    This patch adds a new avr mini-pass that handles such cases.
    
    As an example, take
    
    int f_ffssi (long x)
    {
        return __builtin_ffsl (x);
    }
    
    where the two functions have the same interface, i.e. there are no extra
    moves required for the argument or for the return value. However,
    
    $ avr-gcc -S -Os -dp -mno-fuse-move ...
    
    f_ffssi:
            mov r20,r22      ;  29  [c=4 l=1]  movqi_insn/0
            mov r21,r23      ;  30  [c=4 l=1]  movqi_insn/0
            mov r22,r24      ;  31  [c=4 l=1]  movqi_insn/0
            mov r23,r25      ;  32  [c=4 l=1]  movqi_insn/0
            mov r25,r23      ;  33  [c=4 l=4]  *movsi/0
            mov r24,r22
            mov r23,r21
            mov r22,r20
            rcall __ffssi2   ;  34  [c=16 l=1]  *ffssihi2.libgcc
            ret              ;  37  [c=0 l=1]  return
    
    where all the moves add up to a no-op.  The -mno-fuse-move option
    stops any attempts by the avr backend to clean up that mess.
    
    gcc/
            * config/avr/avr-passes.def (avr_pass_2moves): Insert after combine.
            * config/avr/avr-passes.cc (make_avr_pass_2moves): New function.
            (pass_data avr_pass_data_2moves): New static variable.
            (avr_pass_2moves): New rtl_opt_pass.
            * config/avr/avr-protos.h (make_avr_pass_2moves): New proto.

diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 6a88a2740bc..1bcf211358e 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -4841,6 +4841,137 @@ avr_pass_fuse_add::execute1 (function *func)
 }
 
 
+
+//////////////////////////////////////////////////////////////////////////////
+// Fuse 2 move insns after combine.
+
+static const pass_data avr_pass_data_2moves =
+{
+  RTL_PASS,	    // type
+  "",		    // name (will be patched)
+  OPTGROUP_NONE,    // optinfo_flags
+  TV_DF_SCAN,	    // tv_id
+  0,		    // properties_required
+  0,		    // properties_provided
+  0,		    // properties_destroyed
+  0,		    // todo_flags_start
+  0		    // todo_flags_finish
+};
+
+class avr_pass_2moves : public rtl_opt_pass
+{
+public:
+  avr_pass_2moves (gcc::context *ctxt, const char *name)
+    : rtl_opt_pass (avr_pass_data_2moves, ctxt)
+  {
+    this->name = name;
+  }
+
+  unsigned int execute (function *func) final override
+  {
+    if (optimize > 0 && avropt_fuse_move > 0)
+      {
+	bool changed = false;
+	basic_block bb;
+
+	FOR_EACH_BB_FN (bb, func)
+	  {
+	    changed |= optimize_2moves_bb (bb);
+	  }
+
+	if (changed)
+	  {
+	    df_note_add_problem ();
+	    df_analyze ();
+	  }
+      }
+
+    return 0;
+  }
+
+  bool optimize_2moves (rtx_insn *, rtx_insn *);
+  bool optimize_2moves_bb (basic_block);
+}; // avr_pass_2moves
+
+bool
+avr_pass_2moves::optimize_2moves_bb (basic_block bb)
+{
+  bool changed = false;
+  rtx_insn *insn1 = nullptr;
+  rtx_insn *insn2 = nullptr;
+  rtx_insn *curr;
+
+  FOR_BB_INSNS (bb, curr)
+    {
+      if (insn1 && INSN_P (insn1)
+	  && insn2 && INSN_P (insn2))
+	changed |= optimize_2moves (insn1, insn2);
+
+      insn1 = insn2;
+      insn2 = curr;
+    }
+
+  return changed;
+}
+
+bool
+avr_pass_2moves::optimize_2moves (rtx_insn *insn1, rtx_insn *insn2)
+{
+  bool good = false;
+  bool bad = false;
+  rtx set1, dest1, src1;
+  rtx set2, dest2, src2;
+
+  if ((set1 = single_set (insn1))
+      && (set2 = single_set (insn2))
+      && (src1 = SET_SRC (set1))
+      && REG_P (src2 = SET_SRC (set2))
+      && REG_P (dest1 = SET_DEST (set1))
+      && REG_P (dest2 = SET_DEST (set2))
+      && rtx_equal_p (dest1, src2)
+      // Now we have:
+      // insn1: dest1 = src1
+      // insn2: dest2 = dest1
+      && REGNO (dest1) >= FIRST_PSEUDO_REGISTER
+      // Paranoia.
+      && GET_CODE (PATTERN (insn1)) != PARALLEL
+      && GET_CODE (PATTERN (insn2)) != PARALLEL
+      && (rtx_equal_p (dest2, src1)
+	  || !reg_overlap_mentioned_p (dest2, src1)))
+    {
+      avr_dump ("\n;; Found 2moves:\n%r\n%r\n", insn1, insn2);
+      avr_dump (";; reg %d: insn uses uids:", REGNO (dest1));
+
+      // Go check that dest1 is used exactly once, namely by insn2.
+
+      df_ref use = DF_REG_USE_CHAIN (REGNO (dest1));
+      for (; use; use = DF_REF_NEXT_REG (use))
+	{
+	  rtx_insn *user = DF_REF_INSN (use);
+	  avr_dump (" %d", INSN_UID (user));
+	  good |= INSN_UID (user) == INSN_UID (insn2);
+	  bad |= INSN_UID (user) != INSN_UID (insn2);
+	}
+      avr_dump (".\n");
+
+      if (good && !bad
+	  // Propagate src1 to insn2:
+	  // insn1: # Deleted
+	  // insn2: dest2 = src1
+	  && validate_change (insn2, &SET_SRC (set2), src1, false))
+	{
+	  SET_INSN_DELETED (insn1);
+	  return true;
+	}
+    }
+
+  if (good && !bad)
+    avr_dump (";; Failed\n");
+
+  return false;
+}
+
+
 
 //////////////////////////////////////////////////////////////////////////////
 // Split insns with nonzero_bits() after combine.
@@ -5704,6 +5835,14 @@ make_avr_pass_casesi (gcc::context *ctxt)
   return new avr_pass_casesi (ctxt, "avr-casesi");
 }
 
+// Optimize 2 consecutive moves after combine.
+
+rtl_opt_pass *
+make_avr_pass_2moves (gcc::context *ctxt)
+{
+  return new avr_pass_2moves (ctxt, "avr-2moves");
+}
+
 rtl_opt_pass *
 make_avr_pass_split_nzb (gcc::context *ctxt)
 {
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index eb60a93eeeb..d668c7fadfc 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -74,6 +74,14 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, avr_pass_recompute_notes);
 
 INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
 
+/* Insn combine may come up with superfluous reg-reg moves, where the combine
+   people say that these are no problem since reg-alloc is supposed to optimize
+   them.  The issue is that the lower-subreg pass sitting between combine and
+   reg-alloc may split such moves, coming up with a zoo of subregs which are
+   only handled poorly by the register allocator.  */
+
+INSERT_PASS_AFTER (pass_combine, 1, avr_pass_2moves);
+
 /* Some combine insns have nonzero_bits() in their condition, though insns
    should not use such stuff in their condition.  Therefore, we split such
    insn into something without nonzero_bits() in their condition right after
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index ca30136797d..37911e7d275 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -208,6 +208,7 @@ extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_2moves (gcc::context *);
 #ifdef RTX_CODE
 extern bool avr_casei_sequence_check_operands (rtx *xop);
 extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);

Reply via email to