https://gcc.gnu.org/g:07a0baa7609a5a78d8b4a3c17897768fb2119605

commit r16-3237-g07a0baa7609a5a78d8b4a3c17897768fb2119605
Author: Artemiy Volkov <artem...@acm.org>
Date:   Sat Aug 16 20:40:28 2025 -0600

    ira: tie output allocnos for fused instruction pairs
    
    Some of the instruction pairs recognized as fusible by a preceding
    invocation of the dep_fusion pass require that both components of a pair
    have the same hard register output for the fusion to work in hardware.
    (An example of this would be a multiply-add operation, or a zero-extract
    operation composed of two shifts.)
    
    For all such pairs, the following conditions will hold:
      (a) Both insns are single_sets
      (b) Both insns have a register destination
      (c) The pair has been marked as fusible by setting the second insn's
    SCHED_GROUP flag
      (d) Additionally, post-RA, both instructions' destination regnos are
    equal
    
    (All of these conditions are encapsulated in the newly created
    single_output_fused_pair_p () predicate.)
    
    During IRA, if conditions (a)-(c) above hold, we need to tie the two
    instructions' destination allocnos together so that they are allocated
    to the same hard register.  We do this in add_insn_allocno_copies () by
    adding a constraint conflict to the output operands of the two
    instructions.
    
    gcc/ChangeLog:
    
            * ira-conflicts.cc (add_insn_allocno_copies): Handle fused insn pairs.
            * rtl.h (single_output_fused_pair_p): Declare new function.
            * rtlanal.cc (single_output_fused_pair_p): Define it.

Diff:
---
 gcc/ira-conflicts.cc | 12 ++++++++++--
 gcc/rtl.h            |  1 +
 gcc/rtlanal.cc       | 20 ++++++++++++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/gcc/ira-conflicts.cc b/gcc/ira-conflicts.cc
index d8f7c1e1c379..e9ab16a5d20a 100644
--- a/gcc/ira-conflicts.cc
+++ b/gcc/ira-conflicts.cc
@@ -448,7 +448,7 @@ process_reg_shuffles (rtx_insn *insn, rtx reg, int op_num, int freq,
 static void
 add_insn_allocno_copies (rtx_insn *insn)
 {
-  rtx set, operand, dup;
+  rtx set = single_set (insn), operand, dup;
   bool bound_p[MAX_RECOG_OPERANDS];
   int i, n, freq;
   alternative_mask alts;
@@ -456,7 +456,15 @@ add_insn_allocno_copies (rtx_insn *insn)
   freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn));
   if (freq == 0)
     freq = 1;
-  if ((set = single_set (insn)) != NULL_RTX
+
+  /* Tie output register operands of two consecutive single_sets
+     marked as a fused pair.  */
+  if (single_output_fused_pair_p (insn))
+    process_regs_for_copy (SET_DEST (set),
+                  SET_DEST (single_set (prev_nonnote_nondebug_insn (insn))),
+                  true, NULL, freq);
+
+  if (set != NULL_RTX
       && REG_SUBREG_P (SET_DEST (set)) && REG_SUBREG_P (SET_SRC (set))
       && ! side_effects_p (set)
       && find_reg_note (insn, REG_DEAD,
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 5bd0bd4d168b..9684b45f2a5b 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3670,6 +3670,7 @@ extern bool contains_symbol_ref_p (const_rtx);
 extern bool contains_symbolic_reference_p (const_rtx);
 extern bool contains_constant_pool_address_p (const_rtx);
 extern void add_auto_inc_notes (rtx_insn *, rtx);
+extern bool single_output_fused_pair_p (rtx_insn *);
 
 /* Handle the cheap and common cases inline for performance.  */
 
diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc
index 87332ffebcec..19b66456e45c 100644
--- a/gcc/rtlanal.cc
+++ b/gcc/rtlanal.cc
@@ -6976,6 +6976,26 @@ add_auto_inc_notes (rtx_insn *insn, rtx x)
     }
 }
 
+/* Return true if INSN is the second insn of a fused pair of single_sets
+   that both set a register (and, after reload, the same register number).  */
+bool
+single_output_fused_pair_p (rtx_insn *insn)
+{
+  rtx set, prev_set;
+  rtx_insn *prev;
+
+  return INSN_P (insn)
+        && SCHED_GROUP_P (insn)
+        && (prev = prev_nonnote_nondebug_insn (insn))
+        && (set = single_set (insn)) != NULL_RTX
+        && (prev_set = single_set (prev))
+            != NULL_RTX
+        && REG_P (SET_DEST (set))
+        && REG_P (SET_DEST (prev_set))
+        && (!reload_completed
+            || REGNO (SET_DEST (set)) == REGNO (SET_DEST (prev_set)));
+}
+
 /* Return true if X is register asm.  */
 
 bool

Reply via email to