Hello,

A while ago Andrew Stubbs noticed that the lower-subreg pass seems a little
too aggressive in splitting up DImode moves into SImode moves.  See the
discussion starting at:
http://gcc.gnu.org/ml/gcc/2012-04/msg00676.html

The conclusion of this thread was that the mere presence of a DImode move
between pseudo registers should *not* be sufficient on its own (i.e. if
the pseudos in question are nowhere accessed via subreg) to trigger a
split of the pseudo during the *first* pass of lower-subreg.  However,
Andrew never got around to submitting a patch implementing this
suggestion.

The patch below does so.  As Andrew already noted, this gives a significant
improvement on ARM with NEON, and a slight improvement in size on ARM
without NEON overall.  In addition, I've done size testing on PowerPC
and System z and found a slight but consistent improvement there as well.

Note that the patch disables the lower-subreg-1.c test case on ARM targets,
since the test case assumes that a 64-bit logical OR ought to be split; this is
currently not the case on ARM if NEON is available, since we provide an
iordi3 pattern in that case.  (Without the patch, some spurious moves
trigger the split anyway, hiding the issue.)

Tested on arm-linux-gnueabi, powerpc(64)-linux and s390(x)-linux.
OK for mainline?

Bye,
Ulrich


ChangeLog:

        * lower-subreg.c (enum classify_move_insn): Rename
        SIMPLE_PSEUDO_REG_MOVE to DECOMPOSABLE_SIMPLE_MOVE.
        (find_decomposable_subregs): Update.
        (decompose_multiword_subregs): Add DECOMPOSE_COPIES parameter.
        Only mark pseudo-to-pseudo copies as DECOMPOSABLE_SIMPLE_MOVE
        if that parameter is true.
        (rest_of_handle_lower_subreg): Call decompose_multiword_subregs
        with DECOMPOSE_COPIES false.
        (rest_of_handle_lower_subreg2): Call decompose_multiword_subregs
        with DECOMPOSE_COPIES true.

testsuite/ChangeLog:

        * gcc.dg/lower-subreg-1.c: Disable on arm-*-* targets.


Index: gcc/testsuite/gcc.dg/lower-subreg-1.c
===================================================================
*** gcc/testsuite/gcc.dg/lower-subreg-1.c       (revision 191254)
--- gcc/testsuite/gcc.dg/lower-subreg-1.c       (working copy)
***************
*** 1,4 ****
! /* { dg-do compile { target { ! { mips64 || { ia64-*-* spu-*-* tilegx-*-* } } } } } */
  /* { dg-options "-O -fdump-rtl-subreg1" } */
  /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } { "*" } { "" } } */
  /* { dg-require-effective-target ilp32 } */
--- 1,4 ----
! /* { dg-do compile { target { ! { mips64 || { arm-*-* ia64-*-* spu-*-* tilegx-*-* } } } } } */
  /* { dg-options "-O -fdump-rtl-subreg1" } */
  /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } { "*" } { "" } } */
  /* { dg-require-effective-target ilp32 } */
Index: gcc/lower-subreg.c
===================================================================
*** gcc/lower-subreg.c  (revision 191254)
--- gcc/lower-subreg.c  (working copy)
*************** enum classify_move_insn
*** 440,448 ****
  {
    /* Not a simple move from one location to another.  */
    NOT_SIMPLE_MOVE,
!   /* A simple move from one pseudo-register to another.  */
!   SIMPLE_PSEUDO_REG_MOVE,
!   /* A simple move involving a non-pseudo-register.  */
    SIMPLE_MOVE
  };
  
--- 440,448 ----
  {
    /* Not a simple move from one location to another.  */
    NOT_SIMPLE_MOVE,
!   /* A simple move we want to decompose.  */
!   DECOMPOSABLE_SIMPLE_MOVE,
!   /* Any other simple move.  */
    SIMPLE_MOVE
  };
  
*************** find_decomposable_subregs (rtx *px, void
*** 518,524 ****
  
         If this is not a simple copy from one location to another,
         then we can not decompose this register.  If this is a simple
!        copy from one pseudo-register to another, and the mode is right
         then we mark the register as decomposable.
         Otherwise we don't say anything about this register --
         it could be decomposed, but whether that would be
--- 518,524 ----
  
         If this is not a simple copy from one location to another,
         then we can not decompose this register.  If this is a simple
!        copy we want to decompose, and the mode is right,
         then we mark the register as decomposable.
         Otherwise we don't say anything about this register --
         it could be decomposed, but whether that would be
*************** find_decomposable_subregs (rtx *px, void
*** 537,543 ****
            case NOT_SIMPLE_MOVE:
              bitmap_set_bit (non_decomposable_context, regno);
              break;
!           case SIMPLE_PSEUDO_REG_MOVE:
              if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
                bitmap_set_bit (decomposable_context, regno);
              break;
--- 537,543 ----
            case NOT_SIMPLE_MOVE:
              bitmap_set_bit (non_decomposable_context, regno);
              break;
!           case DECOMPOSABLE_SIMPLE_MOVE:
              if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
                bitmap_set_bit (decomposable_context, regno);
              break;
*************** find_decomposable_subregs (rtx *px, void
*** 553,559 ****
        enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
  
        /* Any registers used in a MEM do not participate in a
!        SIMPLE_MOVE or SIMPLE_PSEUDO_REG_MOVE.  Do our own recursion
         here, and return -1 to block the parent's recursion.  */
        for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
        return -1;
--- 553,559 ----
        enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
  
        /* Any registers used in a MEM do not participate in a
!        SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
         here, and return -1 to block the parent's recursion.  */
        for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
        return -1;
*************** dump_choices (bool speed_p, const char *
*** 1336,1346 ****
  }
  
  /* Look for registers which are always accessed via word-sized SUBREGs
!    or via copies.  Decompose these registers into several word-sized
!    pseudo-registers.  */
  
  static void
! decompose_multiword_subregs (void)
  {
    unsigned int max;
    basic_block bb;
--- 1336,1346 ----
  }
  
  /* Look for registers which are always accessed via word-sized SUBREGs
!    or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
!    registers into several word-sized pseudo-registers.  */
  
  static void
! decompose_multiword_subregs (bool decompose_copies)
  {
    unsigned int max;
    basic_block bb;
*************** decompose_multiword_subregs (void)
*** 1438,1445 ****
            cmi = NOT_SIMPLE_MOVE;
          else
            {
              if (find_pseudo_copy (set))
!               cmi = SIMPLE_PSEUDO_REG_MOVE;
              else
                cmi = SIMPLE_MOVE;
            }
--- 1438,1452 ----
            cmi = NOT_SIMPLE_MOVE;
          else
            {
+             /* We mark pseudo-to-pseudo copies as decomposable during the
+                second pass only.  The first pass is so early that there is
+                good chance such moves will be optimized away completely by
+                subsequent optimizations anyway.
+ 
+                However, we call find_pseudo_copy even during the first pass
+                so as to properly set up the reg_copy_graph.  */
              if (find_pseudo_copy (set))
!               cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
              else
                cmi = SIMPLE_MOVE;
            }
*************** gate_handle_lower_subreg (void)
*** 1640,1646 ****
  static unsigned int
  rest_of_handle_lower_subreg (void)
  {
!   decompose_multiword_subregs ();
    return 0;
  }
  
--- 1647,1653 ----
  static unsigned int
  rest_of_handle_lower_subreg (void)
  {
!   decompose_multiword_subregs (false);
    return 0;
  }
  
*************** rest_of_handle_lower_subreg (void)
*** 1649,1655 ****
  static unsigned int
  rest_of_handle_lower_subreg2 (void)
  {
!   decompose_multiword_subregs ();
    return 0;
  }
  
--- 1656,1662 ----
  static unsigned int
  rest_of_handle_lower_subreg2 (void)
  {
!   decompose_multiword_subregs (true);
    return 0;
  }
  
-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com

Reply via email to