Hi Guys,

  I am applying the patch below to fix a couple of performance
  regressions for the RX toolchain on the 4.6 branch (when compared to
  the 4.5 branch).  These include aligning jumps, loops and labels, and
  combining extending loads and simple arithmetic operations.

Cheers
  Nick

gcc/ChangeLog
2011-03-29  Nick Clifton  <ni...@redhat.com>

        * config/rx/rx.h (SELECT_CC_MODE): Parenthesize macro arguments.
        (LABEL_ALIGN_AFTER_BARRIER): Define.
        (ASM_OUTPUT_MAX_SKIP_ALIGN): Define.
        * config/rx/predicates.md (rx_zs_comparison_operator): Do not
        allow LT or GE comparisons.
        * config/rx/rx-protos.h (rx_align_for_label): Prototype.
        * config/rx/rx.md: Add peepholes and patterns to combine extending
        loads with simple arithmetic instructions.
        * config/rx/rx.c (rx_is_legitimate_address): Allow QI and HI modes
        to use pre-decrement and post-increment addressing.
        (rx_is_restricted_memory_address): For REG+INT addressing, ensure
        that the INT is a valid offset.
        (rx_print_operand): Handle %R.
        Fix %Q's handling of MEMs.
        (rx_option_override): Set alignments.
        (rx_align_for_label): New function.
        (rx_max_skip_for_label): New function.
        (TARGET_ASM_JUMP_ALIGN_MAX_SKIP): Define.
        (TARGET_ASM_LOOP_ALIGN_MAX_SKIP): Define.
        (TARGET_ASM_LABEL_ALIGN_MAX_SKIP): Define.
        (TARGET_ASM_LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP): Define.

Index: gcc/config/rx/rx.h
===================================================================
--- gcc/config/rx/rx.h  (revision 171651)
+++ gcc/config/rx/rx.h  (working copy)
@@ -615,4 +615,28 @@
 #define BRANCH_COST(SPEED,PREDICT)       1
 #define REGISTER_MOVE_COST(MODE,FROM,TO) 2
 
-#define SELECT_CC_MODE(OP,X,Y)  rx_select_cc_mode(OP, X, Y)
+#define SELECT_CC_MODE(OP,X,Y)  rx_select_cc_mode((OP), (X), (Y))
+
+#define LABEL_ALIGN_AFTER_BARRIER(x)           rx_align_for_label ()
+
+/* Write an alignment directive for 1 << LOG bytes to STREAM.  Nothing
+   is written when LOG or MAX_SKIP is zero.  With AS100 syntax only
+   ".ALIGN 2" or ".ALIGN 4" is emitted and MAX_SKIP is not passed on;
+   otherwise ".balign" is emitted with 3 as its second argument and
+   MAX_SKIP as its third.  */
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(STREAM, LOG, MAX_SKIP)       \
+  do                                           \
+    {                                          \
+      if ((LOG) == 0 || (MAX_SKIP) == 0)       \
+        break;                                 \
+      if (TARGET_AS100_SYNTAX)                 \
+       {                                       \
+         if ((LOG) >= 2)                       \
+           fprintf (STREAM, "\t.ALIGN 4\t; %d alignment actually requested\n", 1 << (LOG)); \
+         else                                  \
+           fprintf (STREAM, "\t.ALIGN 2\n");   \
+       }                                       \
+      else                                     \
+       fprintf (STREAM, "\t.balign %d,3,%d\n", 1 << (LOG), (MAX_SKIP));        \
+    }                                          \
+  while (0)
Index: gcc/config/rx/predicates.md
===================================================================
--- gcc/config/rx/predicates.md (revision 171651)
+++ gcc/config/rx/predicates.md (working copy)
@@ -284,7 +284,7 @@
 )
 
 (define_predicate "rx_zs_comparison_operator"
-  (match_code "eq,ne,lt,ge")
+  (match_code "eq,ne")
 )
 
 ;; GT and LE omitted due to operand swap required.
Index: gcc/config/rx/rx-protos.h
===================================================================
--- gcc/config/rx/rx-protos.h   (revision 171651)
+++ gcc/config/rx/rx-protos.h   (working copy)
@@ -30,16 +30,17 @@
 extern int             rx_initial_elimination_offset (int, int);
 
 #ifdef RTX_CODE
+extern int             rx_align_for_label (void);
 extern void             rx_emit_stack_popm (rtx *, bool);
 extern void             rx_emit_stack_pushm (rtx *);
 extern void            rx_expand_epilogue (bool);
 extern char *          rx_gen_move_template (rtx *, bool);
 extern bool            rx_is_legitimate_constant (rtx);
 extern bool            rx_is_restricted_memory_address (rtx, Mmode);
+extern bool            rx_match_ccmode (rtx, Mmode);
 extern void            rx_notice_update_cc (rtx body, rtx insn);
 extern void            rx_split_cbranch (Mmode, Rcode, rtx, rtx, rtx);
 extern Mmode           rx_select_cc_mode (Rcode, rtx, rtx);
-extern bool            rx_match_ccmode (rtx, Mmode);
 #endif
 
 #endif /* GCC_RX_PROTOS_H */
Index: gcc/config/rx/rx.md
===================================================================
--- gcc/config/rx/rx.md (revision 171651)
+++ gcc/config/rx/rx.md (working copy)
@@ -1545,6 +1545,146 @@
    (set_attr "length" "3,4,5,6,7,6")]
 )
 
+;; A set of peepholes to catch extending loads followed by arithmetic operations.
+;; We use iterators where possible to reduce the amount of typing and hence the
+;; possibilities for typos.
+
+(define_code_iterator extend_types [(zero_extend "") (sign_extend "")])
+(define_code_attr     letter       [(zero_extend "R") (sign_extend "Q")])
+
+(define_code_iterator memex_commutative [(plus "") (and "") (ior "") (xor "")])
+(define_code_iterator memex_noncomm     [(div "") (udiv "") (minus "")])
+(define_code_iterator memex_nocc        [(smax "") (smin "") (mult "")])
+
+(define_code_attr     op                [(plus "add") (and "and") (div "div") (udiv "divu") (smax "max") (smin "min") (mult "mul") (ior "or") (minus "sub") (xor "xor")])
+
+;; Commutative operations with the loaded value as the first input.
+(define_peephole2
+  [(set (match_operand:SI                               0 "register_operand")
+       (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+   (parallel [(set (match_operand:SI                    2 "register_operand")
+                  (memex_commutative:SI (match_dup 0)
+                                        (match_dup 2)))
+             (clobber (reg:CC CC_REG))])]
+  "peep2_regno_dead_p (2, REGNO (operands[0]))"
+  [(parallel [(set:SI (match_dup 2)
+                     (memex_commutative:SI (match_dup 2)
+                                           (extend_types:SI (match_dup 1))))
+             (clobber (reg:CC CC_REG))])]
+)
+
+;; Commutative operations with the loaded value as the second input.
+(define_peephole2
+  [(set (match_operand:SI                               0 "register_operand")
+       (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+   (parallel [(set (match_operand:SI                    2 "register_operand")
+                  (memex_commutative:SI (match_dup 2)
+                                        (match_dup 0)))
+             (clobber (reg:CC CC_REG))])]
+  "peep2_regno_dead_p (2, REGNO (operands[0]))"
+  [(parallel [(set:SI (match_dup 2)
+                     (memex_commutative:SI (match_dup 2)
+                                           (extend_types:SI (match_dup 1))))
+             (clobber (reg:CC CC_REG))])]
+)
+
+;; Non-commutative operations: the loaded value may only be the second input.
+(define_peephole2
+  [(set (match_operand:SI                               0 "register_operand")
+       (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+   (parallel [(set (match_operand:SI                    2 "register_operand")
+                  (memex_noncomm:SI (match_dup 2)
+                                    (match_dup 0)))
+             (clobber (reg:CC CC_REG))])]
+  "peep2_regno_dead_p (2, REGNO (operands[0]))"
+  [(parallel [(set:SI (match_dup 2)
+                     (memex_noncomm:SI (match_dup 2)
+                                       (extend_types:SI (match_dup 1))))
+             (clobber (reg:CC CC_REG))])]
+)
+
+;; Operations whose insns have no CC_REG clobber, loaded value as first input.
+(define_peephole2
+  [(set (match_operand:SI                               0 "register_operand")
+       (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+   (set (match_operand:SI                               2 "register_operand")
+       (memex_nocc:SI (match_dup 0)
+                      (match_dup 2)))]
+  "peep2_regno_dead_p (2, REGNO (operands[0]))"
+  [(set:SI (match_dup 2)
+          (memex_nocc:SI (match_dup 2)
+                         (extend_types:SI (match_dup 1))))]
+)
+
+;; Operations whose insns have no CC_REG clobber, loaded value as second input.
+(define_peephole2
+  [(set (match_operand:SI                               0 "register_operand")
+       (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+   (set (match_operand:SI                               2 "register_operand")
+       (memex_nocc:SI (match_dup 2)
+                      (match_dup 0)))]
+  "peep2_regno_dead_p (2, REGNO (operands[0]))"
+  [(set:SI (match_dup 2)
+          (memex_nocc:SI (match_dup 2)
+                         (extend_types:SI (match_dup 1))))]
+)
+
+;; Insns that perform the operation directly on an extended memory operand.
+(define_insn "*<memex_commutative:code>si3_<extend_types:code><small_int_modes:mode>"
+  [(set (match_operand:SI                                                     0 "register_operand" "=r")
+       (memex_commutative:SI (match_operand:SI                               1 "register_operand" "%0")
+                             (extend_types:SI (match_operand:small_int_modes 2 "rx_restricted_mem_operand" "Q"))))
+   (clobber (reg:CC CC_REG))]
+  ""
+  "<memex_commutative:op>\t%<extend_types:letter>2, %0"
+  [(set_attr "timings" "33")
+   (set_attr "length"  "5")] ;; Worst case scenario.  FIXME: If we defined separate patterns
+)                            ;; rather than using iterators we could specify exact sizes.
+
+(define_insn "*<memex_noncomm:code>si3_<extend_types:code><small_int_modes:mode>"
+  [(set (match_operand:SI                                                 0 "register_operand" "=r")
+       (memex_noncomm:SI (match_operand:SI                               1 "register_operand" "0")
+                          (extend_types:SI (match_operand:small_int_modes 2 "rx_restricted_mem_operand" "Q"))))
+   (clobber (reg:CC CC_REG))]
+  ""
+  "<memex_noncomm:op>\t%<extend_types:letter>2, %0"
+  [(set_attr "timings" "33")
+   (set_attr "length"  "5")] ;; Worst case scenario.  FIXME: If we defined separate patterns
+)                            ;; rather than using iterators we could specify exact sizes.
+
+(define_insn "*<memex_nocc:code>si3_<extend_types:code><small_int_modes:mode>"
+  [(set (match_operand:SI                                              0 "register_operand" "=r")
+       (memex_nocc:SI (match_operand:SI                               1 "register_operand" "%0")
+                      (extend_types:SI (match_operand:small_int_modes 2 "rx_restricted_mem_operand" "Q"))))]
+  ""
+  "<memex_nocc:op>\t%<extend_types:letter>2, %0"
+  [(set_attr "timings" "33")
+   (set_attr "length"  "5")] ;; Worst case scenario.  FIXME: If we defined separate patterns
+)                            ;; rather than using iterators we could specify exact sizes.
+
+;; Fold an extending load into a following comparison.
+(define_peephole2
+  [(set (match_operand:SI                               0 "register_operand")
+       (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+   (set (reg:CC CC_REG)
+       (compare:CC (match_operand:SI                   2 "register_operand")
+                   (match_dup 0)))]
+  "peep2_regno_dead_p (2, REGNO (operands[0]))"
+  [(set (reg:CC CC_REG)
+       (compare:CC (match_dup 2)
+                   (extend_types:SI (match_dup 1))))]
+)
+
+(define_insn "*comparesi3_<extend_types:code><small_int_modes:mode>"
+  [(set (reg:CC CC_REG)
+       (compare:CC (match_operand:SI                               0 "register_operand" "r")
+                   (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand" "Q"))))]
+  ""
+  "cmp\t%<extend_types:letter>1, %0"
+  [(set_attr "timings" "33")
+   (set_attr "length"  "5")] ;; Worst case scenario.  FIXME: If we defined separate patterns
+)                            ;; rather than using iterators we could specify exact sizes.
+
 ;; Floating Point Instructions
 
 (define_insn "addsf3"
Index: gcc/config/rx/rx.c
===================================================================
--- gcc/config/rx/rx.c  (revision 171651)
+++ gcc/config/rx/rx.c  (working copy)
@@ -57,7 +57,7 @@
 #define CC_FLAG_Z      (1 << 1)
 #define CC_FLAG_O      (1 << 2)
 #define CC_FLAG_C      (1 << 3)
-#define CC_FLAG_FP     (1 << 4)        /* fake, to differentiate CC_Fmode */
+#define CC_FLAG_FP     (1 << 4)        /* Fake, to differentiate CC_Fmode.  */
 
 static unsigned int flags_from_mode (enum machine_mode mode);
 static unsigned int flags_from_code (enum rtx_code code);
@@ -85,7 +85,9 @@
     /* Register Indirect.  */
     return true;
 
-  if (GET_MODE_SIZE (mode) == 4
+  if ((GET_MODE_SIZE (mode) == 4
+       || GET_MODE_SIZE (mode) == 2
+       || GET_MODE_SIZE (mode) == 1)
       && (GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_INC))
     /* Pre-decrement Register Indirect or
        Post-increment Register Indirect.  */
@@ -187,8 +189,11 @@
       base = XEXP (mem, 0);
       index = XEXP (mem, 1);
 
-      return RX_REG_P (base) && CONST_INT_P (index);
+      if (! RX_REG_P (base) || ! CONST_INT_P (index))
+         return false;
 
+      return IN_RANGE (INTVAL (index), 0, (0x10000 * GET_MODE_SIZE (mode)) - 1);
+
     case SYMBOL_REF:
       /* Can happen when small data is being supported.
          Assume that it will be resolved into GP+INT.  */
@@ -386,11 +391,14 @@
      %L  Print low part of a DImode register, integer or address.
      %N  Print the negation of the immediate value.
      %Q  If the operand is a MEM, then correctly generate
-         register indirect or register relative addressing.  */
+         register indirect or register relative addressing.
+     %R  Like %Q but for zero-extending loads.  */
 
 static void
 rx_print_operand (FILE * file, rtx op, int letter)
 {
+  bool unsigned_load = false;
+
   switch (letter)
     {
     case 'A':
@@ -450,6 +458,7 @@
        else
          {
            unsigned int flags = flags_from_mode (mode);
+
            switch (code)
              {
              case LT:
@@ -588,10 +597,15 @@
       rx_print_integer (file, - INTVAL (op));
       break;
 
+    case 'R':
+      gcc_assert (GET_MODE_SIZE (GET_MODE (op)) < 4);
+      unsigned_load = true;
+      /* Fall through.  */
     case 'Q':
       if (MEM_P (op))
        {
          HOST_WIDE_INT offset;
+         rtx mem = op;
 
          op = XEXP (op, 0);
 
@@ -626,22 +640,24 @@
          rx_print_operand (file, op, 0);
          fprintf (file, "].");
 
-         switch (GET_MODE_SIZE (GET_MODE (op)))
+         switch (GET_MODE_SIZE (GET_MODE (mem)))
            {
            case 1:
-             gcc_assert (offset < 65535 * 1);
-             fprintf (file, "B");
+             gcc_assert (offset <= 65535 * 1);
+             fprintf (file, unsigned_load ? "UB" : "B");
              break;
            case 2:
              gcc_assert (offset % 2 == 0);
-             gcc_assert (offset < 65535 * 2);
-             fprintf (file, "W");
+             gcc_assert (offset <= 65535 * 2);
+             fprintf (file, unsigned_load ? "UW" : "W");
              break;
-           default:
+           case 4:
              gcc_assert (offset % 4 == 0);
-             gcc_assert (offset < 65535 * 4);
+             gcc_assert (offset <= 65535 * 4);
              fprintf (file, "L");
              break;
+           default:
+             gcc_unreachable ();
            }
          break;
        }
@@ -2336,6 +2352,13 @@
     flag_strict_volatile_bitfields = 1;
 
   rx_override_options_after_change ();
+
+  if (align_jumps == 0 && ! optimize_size)
+    align_jumps = 3;
+  if (align_loops == 0 && ! optimize_size)
+    align_loops = 3;
+  if (align_labels == 0 && ! optimize_size)
+    align_labels = 3;
 }
 
 /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
@@ -2728,6 +2751,54 @@
 }
 
 
+/* Return the value used by LABEL_ALIGN_AFTER_BARRIER: 1 when
+   optimizing for size, otherwise 3.  */
+
+int
+rx_align_for_label (void)
+{
+  return optimize_size ? 1 : 3;
+}
+
+/* Return the maximum number of bytes to skip when aligning LAB: one
+   less than the length of the first insn after LAB that is neither a
+   label nor a USE.  Return 0 if LAB is null, if there is no such
+   insn, or if its length is zero or is eight or more.  */
+
+static int
+rx_max_skip_for_label (rtx lab)
+{
+  int opsize;
+  rtx op;
+
+  if (lab == NULL_RTX)
+    return 0;
+  op = lab;
+  do
+    {
+      op = next_nonnote_insn (op);
+    }
+  while (op && (LABEL_P (op)
+               || (INSN_P (op) && GET_CODE (PATTERN (op)) == USE)));
+  if (!op)
+    return 0;
+
+  opsize = get_attr_length (op);
+  /* A zero-length insn must not produce a negative skip.  */
+  if (opsize > 0 && opsize < 8)
+    return opsize - 1;
+  return 0;
+}
+
+#undef  TARGET_ASM_JUMP_ALIGN_MAX_SKIP
+#define TARGET_ASM_JUMP_ALIGN_MAX_SKIP                 rx_max_skip_for_label
+#undef  TARGET_ASM_LOOP_ALIGN_MAX_SKIP
+#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP                 rx_max_skip_for_label
+#undef  TARGET_ASM_LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP
+#define TARGET_ASM_LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP  rx_max_skip_for_label
+#undef  TARGET_ASM_LABEL_ALIGN_MAX_SKIP
+#define TARGET_ASM_LABEL_ALIGN_MAX_SKIP                rx_max_skip_for_label
+
 #undef  TARGET_FUNCTION_VALUE
 #define TARGET_FUNCTION_VALUE          rx_function_value
 

Reply via email to