This is a fix for an optimization flaw when a long value is composed
from byte values.

For -fsplit-wide-types (which is still the default for avr) the code is
worse than with -fno-split-wide-types. The code for the test case is
better in both situations, i.e. compared to code without the patch,
but it is still not optimal.

Fixing this with a few combine patterns is the only thing the back end (BE)
can do. I did not write more complex patterns because things get too complex
with little performance gain.

Tested without regressions.

Johann

2011-05-02  Georg-Johann Lay  <a...@gjlay.de>

        PR target/27663
        * config/avr/predicates.md (const_8_16_24_operand): New predicate.
        * config/avr/avr.md ("*ior<mode>qi.byte0",
        "*ior<mode>qi.byte1-3"): New define_insn_and_split patterns.
Index: config/avr/predicates.md
===================================================================
--- config/avr/predicates.md	(Revision 172902)
+++ config/avr/predicates.md	(Arbeitskopie)
@@ -138,3 +138,10 @@ (define_predicate "call_insn_operand"
 (define_predicate "pseudo_register_operand"
   (and (match_code "reg")
        (match_test "!HARD_REGISTER_P (op)")))
+
+;; Return true if OP is a CONST_INT whose value is exactly 8, 16 or 24.
+;; Any other rtx code, and any other integer value, is rejected.
+(define_predicate "const_8_16_24_operand"
+  (and (match_code "const_int")
+       (match_test "8 == INTVAL(op) || 16 == INTVAL(op) || 24 == INTVAL(op)")))
+
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(Revision 172902)
+++ config/avr/avr.md	(Arbeitskopie)
@@ -3388,3 +3388,42 @@ (define_insn "fmulsu"
 	clr __zero_reg__"
   [(set_attr "length" "3")
    (set_attr "cc" "clobber")])
+
+
+;; Some combine patterns that try to fix bad code when a value is composed
+;; from byte parts like in PR27663.
+;; The patterns give some relief but the code is still not optimal,
+;; in particular when subreg lowering (-fsplit-wide-types) is turned on.
+;; That switch obfuscates things here and in many other places.
+
+(define_insn_and_split "*ior<mode>qi.byte0"  ;; op0 |= zero_extend (op1); op2 is tied to op0
+  [(set (match_operand:HISI 0 "register_operand"                 "=r")
+        (ior:HISI
+         (zero_extend:HISI (match_operand:QI 1 "register_operand" "r"))
+         (match_operand:HISI 2 "register_operand"                 "0")))]
+  ""  ;; no insn condition
+  "#"  ;; template #: no asm is output for this insn, it must be split
+  "reload_completed"  ;; split condition: only after reload
+  [(set (match_dup 3)  ;; replacement insn: op3 |= op1, done in QImode
+        (ior:QI (match_dup 3)
+                (match_dup 1)))]
+  {  /* operands[3] is the QImode subreg of operands[0] at byte offset 0.  */
+    operands[3] = simplify_gen_subreg (QImode, operands[0], <MODE>mode, 0);
+  })
+
+(define_insn_and_split "*ior<mode>qi.byte1-3"  ;; op0 |= zero_extend (op1) << op2; op3 is tied to op0
+  [(set (match_operand:HISI 0 "register_operand"                              "=r")
+        (ior:HISI
+         (ashift:HISI (zero_extend:HISI (match_operand:QI 1 "register_operand" "r"))
+                      (match_operand:QI 2 "const_8_16_24_operand"              "n"))
+         (match_operand:HISI 3 "register_operand"                              "0")))]
+  "INTVAL(operands[2]) < GET_MODE_BITSIZE (<MODE>mode)"  ;; shift count must be below the bit size of <MODE>
+  "#"  ;; template #: no asm is output for this insn, it must be split
+  "&& reload_completed"  ;; leading &&: insn condition above AND reload_completed
+  [(set (match_dup 4)  ;; replacement insn: op4 |= op1, done in QImode
+        (ior:QI (match_dup 4)
+                (match_dup 1)))]
+  {  /* byteno is the shift count in bytes; it selects the QImode subreg of operands[0].  */
+    int byteno = INTVAL(operands[2]) / BITS_PER_UNIT;
+    operands[4] = simplify_gen_subreg (QImode, operands[0], <MODE>mode, byteno);
+  })

Reply via email to