Andrew Stubbs wrote: > On 12/07/11 11:35, Georg-Johann Lay wrote: >> +(define_insn "*mulsu" >> + [(set (match_operand:HI 0 >> "register_operand" "=r") >> + (mult:HI (sign_extend:HI (match_operand:QI 1 >> "register_operand" "a")) >> + (zero_extend:HI (match_operand:QI 2 >> "register_operand" "a"))))] >> + "AVR_HAVE_MUL" >> + "mulsu %1,%2 >> + movw %0,r0 >> + clr __zero_reg__" >> + [(set_attr "length" "3") >> + (set_attr "cc" "clobber")]) >> + >> +(define_insn "*mulus" >> + [(set (match_operand:HI 0 >> "register_operand" "=r") >> + (mult:HI (zero_extend:HI (match_operand:QI 1 >> "register_operand" "a")) >> + (sign_extend:HI (match_operand:QI 2 >> "register_operand" "a"))))] >> + "AVR_HAVE_MUL" >> + "mulsu %2,%1 >> + movw %0,r0 >> + clr __zero_reg__" >> + [(set_attr "length" "3") >> + (set_attr "cc" "clobber")]) > > 1. You should name that "usmulqihi3" (no star), so the optimizers can > see it.
Thanks for pointing this out! I confused "us" with unsigned saturate. > 2. There's no need to define both of these. For one thing, putting a '%' > at the start of the constraint list for operand 1 does precisely this, > but more importantly, I'm pretty sure one form or the other is the > canonical form and the other should never occur. If it's not, maybe it > should be? I don't know if combine does any canonicalization for that. The % is not correct because the insn is not commutative. AFAIK for abelian operation there's no advantage if constrains are the same. > > Andrew Attached revised patch. Ok to commit? Johann PR target/49687 * config/avr/avr.md (mulhi3): Use register_or_s8_u8_operand for operand2 and expand appropriately if there is a CONST_INT in operand2. (usmulqihi3): New insn. (mulsqihi3): New insn. (muluqihi3): New insn. (*muluqihi3.uconst): New insn_and_split. (*muluqihi3.sconst): New insn_and_split. (*mulsqihi3.sconst): New insn_and_split. (*mulsqihi3.uconst): New insn_and_split. (*ashifthi3.signx.const): New insn_and_split. (*ashifthi3.signx.const7): New insn_and_split. (*ashifthi3.zerox.const): New insn_and_split. * config/avr/avr.c (avr_rtx_costs): Report costs of above insns. (avr_gate_split1): New function. * config/avr/avr-protos.h (avr_gate_split1): New prototype. * config/avr/predicates.md (const_2_to_7_operand): New. (const_2_to_6_operand): New. (u8_operand): New. (s8_operand): New. (register_or_s8_u8_operand): New.
Index: config/avr/predicates.md =================================================================== --- config/avr/predicates.md (revision 176136) +++ config/avr/predicates.md (working copy) @@ -73,6 +73,16 @@ (define_predicate "const_0_to_7_operand" (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) +;; Return 1 if OP is constant integer 2..7 for MODE. +(define_predicate "const_2_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 7)"))) + +;; Return 1 if OP is constant integer 2..6 for MODE. +(define_predicate "const_2_to_6_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 6)"))) + ;; Returns true if OP is either the constant zero or a register. (define_predicate "reg_or_0_operand" (ior (match_operand 0 "register_operand") @@ -156,3 +166,17 @@ (define_predicate "const_8_16_24_operand (and (match_code "const_int") (match_test "8 == INTVAL(op) || 16 == INTVAL(op) || 24 == INTVAL(op)"))) +;; Unsigned CONST_INT that fits in 8 bits, i.e. 0..255. +(define_predicate "u8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 255)"))) + +;; Signed CONST_INT that fits in 8 bits, i.e. -128..127. +(define_predicate "s8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -128, 127)"))) + +(define_predicate "register_or_s8_u8_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "u8_operand") + (match_operand 0 "s8_operand"))) Index: config/avr/avr.md =================================================================== --- config/avr/avr.md (revision 176136) +++ config/avr/avr.md (working copy) @@ -1017,19 +1017,235 @@ (define_insn "umulqihi3" [(set_attr "length" "3") (set_attr "cc" "clobber")]) +(define_insn "usmulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a")) + (sign_extend:HI (match_operand:QI 2 "register_operand" "a"))))] + "AVR_HAVE_MUL" + "mulsu %2,%1 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +;****************************************************************************** +; mul HI: $1 = sign/zero-extend, $2 = small constant +;****************************************************************************** + +(define_insn_and_split "*muluqihi3.uconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "u8_operand" "M")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; umulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 3))))] + { + operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), + QImode)); + operands[3] = gen_reg_rtx (QImode); + }) + +(define_insn_and_split "*muluqihi3.sconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "s8_operand" "n")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 3))))] + { + operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), + QImode)); + operands[3] = gen_reg_rtx (QImode); + }) + +(define_insn_and_split "*mulsqihi3.sconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) + (match_operand:HI 2 "s8_operand" "n")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; mulqihi3 + (set (match_dup 0) + (mult:HI (sign_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 3))))] + { + operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), + QImode)); + operands[3] = gen_reg_rtx (QImode); + }) + +(define_insn_and_split "*mulsqihi3.uconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "u8_operand" "M")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 3)) + (sign_extend:HI (match_dup 1))))] + { + operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), + QImode)); + operands[3] = gen_reg_rtx (QImode); + }) + + +;; The EXTEND of $1 only appears in combine, we don't see it in expand so that +;; expand decides to use ASHIFT instead of MUL because ASHIFT costs are cheaper +;; at that time. Fix that. + +(define_insn_and_split "*ashifthi3.signx.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) + (match_operand:HI 2 "const_2_to_6_operand" "I")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; mulqihi3 + (set (match_dup 0) + (mult:HI (sign_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 3))))] + { + operands[2] = GEN_INT (1 << INTVAL (operands[2])); + operands[3] = gen_reg_rtx (QImode); + }) + +(define_insn_and_split "*ashifthi3.signx.const7" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (const_int 7)))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 3)) + (sign_extend:HI (match_dup 1))))] + { + operands[2] = GEN_INT (trunc_int_for_mode (1 << 7, QImode)); + operands[3] = gen_reg_rtx (QImode); + }) + +(define_insn_and_split "*ashifthi3.zerox.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "const_2_to_7_operand" "I")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; umulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 3))))] + { + operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), + QImode)); + operands[3] = gen_reg_rtx (QImode); + }) + +;****************************************************************************** +; mul HI: $1 = sign/zero-extend, $2 = reg +;****************************************************************************** + +(define_insn "mulsqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "register_operand" "a")))] + "AVR_HAVE_MUL" + "mulsu %1,%A2 + movw %0,r0 + mul %1,%B2 + add %B0,r0 + clr __zero_reg__" + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +(define_insn "muluqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %0,r0 + mul %1,%B2 + add %B0,r0 + clr __zero_reg__" + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +;****************************************************************************** + (define_expand "mulhi3" [(set (match_operand:HI 0 "register_operand" "") (mult:HI (match_operand:HI 1 "register_operand" "") - (match_operand:HI 2 "register_operand" "")))] + (match_operand:HI 2 "register_or_s8_u8_operand" "")))] "" - " -{ - if (!AVR_HAVE_MUL) - { - emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2])); - DONE; - } -}") + { + if (!AVR_HAVE_MUL) + { + if (!register_operand (operands[2], HImode)) + operands[2] = force_reg (HImode, operands[2]); + + emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2])); + DONE; + } + + /* For small constants we can do better by extending them on the fly. + The constant can be loaded in one instruction and the widening + multiplication is shorter. First try the unsigned variant because it + allows constraint "d" instead of "a" for the signed version. */ + + if (u8_operand (operands[2], HImode)) + { + rtx x = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode)); + emit_insn (gen_muluqihi3 (operands[0], + force_reg (QImode, x), operands[1])); + DONE; + } + + if (s8_operand (operands[2], HImode)) + { + rtx x = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode)); + emit_insn (gen_mulsqihi3 (operands[0], + force_reg (QImode, x), operands[1])); + DONE; + } + + if (!register_operand (operands[2], HImode)) + operands[2] = force_reg (HImode, operands[2]); + }) (define_insn "*mulhi3_enh" [(set (match_operand:HI 0 "register_operand" "=&r") Index: config/avr/avr-protos.h =================================================================== --- config/avr/avr-protos.h (revision 176136) +++ config/avr/avr-protos.h (working copy) @@ -117,3 +117,4 @@ extern int class_max_nregs (enum reg_cla #ifdef REAL_VALUE_TYPE extern void asm_output_float (FILE *file, REAL_VALUE_TYPE n); #endif +extern bool avr_gate_split1(void); Index: config/avr/avr.c =================================================================== --- config/avr/avr.c (revision 176141) +++ config/avr/avr.c (working copy) @@ -5466,7 +5466,42 @@ avr_rtx_costs (rtx x, int codearg, int o case HImode: if (AVR_HAVE_MUL) - *total = COSTS_N_INSNS (!speed ? 7 : 10); + { + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + enum rtx_code code0 = GET_CODE (op0); + enum rtx_code code1 = GET_CODE (op1); + bool ex0 = SIGN_EXTEND == code0 || ZERO_EXTEND == code0; + bool ex1 = SIGN_EXTEND == code1 || ZERO_EXTEND == code1; + + if (ex0 + && (u8_operand (op1, HImode) + || s8_operand (op1, HImode))) + { + *total = COSTS_N_INSNS (!speed ? 4 : 6); + return true; + } + if (ex0 + && register_operand (op1, HImode)) + { + *total = COSTS_N_INSNS (!speed ? 5 : 8); + return true; + } + else if (ex0 || ex1) + { + *total = COSTS_N_INSNS (!speed ? 3 : 5); + return true; + } + else if (register_operand (op0, HImode) + && (u8_operand (op1, HImode) + || s8_operand (op1, HImode))) + { + *total = COSTS_N_INSNS (!speed ? 6 : 9); + return true; + } + else + *total = COSTS_N_INSNS (!speed ? 7 : 10); + } else if (!speed) *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); else @@ -5549,6 +5584,17 @@ avr_rtx_costs (rtx x, int codearg, int o break; case HImode: + if (AVR_HAVE_MUL) + { + if (const_2_to_7_operand (XEXP (x, 1), HImode) + && (SIGN_EXTEND == GET_CODE (XEXP (x, 0)) + || ZERO_EXTEND == GET_CODE (XEXP (x, 0)))) + { + *total = COSTS_N_INSNS (!speed ? 4 : 6); + return true; + } + } + if (GET_CODE (XEXP (x, 1)) != CONST_INT) { *total = COSTS_N_INSNS (!speed ? 5 : 41); @@ -6881,4 +6927,29 @@ avr_expand_builtin (tree exp, rtx target } +/* FIXME: We compose some insns by means of insn combine + and split them in split1. We don't want IRA/reload + to combine them to the original insns again because + that avoid some CSE optimizations if constants are + involved. If IRA/reload combines, the recombined + insns get split again after reload, but then CSE + does not take place. + It appears that at present there is no other way + to take away the insn from IRA. Notice that split1 + runs unconditionally so that all our insns will get + split no matter of command line options. */ + +#include "tree-pass.h" + +bool +avr_gate_split1 (void) +{ + if (current_pass->static_pass_number + < pass_match_asm_constraints.pass.static_pass_number) + return true; + + return false; +} + + #include "gt-avr.h"