Denis Chertykov schrieb: > 2011/9/12 Georg-Johann Lay <a...@gjlay.de>: >> This patch introduces patterns for multiply-add and multiply-sub. >> >> On the enhanced core, these operations can be performed with the product in >> R0; >> there is no need to MOVW it out of that register. The code is smaller and >> faster and has lower register pressure. >> >> Tested without regressions. >> >> Ok to commit? > > Ok. > > Denis.
This is the second part to fix this PR; it introduced multiply-add/-sub for QImode and one insn for HI = sign_extend (QI << 1). With this patch PR50358 is fixed up to some corner cases. The insns with CONST_INT split the load of the constant after reload. avr_rtx_costs describes these costs, but it would be advantageous to do the split pre-reload because IRA/reload could reuse constants. The trouble is that reload_in_progress is false in IRA and therefore the patterns match in IRA, so here is the same trouble I faced in the patch for widening multiply where a function like avr_gate_split1() was regarded as to hackish because it tested the pass-number to help out. Without such a function in the insn condition, the insn matches in IRA and a register is re-replaced with CONST_INT again, leading to crash in split2 because of gen_reg_rtx or because of !reload_completed in the insn condition. So this patch comes without reusing constants. Besides that, the "Write as one pattern" changes gather two insns and write them down as one; that's no functional change, it's just about using iterators to reduce lines of code. The order of insns changes, but that does not matter here. I didn't make an extra patch for that. Passed without regression. Ok to install? Johann PR target/50358 * config/avr/avr.md (*ashiftqihi2.signx.1): New insn. (*maddqi4, *maddqi4.const): New insns. (*msubqi4, *msubqi4.const): New insns. (umulqihi3, mulqihi3): Write as one pattern. (umulqi3_highpart, smulqi3_highpart): Ditto. (*maddqihi4.const, *umaddqihi4.uconst): Ditto. (*msubqihi4.const, *umsubqihi4.uconst): Ditto. (*muluqihi3.uconst, *mulsqihi3.sconst): Ditto. * config/avr/avr.c (avr_rtx_costs): Record costs of above in cases PLUS:QI and MINUS:QI. Increase costs of multiply-add/-sub for HImode by 1 in the case of multiplying with a CONST_INT. Record cost of *ashiftqihi2.signx.1 in case ASHIFT:QI.
Index: config/avr/avr.md =================================================================== --- config/avr/avr.md (revision 178806) +++ config/avr/avr.md (working copy) @@ -1027,31 +1027,21 @@ (define_insn "*mulqi3_call" [(set_attr "type" "xcall") (set_attr "cc" "clobber")]) -(define_insn "smulqi3_highpart" - [(set (match_operand:QI 0 "register_operand" "=r") - (truncate:QI - (lshiftrt:HI (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) - (sign_extend:HI (match_operand:QI 2 "register_operand" "d"))) +;; "umulqi3_highpart" +;; "smulqi3_highpart" +(define_insn "<extend_su>mulqi3_highpart" + [(set (match_operand:QI 0 "register_operand" "=r") + (truncate:QI + (lshiftrt:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "<mul_r_d>")) + (any_extend:HI (match_operand:QI 2 "register_operand" "<mul_r_d>"))) (const_int 8))))] "AVR_HAVE_MUL" - "muls %1,%2 + "mul<extend_s> %1,%2 mov %0,r1 clr __zero_reg__" [(set_attr "length" "3") (set_attr "cc" "clobber")]) -(define_insn "umulqi3_highpart" - [(set (match_operand:QI 0 "register_operand" "=r") - (truncate:QI - (lshiftrt:HI (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) - (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))) - (const_int 8))))] - "AVR_HAVE_MUL" - "mul %1,%2 - mov %0,r1 - clr __zero_reg__" - [(set_attr "length" "3") - (set_attr "cc" "clobber")]) ;; Used when expanding div or mod inline for some special values (define_insn "*subqi3.ashiftrt7" @@ -1064,25 +1054,16 @@ (define_insn "*subqi3.ashiftrt7" [(set_attr "length" "2") (set_attr "cc" "clobber")]) -(define_insn "mulqihi3" - [(set (match_operand:HI 0 "register_operand" "=r") - (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) - (sign_extend:HI (match_operand:QI 2 "register_operand" "d"))))] - "AVR_HAVE_MUL" - "muls %1,%2 - movw %0,r0 - clr r1" - [(set_attr "length" "3") - (set_attr "cc" "clobber")]) - -(define_insn "umulqihi3" - [(set (match_operand:HI 0 "register_operand" "=r") - (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) - (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))] +;; "umulqihi3" +;; "mulqihi3" +(define_insn "<extend_u>mulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "<mul_r_d>")) + (any_extend:HI (match_operand:QI 2 "register_operand" "<mul_r_d>"))))] "AVR_HAVE_MUL" - "mul %1,%2 + "mul<extend_s> %1,%2 movw %0,r0 - clr r1" + clr __zero_reg__" [(set_attr "length" "3") (set_attr "cc" "clobber")]) @@ -1138,6 +1119,72 @@ (define_insn "*oumulqihi3" (set_attr "cc" "clobber")]) ;****************************************************************************** +; multiply-add/sub QI: $0 = $3 +/- $1*$2 +;****************************************************************************** + +(define_insn "*maddqi4" + [(set (match_operand:QI 0 "register_operand" "=r") + (plus:QI (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")) + (match_operand:QI 3 "register_operand" "0")))] + + "AVR_HAVE_MUL" + "mul %1,%2 + add %A0,r0 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +(define_insn "*msubqi4" + [(set (match_operand:QI 0 "register_operand" "=r") + (minus:QI (match_operand:QI 3 "register_operand" "0") + (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r"))))] + "AVR_HAVE_MUL" + "mul %1,%2 + sub %A0,r0 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "*maddqi4.const" + [(set (match_operand:QI 0 "register_operand" "=r") + (plus:QI (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "const_int_operand" "n")) + (match_operand:QI 3 "register_operand" "0"))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *maddqi4 + (set (match_dup 0) + (plus:QI (mult:QI (match_dup 1) + (match_dup 4)) + (match_dup 3)))] + "") + +(define_insn_and_split "*msubqi4.const" + [(set (match_operand:QI 0 "register_operand" "=r") + (minus:QI (match_operand:QI 3 "register_operand" "0") + (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "const_int_operand" "n")))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *msubqi4 + (set (match_dup 0) + (minus:QI (match_dup 3) + (mult:QI (match_dup 1) + (match_dup 4))))] + "") + + +;****************************************************************************** ; multiply-add/sub HI: $0 = $3 +/- $1*$2 with 8-bit values $1, $2 ;****************************************************************************** @@ -1227,42 +1274,46 @@ (define_insn "*<any_extend:extend_su><an ;; Handle small constants -(define_insn_and_split "*umaddqihi4.uconst" - [(set (match_operand:HI 0 "register_operand" "=r") - (plus:HI (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) - (match_operand:HI 2 "u8_operand" "M")) - (match_operand:HI 3 "register_operand" "0"))) +;; "umaddqihi4.uconst" +;; "maddqihi4.sconst" +(define_insn_and_split "*<extend_u>maddqihi4.<extend_su>const" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "<mul_r_d>")) + (match_operand:HI 2 "<extend_su>8_operand" "n")) + (match_operand:HI 3 "register_operand" "0"))) (clobber (match_scratch:QI 4 "=&d"))] "AVR_HAVE_MUL" "#" "&& reload_completed" [(set (match_dup 4) (match_dup 2)) - ; *umaddqihi4 + ; *umaddqihi4 resp. *maddqihi4 (set (match_dup 0) - (plus:HI (mult:HI (zero_extend:HI (match_dup 1)) - (zero_extend:HI (match_dup 4))) + (plus:HI (mult:HI (any_extend:HI (match_dup 1)) + (any_extend:HI (match_dup 4))) (match_dup 3)))] { operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); }) -(define_insn_and_split "*umsubqihi4.uconst" - [(set (match_operand:HI 0 "register_operand" "=r") - (minus:HI (match_operand:HI 3 "register_operand" "0") - (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) - (match_operand:HI 2 "u8_operand" "M")))) +;; "*umsubqihi4.uconst" +;; "*msubqihi4.sconst" +(define_insn_and_split "*<extend_u>msubqihi4.<extend_su>const" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "<mul_r_d>")) + (match_operand:HI 2 "<extend_su>8_operand" "n")))) (clobber (match_scratch:QI 4 "=&d"))] "AVR_HAVE_MUL" "#" "&& reload_completed" [(set (match_dup 4) (match_dup 2)) - ; *umsubqihi4 + ; *umsubqihi4 resp. *msubqihi4 (set (match_dup 0) (minus:HI (match_dup 3) - (mult:HI (zero_extend:HI (match_dup 1)) - (zero_extend:HI (match_dup 4)))))] + (mult:HI (any_extend:HI (match_dup 1)) + (any_extend:HI (match_dup 4)))))] { operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); }) @@ -1290,46 +1341,6 @@ (define_insn_and_split "*umsubqihi4.ucon operands[2] = gen_int_mode (1 << INTVAL (operands[2]), QImode); }) -(define_insn_and_split "*maddqihi4.sconst" - [(set (match_operand:HI 0 "register_operand" "=r") - (plus:HI (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) - (match_operand:HI 2 "s8_operand" "n")) - (match_operand:HI 3 "register_operand" "0"))) - (clobber (match_scratch:QI 4 "=&d"))] - "AVR_HAVE_MUL" - "#" - "&& reload_completed" - [(set (match_dup 4) - (match_dup 2)) - ; *maddqihi4 - (set (match_dup 0) - (plus:HI (mult:HI (sign_extend:HI (match_dup 1)) - (sign_extend:HI (match_dup 4))) - (match_dup 3)))] - { - operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); - }) - -(define_insn_and_split "*msubqihi4.sconst" - [(set (match_operand:HI 0 "register_operand" "=r") - (minus:HI (match_operand:HI 3 "register_operand" "0") - (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) - (match_operand:HI 2 "s8_operand" "M")))) - (clobber (match_scratch:QI 4 "=&d"))] - "AVR_HAVE_MUL" - "#" - "&& reload_completed" - [(set (match_dup 4) - (match_dup 2)) - ; *smsubqihi4 - (set (match_dup 0) - (minus:HI (match_dup 3) - (mult:HI (sign_extend:HI (match_dup 1)) - (sign_extend:HI (match_dup 4)))))] - { - operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); - }) - ;; Same as the insn above, but combiner tries versions canonicalized to ASHIFT ;; for MULT with power of 2 and skips trying MULT insn above. We omit 128 ;; because this would require an extra pattern for just one value. @@ -1403,20 +1414,22 @@ (define_insn_and_split "*sumsubqihi4.uco ; mul HI: $1 = sign/zero-extend, $2 = small constant ;****************************************************************************** -(define_insn_and_split "*muluqihi3.uconst" +;; "*muluqihi3.uconst" +;; "*mulsqihi3.sconst" +(define_insn_and_split "*mul<extend_su>qihi3.<extend_su>const" [(set (match_operand:HI 0 "register_operand" "=r") - (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) - (match_operand:HI 2 "u8_operand" "M"))) + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "<mul_r_d>")) + (match_operand:HI 2 "<extend_su>8_operand" "n"))) (clobber (match_scratch:QI 3 "=&d"))] "AVR_HAVE_MUL" "#" "&& reload_completed" [(set (match_dup 3) (match_dup 2)) - ; umulqihi3 + ; umulqihi3 resp. mulqihi3 (set (match_dup 0) - (mult:HI (zero_extend:HI (match_dup 1)) - (zero_extend:HI (match_dup 3))))] + (mult:HI (any_extend:HI (match_dup 1)) + (any_extend:HI (match_dup 3))))] { operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); }) @@ -1439,24 +1452,6 @@ (define_insn_and_split "*muluqihi3.scons operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); }) -(define_insn_and_split "*mulsqihi3.sconst" - [(set (match_operand:HI 0 "register_operand" "=r") - (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) - (match_operand:HI 2 "s8_operand" "n"))) - (clobber (match_scratch:QI 3 "=&d"))] - "AVR_HAVE_MUL" - "#" - "&& reload_completed" - [(set (match_dup 3) - (match_dup 2)) - ; mulqihi3 - (set (match_dup 0) - (mult:HI (sign_extend:HI (match_dup 1)) - (sign_extend:HI (match_dup 3))))] - { - operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); - }) - (define_insn_and_split "*mulsqihi3.uconst" [(set (match_operand:HI 0 "register_operand" "=r") (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) @@ -1497,6 +1492,17 @@ (define_insn_and_split "*mulsqihi3.ocons ;; expand decides to use ASHIFT instead of MUL because ASHIFT costs are cheaper ;; at that time. Fix that. +(define_insn "*ashiftqihi2.signx.1" + [(set (match_operand:HI 0 "register_operand" "=r,*r") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "0,r")) + (const_int 1)))] + "" + "@ + lsl %A0\;sbc %B0,%B0 + mov %A0,%1\;lsl %A0\;sbc %B0,%B0" + [(set_attr "length" "2,3") + (set_attr "cc" "clobber")]) + (define_insn_and_split "*ashifthi3.signx.const" [(set (match_operand:HI 0 "register_operand" "=r") (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) Index: config/avr/avr.c =================================================================== --- config/avr/avr.c (revision 178806) +++ config/avr/avr.c (working copy) @@ -1528,7 +1528,7 @@ notice_update_cc (rtx body ATTRIBUTE_UNU /* Insn doesn't leave CC in a usable state. */ CC_STATUS_INIT; - /* Correct CC for the ashrqi3 with the shift count as CONST_INT != 6 */ + /* Correct CC for the ashrqi3 with the shift count as CONST_INT < 6 */ set = single_set (insn); if (set) { @@ -5570,6 +5570,17 @@ avr_rtx_costs (rtx x, int codearg, int o switch (mode) { case QImode: + if (AVR_HAVE_MUL + && MULT == GET_CODE (XEXP (x, 0)) + && register_operand (XEXP (x, 1), QImode)) + { + /* multiply-add */ + *total = COSTS_N_INSNS (speed ? 4 : 3); + /* multiply-add with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 0), 1))) + *total = COSTS_N_INSNS (1) + *total; + return true; + } *total = COSTS_N_INSNS (1); if (GET_CODE (XEXP (x, 1)) != CONST_INT) *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); @@ -5583,7 +5594,11 @@ avr_rtx_costs (rtx x, int codearg, int o && (ZERO_EXTEND == GET_CODE (XEXP (XEXP (x, 0), 0)) || SIGN_EXTEND == GET_CODE (XEXP (XEXP (x, 0), 0)))) { + /* multiply-add */ *total = COSTS_N_INSNS (speed ? 5 : 4); + /* multiply-add with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 0), 1))) + *total = COSTS_N_INSNS (1) + *total; return true; } if (GET_CODE (XEXP (x, 1)) != CONST_INT) @@ -5619,6 +5634,18 @@ avr_rtx_costs (rtx x, int codearg, int o case MINUS: if (AVR_HAVE_MUL + && QImode == mode + && register_operand (XEXP (x, 0), QImode) + && MULT == GET_CODE (XEXP (x, 1))) + { + /* multiply-sub */ + *total = COSTS_N_INSNS (speed ? 4 : 3); + /* multiply-sub with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 1), 1))) + *total = COSTS_N_INSNS (1) + *total; + return true; + } + if (AVR_HAVE_MUL && HImode == mode && register_operand (XEXP (x, 0), HImode) && (MULT == GET_CODE (XEXP (x, 1)) @@ -5626,7 +5653,11 @@ avr_rtx_costs (rtx x, int codearg, int o && (ZERO_EXTEND == GET_CODE (XEXP (XEXP (x, 1), 0)) || SIGN_EXTEND == GET_CODE (XEXP (XEXP (x, 1), 0)))) { + /* multiply-sub */ *total = COSTS_N_INSNS (speed ? 5 : 4); + /* multiply-sub with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 1), 1))) + *total = COSTS_N_INSNS (1) + *total; return true; } case AND: @@ -5815,6 +5846,13 @@ avr_rtx_costs (rtx x, int codearg, int o } } + if (const1_rtx == (XEXP (x, 1)) + && SIGN_EXTEND == GET_CODE (XEXP (x, 0))) + { + *total = COSTS_N_INSNS (2); + return true; + } + if (GET_CODE (XEXP (x, 1)) != CONST_INT) { *total = COSTS_N_INSNS (!speed ? 5 : 41);