Denis Chertykov schrieb:
> 2011/9/12 Georg-Johann Lay <[email protected]>:
>> This patch introduces patterns for multiply-add and multiply-sub.
>>
>> On the enhanced core, these operations can be performed with the product in
>> R0;
>> there is no need to MOVW it out of that register. The code is smaller and
>> faster and has lower register pressure.
>>
>> Tested without regressions.
>>
>> Ok to commit?
>
> Ok.
>
> Denis.
This is the second part to fix this PR; it introduced multiply-add/-sub for
QImode and one insn for HI = sign_extend (QI << 1).
With this patch PR50358 is fixed up to some corner cases.
The insns with CONST_INT split the load of the constant after reload.
avr_rtx_costs describes these costs, but it would be advantageous to do the
split pre-reload because IRA/reload could reuse constants.
The trouble is that reload_in_progress is false in IRA and therefore the
patterns match in IRA, so here is the same trouble I faced in the patch for
widening multiply where a function like avr_gate_split1() was regarded as to
hackish because it tested the pass-number to help out. Without such a function
in the insn condition, the insn matches in IRA and a register is re-replaced
with CONST_INT again, leading to crash in split2 because of gen_reg_rtx or
because of !reload_completed in the insn condition.
So this patch comes without reusing constants.
Besides that, the "Write as one pattern" changes gather two insns and write
them down as one; that's no functional change, it's just about using iterators
to reduce lines of code. The order of insns changes, but that does not matter
here. I didn't make an extra patch for that.
Passed without regression.
Ok to install?
Johann
PR target/50358
* config/avr/avr.md (*ashiftqihi2.signx.1): New insn.
(*maddqi4, *maddqi4.const): New insns.
(*msubqi4, *msubqi4.const): New insns.
(umulqihi3, mulqihi3): Write as one pattern.
(umulqi3_highpart, smulqi3_highpart): Ditto.
(*maddqihi4.const, *umaddqihi4.uconst): Ditto.
(*msubqihi4.const, *umsubqihi4.uconst): Ditto.
(*muluqihi3.uconst, *mulsqihi3.sconst): Ditto.
* config/avr/avr.c (avr_rtx_costs): Record costs of above in cases
PLUS:QI and MINUS:QI. Increase costs of multiply-add/-sub for
HImode by 1 in the case of multiplying with a CONST_INT.
Record cost of *ashiftqihi2.signx.1 in case ASHIFT:QI.
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md (revision 178806)
+++ config/avr/avr.md (working copy)
@@ -1027,31 +1027,21 @@ (define_insn "*mulqi3_call"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
-(define_insn "smulqi3_highpart"
- [(set (match_operand:QI 0 "register_operand" "=r")
- (truncate:QI
- (lshiftrt:HI (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
- (sign_extend:HI (match_operand:QI 2 "register_operand" "d")))
+;; "umulqi3_highpart"
+;; "smulqi3_highpart"
+(define_insn "<extend_su>mulqi3_highpart"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (truncate:QI
+ (lshiftrt:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "<mul_r_d>"))
+ (any_extend:HI (match_operand:QI 2 "register_operand" "<mul_r_d>")))
(const_int 8))))]
"AVR_HAVE_MUL"
- "muls %1,%2
+ "mul<extend_s> %1,%2
mov %0,r1
clr __zero_reg__"
[(set_attr "length" "3")
(set_attr "cc" "clobber")])
-(define_insn "umulqi3_highpart"
- [(set (match_operand:QI 0 "register_operand" "=r")
- (truncate:QI
- (lshiftrt:HI (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
- (zero_extend:HI (match_operand:QI 2 "register_operand" "r")))
- (const_int 8))))]
- "AVR_HAVE_MUL"
- "mul %1,%2
- mov %0,r1
- clr __zero_reg__"
- [(set_attr "length" "3")
- (set_attr "cc" "clobber")])
;; Used when expanding div or mod inline for some special values
(define_insn "*subqi3.ashiftrt7"
@@ -1064,25 +1054,16 @@ (define_insn "*subqi3.ashiftrt7"
[(set_attr "length" "2")
(set_attr "cc" "clobber")])
-(define_insn "mulqihi3"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
- (sign_extend:HI (match_operand:QI 2 "register_operand" "d"))))]
- "AVR_HAVE_MUL"
- "muls %1,%2
- movw %0,r0
- clr r1"
- [(set_attr "length" "3")
- (set_attr "cc" "clobber")])
-
-(define_insn "umulqihi3"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
- (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
+;; "umulqihi3"
+;; "mulqihi3"
+(define_insn "<extend_u>mulqihi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "<mul_r_d>"))
+ (any_extend:HI (match_operand:QI 2 "register_operand" "<mul_r_d>"))))]
"AVR_HAVE_MUL"
- "mul %1,%2
+ "mul<extend_s> %1,%2
movw %0,r0
- clr r1"
+ clr __zero_reg__"
[(set_attr "length" "3")
(set_attr "cc" "clobber")])
@@ -1138,6 +1119,72 @@ (define_insn "*oumulqihi3"
(set_attr "cc" "clobber")])
;******************************************************************************
+; multiply-add/sub QI: $0 = $3 +/- $1*$2
+;******************************************************************************
+
+(define_insn "*maddqi4"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (plus:QI (mult:QI (match_operand:QI 1 "register_operand" "r")
+ (match_operand:QI 2 "register_operand" "r"))
+ (match_operand:QI 3 "register_operand" "0")))]
+
+ "AVR_HAVE_MUL"
+ "mul %1,%2
+ add %A0,r0
+ clr __zero_reg__"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*msubqi4"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (minus:QI (match_operand:QI 3 "register_operand" "0")
+ (mult:QI (match_operand:QI 1 "register_operand" "r")
+ (match_operand:QI 2 "register_operand" "r"))))]
+ "AVR_HAVE_MUL"
+ "mul %1,%2
+ sub %A0,r0
+ clr __zero_reg__"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")])
+
+(define_insn_and_split "*maddqi4.const"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (plus:QI (mult:QI (match_operand:QI 1 "register_operand" "r")
+ (match_operand:QI 2 "const_int_operand" "n"))
+ (match_operand:QI 3 "register_operand" "0")))
+ (clobber (match_scratch:QI 4 "=&d"))]
+ "AVR_HAVE_MUL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4)
+ (match_dup 2))
+ ; *maddqi4
+ (set (match_dup 0)
+ (plus:QI (mult:QI (match_dup 1)
+ (match_dup 4))
+ (match_dup 3)))]
+ "")
+
+(define_insn_and_split "*msubqi4.const"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (minus:QI (match_operand:QI 3 "register_operand" "0")
+ (mult:QI (match_operand:QI 1 "register_operand" "r")
+ (match_operand:QI 2 "const_int_operand" "n"))))
+ (clobber (match_scratch:QI 4 "=&d"))]
+ "AVR_HAVE_MUL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4)
+ (match_dup 2))
+ ; *msubqi4
+ (set (match_dup 0)
+ (minus:QI (match_dup 3)
+ (mult:QI (match_dup 1)
+ (match_dup 4))))]
+ "")
+
+
+;******************************************************************************
; multiply-add/sub HI: $0 = $3 +/- $1*$2 with 8-bit values $1, $2
;******************************************************************************
@@ -1227,42 +1274,46 @@ (define_insn "*<any_extend:extend_su><an
;; Handle small constants
-(define_insn_and_split "*umaddqihi4.uconst"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (plus:HI (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
- (match_operand:HI 2 "u8_operand" "M"))
- (match_operand:HI 3 "register_operand" "0")))
+;; "umaddqihi4.uconst"
+;; "maddqihi4.sconst"
+(define_insn_and_split "*<extend_u>maddqihi4.<extend_su>const"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "<mul_r_d>"))
+ (match_operand:HI 2 "<extend_su>8_operand" "n"))
+ (match_operand:HI 3 "register_operand" "0")))
(clobber (match_scratch:QI 4 "=&d"))]
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
[(set (match_dup 4)
(match_dup 2))
- ; *umaddqihi4
+ ; *umaddqihi4 resp. *maddqihi4
(set (match_dup 0)
- (plus:HI (mult:HI (zero_extend:HI (match_dup 1))
- (zero_extend:HI (match_dup 4)))
+ (plus:HI (mult:HI (any_extend:HI (match_dup 1))
+ (any_extend:HI (match_dup 4)))
(match_dup 3)))]
{
operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
})
-(define_insn_and_split "*umsubqihi4.uconst"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (minus:HI (match_operand:HI 3 "register_operand" "0")
- (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
- (match_operand:HI 2 "u8_operand" "M"))))
+;; "*umsubqihi4.uconst"
+;; "*msubqihi4.sconst"
+(define_insn_and_split "*<extend_u>msubqihi4.<extend_su>const"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (minus:HI (match_operand:HI 3 "register_operand" "0")
+ (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "<mul_r_d>"))
+ (match_operand:HI 2 "<extend_su>8_operand" "n"))))
(clobber (match_scratch:QI 4 "=&d"))]
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
[(set (match_dup 4)
(match_dup 2))
- ; *umsubqihi4
+ ; *umsubqihi4 resp. *msubqihi4
(set (match_dup 0)
(minus:HI (match_dup 3)
- (mult:HI (zero_extend:HI (match_dup 1))
- (zero_extend:HI (match_dup 4)))))]
+ (mult:HI (any_extend:HI (match_dup 1))
+ (any_extend:HI (match_dup 4)))))]
{
operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
})
@@ -1290,46 +1341,6 @@ (define_insn_and_split "*umsubqihi4.ucon
operands[2] = gen_int_mode (1 << INTVAL (operands[2]), QImode);
})
-(define_insn_and_split "*maddqihi4.sconst"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (plus:HI (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
- (match_operand:HI 2 "s8_operand" "n"))
- (match_operand:HI 3 "register_operand" "0")))
- (clobber (match_scratch:QI 4 "=&d"))]
- "AVR_HAVE_MUL"
- "#"
- "&& reload_completed"
- [(set (match_dup 4)
- (match_dup 2))
- ; *maddqihi4
- (set (match_dup 0)
- (plus:HI (mult:HI (sign_extend:HI (match_dup 1))
- (sign_extend:HI (match_dup 4)))
- (match_dup 3)))]
- {
- operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
- })
-
-(define_insn_and_split "*msubqihi4.sconst"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (minus:HI (match_operand:HI 3 "register_operand" "0")
- (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
- (match_operand:HI 2 "s8_operand" "M"))))
- (clobber (match_scratch:QI 4 "=&d"))]
- "AVR_HAVE_MUL"
- "#"
- "&& reload_completed"
- [(set (match_dup 4)
- (match_dup 2))
- ; *smsubqihi4
- (set (match_dup 0)
- (minus:HI (match_dup 3)
- (mult:HI (sign_extend:HI (match_dup 1))
- (sign_extend:HI (match_dup 4)))))]
- {
- operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
- })
-
;; Same as the insn above, but combiner tries versions canonicalized to ASHIFT
;; for MULT with power of 2 and skips trying MULT insn above. We omit 128
;; because this would require an extra pattern for just one value.
@@ -1403,20 +1414,22 @@ (define_insn_and_split "*sumsubqihi4.uco
; mul HI: $1 = sign/zero-extend, $2 = small constant
;******************************************************************************
-(define_insn_and_split "*muluqihi3.uconst"
+;; "*muluqihi3.uconst"
+;; "*mulsqihi3.sconst"
+(define_insn_and_split "*mul<extend_su>qihi3.<extend_su>const"
[(set (match_operand:HI 0 "register_operand" "=r")
- (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
- (match_operand:HI 2 "u8_operand" "M")))
+ (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "<mul_r_d>"))
+ (match_operand:HI 2 "<extend_su>8_operand" "n")))
(clobber (match_scratch:QI 3 "=&d"))]
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
[(set (match_dup 3)
(match_dup 2))
- ; umulqihi3
+ ; umulqihi3 resp. mulqihi3
(set (match_dup 0)
- (mult:HI (zero_extend:HI (match_dup 1))
- (zero_extend:HI (match_dup 3))))]
+ (mult:HI (any_extend:HI (match_dup 1))
+ (any_extend:HI (match_dup 3))))]
{
operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
})
@@ -1439,24 +1452,6 @@ (define_insn_and_split "*muluqihi3.scons
operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
})
-(define_insn_and_split "*mulsqihi3.sconst"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
- (match_operand:HI 2 "s8_operand" "n")))
- (clobber (match_scratch:QI 3 "=&d"))]
- "AVR_HAVE_MUL"
- "#"
- "&& reload_completed"
- [(set (match_dup 3)
- (match_dup 2))
- ; mulqihi3
- (set (match_dup 0)
- (mult:HI (sign_extend:HI (match_dup 1))
- (sign_extend:HI (match_dup 3))))]
- {
- operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
- })
-
(define_insn_and_split "*mulsqihi3.uconst"
[(set (match_operand:HI 0 "register_operand" "=r")
(mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a"))
@@ -1497,6 +1492,17 @@ (define_insn_and_split "*mulsqihi3.ocons
;; expand decides to use ASHIFT instead of MUL because ASHIFT costs are cheaper
;; at that time. Fix that.
+(define_insn "*ashiftqihi2.signx.1"
+ [(set (match_operand:HI 0 "register_operand" "=r,*r")
+ (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "0,r"))
+ (const_int 1)))]
+ ""
+ "@
+ lsl %A0\;sbc %B0,%B0
+ mov %A0,%1\;lsl %A0\;sbc %B0,%B0"
+ [(set_attr "length" "2,3")
+ (set_attr "cc" "clobber")])
+
(define_insn_and_split "*ashifthi3.signx.const"
[(set (match_operand:HI 0 "register_operand" "=r")
(ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c (revision 178806)
+++ config/avr/avr.c (working copy)
@@ -1528,7 +1528,7 @@ notice_update_cc (rtx body ATTRIBUTE_UNU
/* Insn doesn't leave CC in a usable state. */
CC_STATUS_INIT;
- /* Correct CC for the ashrqi3 with the shift count as CONST_INT != 6 */
+ /* Correct CC for the ashrqi3 with the shift count as CONST_INT < 6 */
set = single_set (insn);
if (set)
{
@@ -5570,6 +5570,17 @@ avr_rtx_costs (rtx x, int codearg, int o
switch (mode)
{
case QImode:
+ if (AVR_HAVE_MUL
+ && MULT == GET_CODE (XEXP (x, 0))
+ && register_operand (XEXP (x, 1), QImode))
+ {
+ /* multiply-add */
+ *total = COSTS_N_INSNS (speed ? 4 : 3);
+ /* multiply-add with constant: will be split and load constant. */
+ if (CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+ *total = COSTS_N_INSNS (1) + *total;
+ return true;
+ }
*total = COSTS_N_INSNS (1);
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed);
@@ -5583,7 +5594,11 @@ avr_rtx_costs (rtx x, int codearg, int o
&& (ZERO_EXTEND == GET_CODE (XEXP (XEXP (x, 0), 0))
|| SIGN_EXTEND == GET_CODE (XEXP (XEXP (x, 0), 0))))
{
+ /* multiply-add */
*total = COSTS_N_INSNS (speed ? 5 : 4);
+ /* multiply-add with constant: will be split and load constant. */
+ if (CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+ *total = COSTS_N_INSNS (1) + *total;
return true;
}
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
@@ -5619,6 +5634,18 @@ avr_rtx_costs (rtx x, int codearg, int o
case MINUS:
if (AVR_HAVE_MUL
+ && QImode == mode
+ && register_operand (XEXP (x, 0), QImode)
+ && MULT == GET_CODE (XEXP (x, 1)))
+ {
+ /* multiply-sub */
+ *total = COSTS_N_INSNS (speed ? 4 : 3);
+ /* multiply-sub with constant: will be split and load constant. */
+ if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
+ *total = COSTS_N_INSNS (1) + *total;
+ return true;
+ }
+ if (AVR_HAVE_MUL
&& HImode == mode
&& register_operand (XEXP (x, 0), HImode)
&& (MULT == GET_CODE (XEXP (x, 1))
@@ -5626,7 +5653,11 @@ avr_rtx_costs (rtx x, int codearg, int o
&& (ZERO_EXTEND == GET_CODE (XEXP (XEXP (x, 1), 0))
|| SIGN_EXTEND == GET_CODE (XEXP (XEXP (x, 1), 0))))
{
+ /* multiply-sub */
*total = COSTS_N_INSNS (speed ? 5 : 4);
+ /* multiply-sub with constant: will be split and load constant. */
+ if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
+ *total = COSTS_N_INSNS (1) + *total;
return true;
}
case AND:
@@ -5815,6 +5846,13 @@ avr_rtx_costs (rtx x, int codearg, int o
}
}
+ if (const1_rtx == (XEXP (x, 1))
+ && SIGN_EXTEND == GET_CODE (XEXP (x, 0)))
+ {
+ *total = COSTS_N_INSNS (2);
+ return true;
+ }
+
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
*total = COSTS_N_INSNS (!speed ? 5 : 41);