This is a tweak for the signed 16- and 32-bit division routines. The old code unconditionally called the subroutine __divmod{si|hi}4_neg1, which returned immediately if the T-flag was not set. This is costly. By shuffling the instructions, the test can be moved up without increasing the code size, saving calls here and there.
The speed gain is 1..17 ticks for ATmega88, which is a speed-up of up to 7% for 16-bit division (formerly about 230-240 ticks). For 32-bit division the absolute speed gain is the same.

Moreover, addqi3 can handle +/-2 now, which saves a reload of the constant if the register is not a d-register.

The new *negqihi2 insn is for code like

    int minus (char a)
    {
        return -a;
    }

that compiled to

    minus:
        clr r25            ; 6 extendqihi2/1 [length = 3]
        sbrc r24,7
        com r25
        com r25            ; 7 neghi2/1 [length = 3]
        neg r24
        sbci r25,lo8(-1)
        ret                ; 25 return [length = 1]

and now is compiled to a shorter, faster sequence without need of a d-register:

    minus:
        clr r25            ; 7 *negqihi2 [length = 4]
        neg r24
        brge .+2
        com r25
        ret                ; 25 return [length = 1]

Tested without regressions. Moreover, the new sequences are tested individually against the old code.

The patch is against the old infrastructure, but the changelog is already for the new libgcc layout.

Ok for trunk?

Johann

gcc/
    * config/avr/constraints.md (Cm2): New constraint for int -2.
    * config/avr/avr.md (addqi3): Use it. New alternatives for +/-2.
    (*negqihi2): New insn.

libgcc/
    * config/avr/lib1funcs.S (__divmodhi4, __divmodsi4): Tweak speed.
Index: config/avr/libgcc.S =================================================================== --- config/avr/libgcc.S (revision 180738) +++ config/avr/libgcc.S (working copy) @@ -565,27 +565,28 @@ DEFUN __divmodhi4 .global _div _div: bst r_arg1H,7 ; store sign of dividend - mov __tmp_reg__,r_arg1H - eor __tmp_reg__,r_arg2H ; r0.7 is sign of result + mov __tmp_reg__,r_arg2H + brtc 0f + com __tmp_reg__ ; r0.7 is sign of result rcall __divmodhi4_neg1 ; dividend negative : negate +0: sbrc r_arg2H,7 rcall __divmodhi4_neg2 ; divisor negative : negate XCALL __udivmodhi4 ; do the unsigned div/mod - rcall __divmodhi4_neg1 ; correct remainder sign - tst __tmp_reg__ - brpl __divmodhi4_exit + sbrc __tmp_reg__,7 + rcall __divmodhi4_neg2 ; correct remainder sign + brtc __divmodhi4_exit +__divmodhi4_neg1: + com r_arg1H + neg r_arg1L ; correct dividend/remainder sign + sbci r_arg1H,0xff + ret __divmodhi4_neg2: com r_arg2H neg r_arg2L ; correct divisor/result sign sbci r_arg2H,0xff __divmodhi4_exit: ret -__divmodhi4_neg1: - brtc __divmodhi4_exit - com r_arg1H - neg r_arg1L ; correct dividend/remainder sign - sbci r_arg1H,0xff - ret ENDF __divmodhi4 #endif /* defined (L_divmodhi4) */ @@ -672,16 +673,27 @@ ENDF __udivmodsi4 #if defined (L_divmodsi4) DEFUN __divmodsi4 - bst r_arg1HH,7 ; store sign of dividend - mov __tmp_reg__,r_arg1HH - eor __tmp_reg__,r_arg2HH ; r0.7 is sign of result + mov __tmp_reg__,r_arg2HH + bst r_arg1HH,7 ; store sign of dividend + brtc 0f + com __tmp_reg__ ; r0.7 is sign of result rcall __divmodsi4_neg1 ; dividend negative : negate +0: sbrc r_arg2HH,7 rcall __divmodsi4_neg2 ; divisor negative : negate XCALL __udivmodsi4 ; do the unsigned div/mod - rcall __divmodsi4_neg1 ; correct remainder sign - rol __tmp_reg__ - brcc __divmodsi4_exit + sbrc __tmp_reg__, 7 ; correct quotient sign + rcall __divmodsi4_neg2 + brtc __divmodsi4_exit ; correct remainder sign +__divmodsi4_neg1: + com r_arg1HH + com r_arg1HL + com r_arg1H + neg r_arg1L ; correct dividend/remainder 
sign + sbci r_arg1H, 0xff + sbci r_arg1HL,0xff + sbci r_arg1HH,0xff + ret __divmodsi4_neg2: com r_arg2HH com r_arg2HL @@ -692,16 +704,6 @@ __divmodsi4_neg2: sbci r_arg2HH,0xff __divmodsi4_exit: ret -__divmodsi4_neg1: - brtc __divmodsi4_exit - com r_arg1HH - com r_arg1HL - com r_arg1H - neg r_arg1L ; correct dividend/remainder sign - sbci r_arg1H, 0xff - sbci r_arg1HL,0xff - sbci r_arg1HH,0xff - ret ENDF __divmodsi4 #endif /* defined (L_divmodsi4) */ Index: config/avr/avr.md =================================================================== --- config/avr/avr.md (revision 180739) +++ config/avr/avr.md (working copy) @@ -739,17 +739,19 @@ (define_insn "*strlenhi" ; add bytes (define_insn "addqi3" - [(set (match_operand:QI 0 "register_operand" "=r,d,r,r") - (plus:QI (match_operand:QI 1 "register_operand" "%0,0,0,0") - (match_operand:QI 2 "nonmemory_operand" "r,i,P,N")))] + [(set (match_operand:QI 0 "register_operand" "=r,d,r,r,r,r") + (plus:QI (match_operand:QI 1 "register_operand" "%0,0,0,0,0,0") + (match_operand:QI 2 "nonmemory_operand" "r,i,P,N,K,Cm2")))] "" "@ add %0,%2 subi %0,lo8(-(%2)) inc %0 - dec %0" - [(set_attr "length" "1,1,1,1") - (set_attr "cc" "set_czn,set_czn,set_zn,set_zn")]) + dec %0 + inc %0\;inc %0 + dec %0\;dec %0" + [(set_attr "length" "1,1,1,1,2,2") + (set_attr "cc" "set_czn,set_czn,set_zn,set_zn,set_zn,set_zn")]) (define_expand "addhi3" @@ -3089,6 +3091,14 @@ (define_insn "negqi2" [(set_attr "length" "1") (set_attr "cc" "set_zn")]) +(define_insn "*negqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (neg:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "0"))))] + "" + "clr %B0\;neg %A0\;brge .+2\;com %B0" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + (define_insn "neghi2" [(set (match_operand:HI 0 "register_operand" "=!d,r,&r") (neg:HI (match_operand:HI 1 "register_operand" "0,0,r")))] Index: config/avr/constraints.md =================================================================== --- 
config/avr/constraints.md (revision 180738) +++ config/avr/constraints.md (working copy) @@ -103,6 +103,11 @@ (define_memory_constraint "Q" (and (match_code "mem") (match_test "extra_constraint_Q (op)"))) +(define_constraint "Cm2" + "Constant integer @minus{}2." + (and (match_code "const_int") + (match_test "ival == -2"))) + (define_constraint "C03" "Constant integer 3." (and (match_code "const_int")