This patch finalizes the work on PR49313, i.e. a better libgcc
implementation of functions such as bswap, counting leading/trailing
zeros, parity and popcount.
These functions are already implemented in libgcc.
This patch now provides a better integration of these functions:
the calls are no longer emitted by optabs as ordinary black-box calls;
instead, there are insns that describe the exact register usage of
the functions, which are represented as implicit library calls.
This is advantageous because some call-clobbered registers are not
touched and there are more leaf functions.
Some libgcc functions have minor changes to reduce register
footprint.
Besides that, copysignsf3 is implemented, which is easy on AVR.
Ok to commit?
Johann
PR target/49313
* config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction.
(__ctzsi2): Result for 0 may be undefined.
(__ctzhi2): Result for 0 may be undefined.
(__popcounthi2): Don't clobber r30. Use __popcounthi2_tail.
(__popcountsi2): Ditto. And don't clobber r26.
(__popcountdi2): Ditto. And don't clobber r27.
* config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum.
(parityhi2): New expand.
(paritysi2): New expand.
(popcounthi2): New expand.
(popcountsi2): New expand.
(clzhi2): New expand.
(clzsi2): New expand.
(ctzhi2): New expand.
(ctzsi2): New expand.
(ffshi2): New expand.
(ffssi2): New expand.
(copysignsf3): New insn.
(bswapsi2): New expand.
(*parityhi2.libgcc): New insn.
(*parityqihi2.libgcc): New insn.
(*paritysihi2.libgcc): New insn.
(*popcounthi2.libgcc): New insn.
(*popcountsi2.libgcc): New insn.
(*popcountqi2.libgcc): New insn.
(*popcountqihi2.libgcc): New insn-and-split.
(*clzhi2.libgcc): New insn.
(*clzsihi2.libgcc): New insn.
(*ctzhi2.libgcc): New insn.
(*ctzsihi2.libgcc): New insn.
(*ffshi2.libgcc): New insn.
(*ffssihi2.libgcc): New insn.
(*bswapsi2.libgcc): New insn.
Index: config/avr/libgcc.S
===================================================================
--- config/avr/libgcc.S (revision 176818)
+++ config/avr/libgcc.S (working copy)
@@ -1061,9 +1061,15 @@ ENDF __ffssi2
;; clobbers: r26
DEFUN __ffshi2
clr r26
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; Some cores have problems skipping a 2-word instruction
+ tst r24
+ breq 2f
+#else
cpse r24, __zero_reg__
+#endif /* __AVR_HAVE_JMP_CALL__ */
1: XJMP __loop_ffsqi2
- ldi r26, 8
+2: ldi r26, 8
or r24, r25
brne 1b
ret
@@ -1093,12 +1099,12 @@ ENDF __loop_ffsqi2
#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
-;; ctz(0) = 32
+;; clobbers: r26, r22
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
XCALL __ffssi2
dec r24
- sbrc r24, 7
- ldi r24, 32
ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */
@@ -1106,12 +1112,12 @@ ENDF __ctzsi2
#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
-;; ctz(0) = 16
+;; clobbers: r26
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
XCALL __ffshi2
dec r24
- sbrc r24, 7
- ldi r24, 16
ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */
@@ -1245,47 +1251,50 @@ ENDF __parityqi2
#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
-;; clobbers: r30, __tmp_reg__
+;; clobbers: __tmp_reg__
DEFUN __popcounthi2
XCALL __popcountqi2
- mov r30, r24
+ push r24
mov r24, r25
XCALL __popcountqi2
- add r24, r30
clr r25
- ret
+ ;; FALLTHRU
ENDF __popcounthi2
+
+DEFUN __popcounthi2_tail
+ pop __tmp_reg__
+ add r24, __tmp_reg__
+ ret
+ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */
#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
-;; clobbers: r26, r30, __tmp_reg__
+;; clobbers: __tmp_reg__
DEFUN __popcountsi2
XCALL __popcounthi2
- mov r26, r24
+ push r24
mov_l r24, r22
mov_h r25, r23
XCALL __popcounthi2
- add r24, r26
- ret
+ XJMP __popcounthi2_tail
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */
#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
-;; clobbers: r22, r23, r26, r27, r30, __tmp_reg__
+;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
XCALL __popcountsi2
- mov r27, r24
+ push r24
mov_l r22, r18
mov_h r23, r19
mov_l r24, r20
mov_h r25, r21
XCALL __popcountsi2
- add r24, r27
- ret
+ XJMP __popcounthi2_tail
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md (revision 176818)
+++ config/avr/avr.md (working copy)
@@ -55,6 +55,7 @@ (define_c_enum "unspec"
UNSPEC_FMUL
UNSPEC_FMULS
UNSPEC_FMULSU
+ UNSPEC_COPYSIGN
])
(define_c_enum "unspecv"
@@ -3680,6 +3681,275 @@ (define_insn "delay_cycles_4"
[(set_attr "length" "9")
(set_attr "cc" "clobber")])
+
+;; Parity
+
+(define_expand "parityhi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (parity:HI (reg:HI 24)))
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "paritysi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (parity:HI (reg:SI 22)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (reg:HI 24)))]
+ ""
+ "")
+
+(define_insn "*parityhi2.libgcc"
+ [(set (reg:HI 24)
+ (parity:HI (reg:HI 24)))]
+ ""
+ "%~call __parityhi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*parityqihi2.libgcc"
+ [(set (reg:HI 24)
+ (parity:HI (reg:QI 24)))]
+ ""
+ "%~call __parityqi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*paritysihi2.libgcc"
+ [(set (reg:HI 24)
+ (parity:HI (reg:SI 22)))]
+ ""
+ "%~call __paritysi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+
+;; Popcount
+
+(define_expand "popcounthi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (popcount:HI (reg:HI 24)))
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "popcountsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (popcount:HI (reg:SI 22)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (reg:HI 24)))]
+ ""
+ "")
+
+(define_insn "*popcounthi2.libgcc"
+ [(set (reg:HI 24)
+ (popcount:HI (reg:HI 24)))]
+ ""
+ "%~call __popcounthi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*popcountsi2.libgcc"
+ [(set (reg:HI 24)
+ (popcount:HI (reg:SI 22)))]
+ ""
+ "%~call __popcountsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*popcountqi2.libgcc"
+ [(set (reg:QI 24)
+ (popcount:QI (reg:QI 24)))]
+ ""
+ "%~call __popcountqi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn_and_split "*popcountqihi2.libgcc"
+ [(set (reg:HI 24)
+ (popcount:HI (reg:QI 24)))]
+ ""
+ "#"
+ ""
+ [(set (reg:QI 24)
+ (popcount:QI (reg:QI 24)))
+ (set (reg:QI 25)
+ (const_int 0))]
+ "")
+
+;; Count Leading Zeros
+
+(define_expand "clzhi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (clz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "clzsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (clz:HI (reg:SI 22)))
+ (clobber (reg:QI 26))])
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (reg:HI 24)))]
+ ""
+ "")
+
+(define_insn "*clzhi2.libgcc"
+ [(parallel [(set (reg:HI 24)
+ (clz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])]
+ ""
+ "%~call __clzhi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*clzsihi2.libgcc"
+ [(parallel [(set (reg:HI 24)
+ (clz:HI (reg:SI 22)))
+ (clobber (reg:QI 26))])]
+ ""
+ "%~call __clzsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+;; Count Trailing Zeros
+
+(define_expand "ctzhi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ctz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "ctzsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ctz:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))])
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (reg:HI 24)))]
+ ""
+ "")
+
+(define_insn "*ctzhi2.libgcc"
+ [(set (reg:HI 24)
+ (ctz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))]
+ ""
+ "%~call __ctzhi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*ctzsihi2.libgcc"
+ [(set (reg:HI 24)
+ (ctz:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))]
+ ""
+ "%~call __ctzsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+;; Find First Set
+
+(define_expand "ffshi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ffs:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "ffssi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ffs:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))])
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (reg:HI 24)))]
+ ""
+ "")
+
+(define_insn "*ffshi2.libgcc"
+ [(parallel [(set (reg:HI 24)
+ (ffs:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])]
+ ""
+ "%~call __ffshi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*ffssihi2.libgcc"
+ [(parallel [(set (reg:HI 24)
+ (ffs:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))])]
+ ""
+ "%~call __ffssi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+;; Copysign
+
+(define_insn "copysignsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "r")]
+ UNSPEC_COPYSIGN))]
+ ""
+ "bst %D2,7\;bld %D0,7"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+;; Swap Bytes (change byte-endianness)
+
+(define_expand "bswapsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (set (reg:SI 22)
+ (bswap:SI (reg:SI 22)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (reg:SI 22))]
+ ""
+ "")
+
+(define_insn "*bswapsi2.libgcc"
+ [(set (reg:SI 22)
+ (bswap:SI (reg:SI 22)))]
+ ""
+ "%~call __bswapsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+
;; CPU instructions
;; NOP taking 1 or 2 Ticks