From: Daniel Engel <g...@danielengel.com> The new functions provide an efficient tail call to __ctzsi2(), making them rather smaller and faster than the C versions.
gcc/libgcc/ChangeLog: 2021-01-07 Daniel Engel <g...@danielengel.com> * config/arm/bits/ctz2.S: Add __ffssi2() and __ffsdi2(). * config/arm/t-elf: Add _ffssi2 and _ffsdi2. --- libgcc/config/arm/bits/ctz2.S | 77 ++++++++++++++++++++++++++++++++++- libgcc/config/arm/t-elf | 2 + 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/libgcc/config/arm/bits/ctz2.S b/libgcc/config/arm/bits/ctz2.S index 4241fdad283..609b61cb6f3 100644 --- a/libgcc/config/arm/bits/ctz2.S +++ b/libgcc/config/arm/bits/ctz2.S @@ -1,4 +1,4 @@ -/* ctz2.S: ARM optimized 'ctz' functions +/* ctz2.S: ARM optimized 'ctz' and related functions Copyright (C) 2020-2021 Free Software Foundation, Inc. Contributed by Daniel Engel (g...@danielengel.com) @@ -228,3 +228,78 @@ FUNC_END ctzdi2 #endif /* L_ctzsi2 || L_ctzdi2 */ + +#ifdef L_ffsdi2 + +// int __ffsdi2(long long) +// Return the index of the least significant 1-bit in $r1:r0, +// or zero if $r1:r0 is zero. The least significant bit is index 1. +// Returns the result in $r0. +// Uses $r2 and possibly $r3 as scratch space. +// Same section as __ctzsi2() for sake of the tail call branches. +FUNC_START_SECTION ffsdi2 .text.sorted.libgcc.ctz2.ffsdi2 + CFI_START_FUNCTION + + // Simplify branching by assuming a non-zero lower word. + // For all such, ffssi2(x) == ctzsi2(x) + 1. + movs r2, #(33 - CTZ_RESULT_OFFSET) + + #if defined(__ARMEB__) && __ARMEB__ + // HACK: Big-endian passes the upper word in $r0; save it in scratch $r3. + movs r3, r0 + + // Test the lower word. + movs r0, r1 + bne SYM(__internal_ctzsi2) + + // Test the upper word. + movs r2, #(65 - CTZ_RESULT_OFFSET) + movs r0, r3 + bne SYM(__internal_ctzsi2) + + #else /* !__ARMEB__ */ + // Test the lower word. + cmp r0, #0 + bne SYM(__internal_ctzsi2) + + // Test the upper word. + movs r2, #(65 - CTZ_RESULT_OFFSET) + movs r0, r1 + bne SYM(__internal_ctzsi2) + + #endif /* !__ARMEB__ */ + + // Upper and lower words are both zero, so $r0 already holds the result 0. 
+ RET + + CFI_END_FUNCTION +FUNC_END ffsdi2 + +#endif /* L_ffsdi2 */ + + +#ifdef L_ffssi2 + +// int __ffssi2(int) +// Return the index of the least significant 1-bit in $r0, +// or zero if $r0 is zero. The least significant bit is index 1. +// Returns the result in $r0. +// Uses $r2 and possibly $r3 as scratch space. +// Same section as __ctzsi2() for sake of the tail call branches. +FUNC_START_SECTION ffssi2 .text.sorted.libgcc.ctz2.ffssi2 + CFI_START_FUNCTION + + // Simplify branching by assuming a non-zero argument. + // For all such, ffssi2(x) == ctzsi2(x) + 1. + movs r2, #(33 - CTZ_RESULT_OFFSET) + + // A zero argument is returned unmodified; otherwise tail call the ctz code. + cmp r0, #0 + bne SYM(__internal_ctzsi2) + RET + + CFI_END_FUNCTION +FUNC_END ffssi2 + +#endif /* L_ffssi2 */ + diff --git a/libgcc/config/arm/t-elf b/libgcc/config/arm/t-elf index 88ea869eea7..32de63f4c64 100644 --- a/libgcc/config/arm/t-elf +++ b/libgcc/config/arm/t-elf @@ -36,6 +36,8 @@ LIB1ASMFUNCS += \ _clrsbdi2 \ _clzdi2 \ _ctzdi2 \ + _ffssi2 \ + _ffsdi2 \ _dvmd_tls \ _divsi3 \ _modsi3 \ -- 2.25.1