libgcc/ChangeLog:

2021-01-16  Daniel Engel  <g...@danielengel.com>

	* Makefile.in (MPURE_CODE): New variable; defines __PURE_CODE__
	when CFLAGS contains -mpure-code.
	(gcc_compile): Append MPURE_CODE.
	* config/arm/lib1funcs.S (FUNC_START_SECTION): Set section flags
	for __PURE_CODE__.
	* config/arm/clz2.S (__clzsi2): Add -mpure-code compatible
	instructions.
	* config/arm/ctz2.S (__ctzsi2): Likewise.
	* config/arm/popcnt.S (__popcountsi2, __popcountdi2): Likewise.
---
 libgcc/Makefile.in            |  5 ++++-
 libgcc/config/arm/clz2.S      | 25 ++++++++++++++++++++++-
 libgcc/config/arm/ctz2.S      | 38 +++++++++++++++++++++++++++++++++--
 libgcc/config/arm/lib1funcs.S |  7 ++++++-
 libgcc/config/arm/popcnt.S    | 33 +++++++++++++++++++++++++-----
 5 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in
index 32e329f7764..e6b2ce5c6d7 100644
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -307,6 +307,9 @@ CRTSTUFF_CFLAGS = -O2 $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -g0 \
 # Extra flags to use when compiling crt{begin,end}.o.
 CRTSTUFF_T_CFLAGS =
 
+# Define __PURE_CODE__ for assembly sources when CFLAGS contains -mpure-code.
+MPURE_CODE = $(if $(filter -mpure-code,$(CFLAGS)),-D__PURE_CODE__)
+
 MULTIDIR := $(shell $(CC) $(CFLAGS) -print-multi-directory)
 MULTIOSDIR := $(shell $(CC) $(CFLAGS) -print-multi-os-directory)
 
@@ -316,7 +319,7 @@ inst_slibdir = $(slibdir)$(MULTIOSSUBDIR)
 
 gcc_compile_bare = $(CC) $(INTERNAL_CFLAGS) $(CFLAGS-$(<F))
 compile_deps = -MT $@ -MD -MP -MF $(basename $@).dep
-gcc_compile = $(gcc_compile_bare) -o $@ $(compile_deps)
+gcc_compile = $(gcc_compile_bare) -o $@ $(compile_deps) $(MPURE_CODE)
 gcc_s_compile = $(gcc_compile) -DSHARED
 
 objects = $(filter %$(objext),$^)
diff --git a/libgcc/config/arm/clz2.S b/libgcc/config/arm/clz2.S
index a2de45ff651..97a44f5d187 100644
--- a/libgcc/config/arm/clz2.S
+++ b/libgcc/config/arm/clz2.S
@@ -214,17 +214,40 @@ FUNC_ENTRY clzsi2
         IT(sub,ne) r2, #4
 
     LLSYM(__clz2):
+      #if defined(__PURE_CODE__) && __PURE_CODE__
+        // Without access to table data, continue unrolling the loop.
+        lsrs r1, r0, #2
+
+      #ifdef __HAVE_FEATURE_IT
+        do_it ne,t
+      #else
+        beq LLSYM(__clz1)
+      #endif
+
+        // Out of 4 bits, the first '1' is somewhere in the highest 2,
+        // so the lower 2 bits are no longer interesting.
+        IT(mov,ne) r0, r1
+        IT(sub,ne) r2, #2
+
+    LLSYM(__clz1):
+        // Convert remainder {0,1,2,3} to {0,1,2,2}.
+        lsrs r1, r0, #1
+        bics r0, r1
+
+      #else /* !__PURE_CODE__ */
         // Load the remainder by index
         adr r1, LLSYM(__clz_remainder)
         ldrb r0, [r1, r0]
 
+      #endif /* !__PURE_CODE__ */
       #endif /* !__OPTIMIZE_SIZE__ */
 
         // Account for the remainder.
         subs r0, r2, r0
         RET
 
-      #if !defined(__OPTIMIZE_SIZE__) || !__OPTIMIZE_SIZE__
+      #if !(defined(__OPTIMIZE_SIZE__) && __OPTIMIZE_SIZE__) && \
+          !(defined(__PURE_CODE__) && __PURE_CODE__)
         .align 2
     LLSYM(__clz_remainder):
         .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4
diff --git a/libgcc/config/arm/ctz2.S b/libgcc/config/arm/ctz2.S
index b9528a061a2..6a49d64f3a6 100644
--- a/libgcc/config/arm/ctz2.S
+++ b/libgcc/config/arm/ctz2.S
@@ -209,11 +209,44 @@ FUNC_ENTRY ctzsi2
         IT(sub,ne) r2, #4
 
     LLSYM(__ctz2):
+      #if defined(__PURE_CODE__) && __PURE_CODE__
+        // Without access to table data, continue unrolling the loop.
+        lsls r1, r0, #2
+
+      #ifdef __HAVE_FEATURE_IT
+        do_it ne, t
+      #else
+        beq LLSYM(__ctz1)
+      #endif
+
+        // Out of 4 bits, the first '1' is somewhere in the lowest 2,
+        // so the higher 2 bits are no longer interesting.
+        IT(mov,ne) r0, r1
+        IT(sub,ne) r2, #2
+
+    LLSYM(__ctz1):
+        // Convert remainder {0,1,2,3} in $r0[31:30] to {0,2,1,2}.
+        lsrs r0, #31
+
+      #ifdef __HAVE_FEATURE_IT
+        do_it cs, t
+      #else
+        bcc LLSYM(__ctz_zero)
+      #endif
+
+        // If bit[30] of the remainder is set, neither of these bits count
+        // towards the result. Bit[31] must be cleared.
+        // Otherwise, bit[31] becomes the final remainder.
+        IT(sub,cs) r2, #2
+        IT(eor,cs) r0, r0
+
+      #else /* !__PURE_CODE__ */
         // Look up the remainder by index.
         lsrs r0, #28
         adr r3, LLSYM(__ctz_remainder)
         ldrb r0, [r3, r0]
 
+      #endif /* !__PURE_CODE__ */
       #endif /* !__OPTIMIZE_SIZE__ */
 
     LLSYM(__ctz_zero):
@@ -221,8 +254,9 @@ FUNC_ENTRY ctzsi2
         subs r0, r2, r0
         RET
 
-      #if (!defined(__ARM_FEATURE_CLZ) || !__ARM_FEATURE_CLZ) && \
-          (!defined(__OPTIMIZE_SIZE__) || !__OPTIMIZE_SIZE__)
+      #if !(defined(__ARM_FEATURE_CLZ) && __ARM_FEATURE_CLZ) && \
+          !(defined(__OPTIMIZE_SIZE__) && __OPTIMIZE_SIZE__) && \
+          !(defined(__PURE_CODE__) && __PURE_CODE__)
         .align 2
     LLSYM(__ctz_remainder):
         .byte 0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4
diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index 5148957144b..59b2370e160 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -454,7 +454,12 @@ SYM (\name):
    Use the *_START_SECTION macros for declarations that the linker should
    place in a non-defailt section (e.g. ".rodata", ".text.subsection"). */
 .macro FUNC_START_SECTION name section
-        .section \section,"x"
+#if defined(__PURE_CODE__) && __PURE_CODE__
+        /* SHF_ARM_PURECODE | SHF_ALLOC | SHF_EXECINSTR */
+        .section \section,"0x20000006",%progbits
+#else
+        .section \section,"ax",%progbits
+#endif
         .align 0
         FUNC_ENTRY \name
 .endm
diff --git a/libgcc/config/arm/popcnt.S b/libgcc/config/arm/popcnt.S
index 51b1ed745ee..d6f65403b5d 100644
--- a/libgcc/config/arm/popcnt.S
+++ b/libgcc/config/arm/popcnt.S
@@ -23,6 +23,29 @@
    <http://www.gnu.org/licenses/>.  */
 
 
+#if defined(L_popcountdi2) || defined(L_popcountsi2)
+// ldmask: fill all four bytes of reg with the byte value (may clobber temp).
+.macro ldmask reg, temp, value
+    #if defined(__PURE_CODE__) && (__PURE_CODE__)
+      #ifdef NOT_ISA_TARGET_32BIT
+        movs \reg, #(\value)
+        lsls \temp, \reg, #8
+        orrs \reg, \temp
+        lsls \temp, \reg, #16
+        orrs \reg, \temp
+      #else
+        // Assumption: __PURE_CODE__ is only supported on M-profile targets.
+        movw \reg, #((\value) * 0x101)
+        movt \reg, #((\value) * 0x101)
+      #endif
+    #else
+        ldr \reg, =((\value) * 0x1010101)
+    #endif
+.endm
+
+#endif
+
+
 #ifdef L_popcountdi2
 
 // int __popcountdi2(int)
@@ -49,7 +72,7 @@ FUNC_START_SECTION popcountdi2 .text.sorted.libgcc.popcountdi2
 
   #else /* !__OPTIMIZE_SIZE__ */
         // Load the one-bit alternating mask.
-        ldr r3, =0x55555555
+        ldmask r3, r2, 0x55
 
         // Reduce the second word.
         lsrs r2, r1, #1
@@ -62,7 +85,7 @@ FUNC_START_SECTION popcountdi2 .text.sorted.libgcc.popcountdi2
         subs r0, r2
 
         // Load the two-bit alternating mask.
-        ldr r3, =0x33333333
+        ldmask r3, r2, 0x33
 
         // Reduce the second word.
         lsrs r2, r1, #2
@@ -140,7 +163,7 @@ FUNC_ENTRY popcountsi2
 
   #else /* !__OPTIMIZE_SIZE__ */
         // Load the one-bit alternating mask.
-        ldr r3, =0x55555555
+        ldmask r3, r2, 0x55
 
         // Reduce the word.
         lsrs r1, r0, #1
@@ -148,7 +171,7 @@ FUNC_ENTRY popcountsi2
         subs r0, r1
 
         // Load the two-bit alternating mask.
-        ldr r3, =0x33333333
+        ldmask r3, r2, 0x33
 
         // Reduce the word.
         lsrs r1, r0, #2
@@ -158,7 +181,7 @@ FUNC_ENTRY popcountsi2
         adds r0, r1
 
         // Load the four-bit alternating mask.
-        ldr r3, =0x0F0F0F0F
+        ldmask r3, r2, 0x0F
 
         // Reduce the word.
         lsrs r1, r0, #4
-- 
2.25.1