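Straightforward leaf optimization, except: is there a reason to write jmp %o7 + 8 instead of the synthetic retl? (Plain ret expands to jmpl %i7 + 8, %g0 and is only correct after a save; in a leaf routine the return address is still in %o7, so retl -- jmpl %o7 + 8, %g0 -- is the equivalent.)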
On Fri, May 29, 2015 at 8:54 AM, Sebastian Huber <sebastian.hu...@embedded-brains.de> wrote: > Update #2270. > --- > cpukit/score/cpu/sparc/cpu_asm.S | 102 > ++++++++++++++++++--------------------- > 1 file changed, 48 insertions(+), 54 deletions(-) > > diff --git a/cpukit/score/cpu/sparc/cpu_asm.S > b/cpukit/score/cpu/sparc/cpu_asm.S > index 92674de..ddb2a13 100644 > --- a/cpukit/score/cpu/sparc/cpu_asm.S > +++ b/cpukit/score/cpu/sparc/cpu_asm.S > @@ -44,38 +44,35 @@ > .align 4 > PUBLIC(_CPU_Context_save_fp) > SYM(_CPU_Context_save_fp): > - save %sp, -CPU_MINIMUM_STACK_FRAME_SIZE, %sp > - > /* > * The following enables the floating point unit. > */ > > - mov %psr, %l0 > - sethi %hi(SPARC_PSR_EF_MASK), %l1 > - or %l1, %lo(SPARC_PSR_EF_MASK), %l1 > - or %l0, %l1, %l0 > - mov %l0, %psr ! **** ENABLE FLOAT ACCESS **** > + mov %psr, %o1 > + sethi %hi(SPARC_PSR_EF_MASK), %o2 > + or %o2, %lo(SPARC_PSR_EF_MASK), %o2 > + or %o1, %o2, %o1 > + mov %o1, %psr ! **** ENABLE FLOAT ACCESS **** > nop; nop; nop; ! Need three nops before EF is > - ld [%i0], %l0 ! active due to pipeline delay!!! > - std %f0, [%l0 + FO_F1_OFFSET] > - std %f2, [%l0 + F2_F3_OFFSET] > - std %f4, [%l0 + F4_F5_OFFSET] > - std %f6, [%l0 + F6_F7_OFFSET] > - std %f8, [%l0 + F8_F9_OFFSET] > - std %f10, [%l0 + F1O_F11_OFFSET] > - std %f12, [%l0 + F12_F13_OFFSET] > - std %f14, [%l0 + F14_F15_OFFSET] > - std %f16, [%l0 + F16_F17_OFFSET] > - std %f18, [%l0 + F18_F19_OFFSET] > - std %f20, [%l0 + F2O_F21_OFFSET] > - std %f22, [%l0 + F22_F23_OFFSET] > - std %f24, [%l0 + F24_F25_OFFSET] > - std %f26, [%l0 + F26_F27_OFFSET] > - std %f28, [%l0 + F28_F29_OFFSET] > - std %f30, [%l0 + F3O_F31_OFFSET] > - st %fsr, [%l0 + FSR_OFFSET] > - ret > - restore > + ld [%o0], %o1 ! active due to pipeline delay!!! 
> + std %f0, [%o1 + FO_F1_OFFSET] > + std %f2, [%o1 + F2_F3_OFFSET] > + std %f4, [%o1 + F4_F5_OFFSET] > + std %f6, [%o1 + F6_F7_OFFSET] > + std %f8, [%o1 + F8_F9_OFFSET] > + std %f10, [%o1 + F1O_F11_OFFSET] > + std %f12, [%o1 + F12_F13_OFFSET] > + std %f14, [%o1 + F14_F15_OFFSET] > + std %f16, [%o1 + F16_F17_OFFSET] > + std %f18, [%o1 + F18_F19_OFFSET] > + std %f20, [%o1 + F2O_F21_OFFSET] > + std %f22, [%o1 + F22_F23_OFFSET] > + std %f24, [%o1 + F24_F25_OFFSET] > + std %f26, [%o1 + F26_F27_OFFSET] > + std %f28, [%o1 + F28_F29_OFFSET] > + std %f30, [%o1 + F3O_F31_OFFSET] > + jmp %o7 + 8 > + st %fsr, [%o1 + FSR_OFFSET] > > /* > * void _CPU_Context_restore_fp( > @@ -93,38 +90,35 @@ SYM(_CPU_Context_save_fp): > .align 4 > PUBLIC(_CPU_Context_restore_fp) > SYM(_CPU_Context_restore_fp): > - save %sp, -CPU_MINIMUM_STACK_FRAME_SIZE , %sp > - > /* > * The following enables the floating point unit. > */ > > - mov %psr, %l0 > - sethi %hi(SPARC_PSR_EF_MASK), %l1 > - or %l1, %lo(SPARC_PSR_EF_MASK), %l1 > - or %l0, %l1, %l0 > - mov %l0, %psr ! **** ENABLE FLOAT ACCESS **** > + mov %psr, %o1 > + sethi %hi(SPARC_PSR_EF_MASK), %o2 > + or %o2, %lo(SPARC_PSR_EF_MASK), %o2 > + or %o1, %o2, %o1 > + mov %o1, %psr ! **** ENABLE FLOAT ACCESS **** > nop; nop; nop; ! Need three nops before EF is > - ld [%i0], %l0 ! active due to pipeline delay!!! > - ldd [%l0 + FO_F1_OFFSET], %f0 > - ldd [%l0 + F2_F3_OFFSET], %f2 > - ldd [%l0 + F4_F5_OFFSET], %f4 > - ldd [%l0 + F6_F7_OFFSET], %f6 > - ldd [%l0 + F8_F9_OFFSET], %f8 > - ldd [%l0 + F1O_F11_OFFSET], %f10 > - ldd [%l0 + F12_F13_OFFSET], %f12 > - ldd [%l0 + F14_F15_OFFSET], %f14 > - ldd [%l0 + F16_F17_OFFSET], %f16 > - ldd [%l0 + F18_F19_OFFSET], %f18 > - ldd [%l0 + F2O_F21_OFFSET], %f20 > - ldd [%l0 + F22_F23_OFFSET], %f22 > - ldd [%l0 + F24_F25_OFFSET], %f24 > - ldd [%l0 + F26_F27_OFFSET], %f26 > - ldd [%l0 + F28_F29_OFFSET], %f28 > - ldd [%l0 + F3O_F31_OFFSET], %f30 > - ld [%l0 + FSR_OFFSET], %fsr > - ret > - restore > + ld [%o0], %o1 ! 
active due to pipeline delay!!! > + ldd [%o1 + FO_F1_OFFSET], %f0 > + ldd [%o1 + F2_F3_OFFSET], %f2 > + ldd [%o1 + F4_F5_OFFSET], %f4 > + ldd [%o1 + F6_F7_OFFSET], %f6 > + ldd [%o1 + F8_F9_OFFSET], %f8 > + ldd [%o1 + F1O_F11_OFFSET], %f10 > + ldd [%o1 + F12_F13_OFFSET], %f12 > + ldd [%o1 + F14_F15_OFFSET], %f14 > + ldd [%o1 + F16_F17_OFFSET], %f16 > + ldd [%o1 + F18_F19_OFFSET], %f18 > + ldd [%o1 + F2O_F21_OFFSET], %f20 > + ldd [%o1 + F22_F23_OFFSET], %f22 > + ldd [%o1 + F24_F25_OFFSET], %f24 > + ldd [%o1 + F26_F27_OFFSET], %f26 > + ldd [%o1 + F28_F29_OFFSET], %f28 > + ldd [%o1 + F3O_F31_OFFSET], %f30 > + jmp %o7 + 8 > + ld [%o1 + FSR_OFFSET], %fsr > > #endif /* SPARC_HAS_FPU */ > > -- > 1.8.4.5 > > _______________________________________________ > devel mailing list > devel@rtems.org > http://lists.rtems.org/mailman/listinfo/devel _______________________________________________ devel mailing list devel@rtems.org http://lists.rtems.org/mailman/listinfo/devel