On Mon, 7 Sep 2015 14:40:48 +0300 Pekka Paalanen <[email protected]> wrote:
> From: Ben Avison <[email protected]>
>
> This new fast path is initially disabled by putting the entries in the
> lookup table after the sentinel. The compiler cannot tell the new code
> is not used, so it cannot eliminate the code. Also the lookup table size
> will include the new fast path. When the follow-up patch then enables
> the new fast path, the binary layout (alignments, size, etc.) will stay
> the same compared to the disabled case.
>
> Keeping the binary layout identical is important for benchmarking on
> Raspberry Pi 1. The addresses at which functions are loaded will have a
> significant impact on benchmark results, causing unexpected performance
> changes. Keeping all function addresses the same across the patch
> enabling a new fast path improves the reliability of benchmarks.

Don't we already have the PIXMAN_DISABLE environment variable exactly for
this purpose (testing different implementations without recompiling the
library)?

> Benchmark results are included in the patch enabling this fast path.
>
> [Pekka: disabled the fast path, commit message]
> Signed-off-by: Pekka Paalanen <[email protected]>
>
> ---
>  pixman/pixman-arm-simd-asm.S | 41 +++++++++++++++++++++++++++++++++++++++++
>  pixman/pixman-arm-simd.c     |  7 +++++++
>  2 files changed, 48 insertions(+)
>
> diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
> index 7b0727b..a74a0a8 100644
> --- a/pixman/pixman-arm-simd-asm.S
> +++ b/pixman/pixman-arm-simd-asm.S
> @@ -1136,3 +1136,44 @@ generate_composite_function \
>      in_reverse_8888_8888_process_tail
>
>
>  /******************************************************************************/
> +
> +.macro over_n_8888_init
> +        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
> +        /* Hold loop invariant in MASK */
> +        ldr     MASK, =0x00800080
> +        /* Hold multiplier for destination in STRIDE_M */
> +        mov     STRIDE_M, #255
> +        sub     STRIDE_M, STRIDE_M, SRC, lsr #24
> +        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
> +        uadd8   SCRATCH, MASK, MASK
> +.endm
> +
> +.macro over_n_8888_process_head cond, numbytes, firstreg, unaligned_src,
> unaligned_mask, preload
> +        pixld   , numbytes, firstreg, DST, 0
> +.endm
> +
> +.macro over_n_8888_1pixel dst
> +        mul_8888_8  WK&dst, STRIDE_M, SCRATCH, MASK
> +        uqadd8  WK&dst, WK&dst, SRC
> +.endm
> +
> +.macro over_n_8888_process_tail cond, numbytes, firstreg
> + .set PROCESS_REG, firstreg
> + .rept numbytes / 4
> +        over_n_8888_1pixel %(PROCESS_REG)
> +  .set PROCESS_REG, PROCESS_REG+1
> + .endr
> +        pixst   , numbytes, firstreg, DST
> +.endm
> +
> +generate_composite_function \
> +    pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 \
> +    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \
> +    2, /* prefetch distance */ \
> +    over_n_8888_init, \
> +    nop_macro, /* newline */ \
> +    nop_macro, /* cleanup */ \
> +    over_n_8888_process_head, \
> +    over_n_8888_process_tail
> +
> +/******************************************************************************/
> diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
> index f40ff36..62c0f41 100644
> --- a/pixman/pixman-arm-simd.c
> +++ b/pixman/pixman-arm-simd.c
> @@ -51,6 +51,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
>  PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
>                                     uint32_t, 1, uint32_t, 1)
>
> +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
> +                                 uint32_t, 1)
>  PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
>                                   uint32_t, 1)
>
> @@ -271,6 +273,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
>      SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
>
>      { PIXMAN_OP_NONE },
> +
> +    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8,
> armv6_composite_over_n_8888),
> +    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8,
> armv6_composite_over_n_8888),
> +    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8,
> armv6_composite_over_n_8888),
> +    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8,
> armv6_composite_over_n_8888),
>  };
>
>  pixman_implementation_t *

-- 
Best regards,
Siarhei Siamashka
_______________________________________________
Pixman mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/pixman
