On Fri, 27 Jul 2012, Ronald S. Bultje wrote:

> From: "Ronald S. Bultje" <[email protected]>
>
> This completes the conversion of h264dsp to yasm; note that h264 also
> uses some dsputil functions, most notably qpel. Performance-wise, the
> yasm-version is ~10 cycles faster (182->172) on x86-64, and ~8 cycles
> faster (201->193) on x86-32.
> ---
>  libavcodec/x86/h264_deblock.asm |  168 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/x86/h264dsp_mmx.c    |  162 ++-----------------------------------
>  2 files changed, 175 insertions(+), 155 deletions(-)
>
> diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
> index 1982dc4..77b25d2 100644
> --- a/libavcodec/x86/h264_deblock.asm
> +++ b/libavcodec/x86/h264_deblock.asm
> @@ -27,6 +27,10 @@
>  %include "x86inc.asm"
>  %include "x86util.asm"
>
> +SECTION_RODATA
> +
> +pb_3_1: times 4 db 3, 1
> +
>  SECTION .text
>
>  cextern pb_0
> @@ -911,3 +915,167 @@ ff_chroma_intra_body_mmxext:
>      paddb  m1, m5
>      paddb  m2, m6
>      ret
> +
> +;-----------------------------------------------------------------------------
> +; void h264_loop_filter_strength(int16_t bs[2][4][4], uint8_t nnz[40],
> +;                                int8_t ref[2][40], int16_t mv[2][40][2],
> +;                                int bidir,    int edges,    int step,
> +;                                int mask_mv0, int mask_mv1, int field);
> +;
> +; bidir    is 0 or 1
> +; edges    is 1 or 4
> +; step     is 1 or 2
> +; mask_mv0 is 0 or 3
> +; mask_mv1 is 0 or 1
> +; field    is 0 or 1
> +;-----------------------------------------------------------------------------
> +%macro loop_filter_strength_iteration 7 ; edges, step, mask_mv,
> +                                        ; dir, d_idx, mask_dir, bidir
> +%define edgesd    %1
> +%define stepd     %2
> +%define mask_mvd  %3
> +%define dir       %4
> +%define d_idx     %5
> +%define mask_dir  %6
> +%define bidir     %7
> +    xor          b_idxd, b_idxd ; for (b_idx = 0; b_idx < edges; b_idx += step)
> +.b_idx_loop_ %+ dir %+ _ %+ bidir:

%%.b_idx_loop:
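Using the %% prefix makes this a macro-local label: the assembler automatically
generates a different label for each instantiation of the macro, so the label
name does not need to be built by hand from the macro arguments.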

--Loren Merritt
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to