On date Friday 2011-04-15 02:23:50 +0200, Janne Grunau wrote:
> From: Janne Grunau <[email protected]>
>
> port swscale to AV_CPU_FLAGS* and use av_get_cpu_flags() for runtime cpu
> detection. Still only used with --enable-runtime-cpudetect. Next step is
> getting rid of all #if !CONFIG_RUNTIME_CPUDETECT code in libswscale or
> enable it by default to get the same behaviour as in libavcodec.
>
> Is there a way to enable deprecation warnings for defines?
>
> APIChanges entry is missing.
>
> Janne
> ---8<---
> use AV_CPU_FLAG_* internally and deprecate SWS_CPU_CAPS_*
> add dsp_mask to swsContext and AVOptions to fill them
> auto detection is only done if no SWS_CPU_CAPS are set in flags and
> dsp_mask does not contain AV_CPU_FLAG_FORCE
> ---
> libswscale/bfin/swscale_bfin.c | 1 -
> libswscale/colorspace-test.c | 8 +-
> libswscale/options.c | 11 +++
> libswscale/ppc/yuv2rgb_altivec.c | 2 +-
> libswscale/rgb2rgb.c | 5 +-
> libswscale/rgb2rgb.h | 4 +-
> libswscale/swscale.c | 27 +++----
> libswscale/swscale.h | 6 ++
> libswscale/swscale_internal.h | 1 +
> libswscale/utils.c | 138
> ++++++++++++++++++++++++++++---------
> libswscale/x86/rgb2rgb.c | 11 ++--
> libswscale/x86/swscale_template.c | 7 +-
> libswscale/x86/yuv2rgb_mmx.c | 5 +-
> libswscale/yuv2rgb.c | 6 +-
> 14 files changed, 158 insertions(+), 74 deletions(-)
>
> diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c
> index fa3c03b..02fcfb5 100644
> --- a/libswscale/bfin/swscale_bfin.c
> +++ b/libswscale/bfin/swscale_bfin.c
> @@ -79,7 +79,6 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t*
> src[], int srcStride[], i
> void ff_bfin_get_unscaled_swscale(SwsContext *c)
> {
> SwsFunc swScale = c->swScale;
> - if (c->flags & SWS_CPU_CAPS_BFIN)
> if (c->dstFormat == PIX_FMT_YUV420P)
> if (c->srcFormat == PIX_FMT_UYVY422) {
> av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized
> uyvytoyv12_unscaled\n");
> diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
> index 4e7116f..914b824 100644
> --- a/libswscale/colorspace-test.c
> +++ b/libswscale/colorspace-test.c
> @@ -33,7 +33,7 @@
>
> #define FUNC(s,d,n) {s,d,#n,n}
>
> -static int cpu_caps;
> +static unsigned cpu_caps;
What's the rationale for the int -> unsigned change?
Also, it should possibly be done in a separate patch; the current patch is
pretty hard to read.
> static char *args_parse(int argc, char *argv[])
> {
> @@ -42,13 +42,13 @@ static char *args_parse(int argc, char *argv[])
> while ((o = getopt(argc, argv, "m23")) != -1) {
> switch (o) {
> case 'm':
> - cpu_caps |= SWS_CPU_CAPS_MMX;
> + cpu_caps |= AV_CPU_FLAG_MMX;
> break;
> case '2':
> - cpu_caps |= SWS_CPU_CAPS_MMX2;
> + cpu_caps |= AV_CPU_FLAG_MMX2;
> break;
> case '3':
> - cpu_caps |= SWS_CPU_CAPS_3DNOW;
> + cpu_caps |= AV_CPU_FLAG_3DNOW;
> break;
> default:
> av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", o);
> diff --git a/libswscale/options.c b/libswscale/options.c
> index 59b9994..92b9789 100644
> --- a/libswscale/options.c
> +++ b/libswscale/options.c
> @@ -19,6 +19,7 @@
> */
>
> #include "libavutil/avutil.h"
> +#include "libavutil/cpu.h"
> #include "libavutil/pixfmt.h"
> #include "libavcodec/opt.h"
> #include "swscale.h"
> @@ -48,12 +49,14 @@ static const AVOption options[] = {
> { "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, SWS_SPLINE,
> INT_MIN, INT_MAX, VE, "sws_flags" },
> { "print_info", "print info", 0, FF_OPT_TYPE_CONST, SWS_PRINT_INFO,
> INT_MIN, INT_MAX, VE, "sws_flags" },
> { "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST,
> SWS_ACCURATE_RND, INT_MIN, INT_MAX, VE, "sws_flags" },
> +#if FF_API_SWS_CPUFLAGS
> { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> SWS_CPU_CAPS_MMX, INT_MIN, INT_MAX, VE, "sws_flags" },
> { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> SWS_CPU_CAPS_MMX2, INT_MIN, INT_MAX, VE, "sws_flags" },
> { "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> SWS_CPU_CAPS_SSE2, INT_MIN, INT_MAX, VE, "sws_flags" },
> { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> SWS_CPU_CAPS_3DNOW, INT_MIN, INT_MAX, VE, "sws_flags" },
> { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> SWS_CPU_CAPS_ALTIVEC, INT_MIN, INT_MAX, VE, "sws_flags" },
> { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> SWS_CPU_CAPS_BFIN, INT_MIN, INT_MAX, VE, "sws_flags" },
> +#endif /* FF_API_SWS_CPUFLAGS */
> { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST,
> SWS_FULL_CHR_H_INT, INT_MIN, INT_MAX, VE, "sws_flags" },
> { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST,
> SWS_FULL_CHR_H_INP, INT_MIN, INT_MAX, VE, "sws_flags" },
> { "bitexact", "", 0 , FF_OPT_TYPE_CONST, SWS_BITEXACT, INT_MIN, INT_MAX,
> VE, "sws_flags" },
> @@ -69,6 +72,14 @@ static const AVOption options[] = {
> { "param0" , "scaler param 0" , OFFSET(param[0]) , FF_OPT_TYPE_DOUBLE,
> SWS_PARAM_DEFAULT, INT_MIN, INT_MAX, VE },
> { "param1" , "scaler param 1" , OFFSET(param[1]) , FF_OPT_TYPE_DOUBLE,
> SWS_PARAM_DEFAULT, INT_MIN, INT_MAX, VE },
>
> + { "sws_dsp_mask", "override cpu flags", OFFSET(dsp_mask),
> FF_OPT_TYPE_FLAGS, DEFAULT, 0, UINT_MAX, VE, "sws_dsp_mask" },
Confusing name/description mismatch (is it a mask, is it a
flag set, why "dsp"?)
> + { "null", "no SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> AV_CPU_FLAG_FORCE, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
> + { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> AV_CPU_FLAG_FORCE|AV_CPU_FLAG_MMX, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
> + { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> AV_CPU_FLAG_FORCE|AV_CPU_FLAG_MMX2, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
> + { "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> AV_CPU_FLAG_FORCE|AV_CPU_FLAG_SSE2, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
> + { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> AV_CPU_FLAG_FORCE|AV_CPU_FLAG_3DNOW, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
> + { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST,
> AV_CPU_FLAG_FORCE|AV_CPU_FLAG_ALTIVEC, INT_MIN, UINT_MAX, VE, "sws_dsp_mask"
> },
> +
> { NULL }
> };
>
> diff --git a/libswscale/ppc/yuv2rgb_altivec.c
> b/libswscale/ppc/yuv2rgb_altivec.c
> index 0113c8d..6d15cb9 100644
> --- a/libswscale/ppc/yuv2rgb_altivec.c
> +++ b/libswscale/ppc/yuv2rgb_altivec.c
> @@ -692,7 +692,7 @@ static int altivec_uyvy_rgb32 (SwsContext *c,
> */
> SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
> {
> - if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
> + if (!(c->dsp_mask & AV_CPU_FLAG_ALTIVEC))
> return NULL;
>
> /*
> diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
> index 12d6287..92a4b37 100644
> --- a/libswscale/rgb2rgb.c
> +++ b/libswscale/rgb2rgb.c
> @@ -25,6 +25,7 @@
> #include <inttypes.h>
> #include "config.h"
> #include "libavutil/bswap.h"
> +#include "libavutil/cpu.h"
> #include "rgb2rgb.h"
> #include "swscale.h"
> #include "swscale_internal.h"
> @@ -116,11 +117,11 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst,
> uint8_t *vdst, const uint8_t
> 32-bit C version, and and&add trick by Michael Niedermayer
> */
>
> -void sws_rgb2rgb_init(int flags)
> +void sws_rgb2rgb_init(unsigned dsp_mask)
> {
> rgb2rgb_init_c();
> #if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
> - rgb2rgb_init_x86(flags);
> + rgb2rgb_init_x86(dsp_mask);
> #endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
> }
>
> diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
> index bde1134..8028cc0 100644
> --- a/libswscale/rgb2rgb.h
> +++ b/libswscale/rgb2rgb.h
> @@ -166,8 +166,8 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst,
> uint8_t *vdst, const u
> long width, long height,
> long lumStride, long chromStride, long
> srcStride);
>
> -void sws_rgb2rgb_init(int flags);
> +void sws_rgb2rgb_init(unsigned dsp_mask);
>
> -void rgb2rgb_init_x86(int flags);
> +void rgb2rgb_init_x86(unsigned dsp_mask);
>
> #endif /* SWSCALE_RGB2RGB_H */
> diff --git a/libswscale/swscale.c b/libswscale/swscale.c
> index b77cc2e..2c22077 100644
> --- a/libswscale/swscale.c
> +++ b/libswscale/swscale.c
> @@ -65,6 +65,7 @@ untested special converters
> #include "libavutil/avutil.h"
> #include "libavutil/mathematics.h"
> #include "libavutil/bswap.h"
> +#include "libavutil/cpu.h"
> #include "libavutil/pixdesc.h"
>
> #undef MOVNTQ
> @@ -1200,24 +1201,22 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
> sws_init_swScale_c(c);
>
> #if CONFIG_RUNTIME_CPUDETECT
> - int flags = c->flags;
> -
> #if ARCH_X86
> // ordered per speed fastest first
> - if (flags & SWS_CPU_CAPS_MMX2) {
> + if (c->dsp_mask & AV_CPU_FLAG_MMX2) {
> sws_init_swScale_MMX2(c);
> return swScale_MMX2;
> - } else if (flags & SWS_CPU_CAPS_3DNOW) {
> + } else if (c->dsp_mask & AV_CPU_FLAG_3DNOW) {
> sws_init_swScale_3DNow(c);
> return swScale_3DNow;
> - } else if (flags & SWS_CPU_CAPS_MMX) {
> + } else if (c->dsp_mask & AV_CPU_FLAG_MMX) {
> sws_init_swScale_MMX(c);
> return swScale_MMX;
> }
>
> #else
> #if COMPILE_ALTIVEC
> - if (flags & SWS_CPU_CAPS_ALTIVEC) {
> + if (c->dsp_mask & AV_CPU_FLAG_ALTIVEC) {
> sws_init_swScale_altivec(c);
> return swScale_altivec;
> }
> @@ -1665,15 +1664,13 @@ int ff_hardcodedcpuflags(void)
> {
> int flags = 0;
> #if COMPILE_TEMPLATE_MMX2
> - flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
> + flags |= AV_CPU_FLAG_MMX|AV_CPU_FLAG_MMX2;
> #elif COMPILE_TEMPLATE_AMD3DNOW
> - flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
> + flags |= AV_CPU_FLAG_MMX|AV_CPU_FLAG_3DNOW;
> #elif COMPILE_TEMPLATE_MMX
> - flags |= SWS_CPU_CAPS_MMX;
> + flags |= AV_CPU_FLAG_MMX;
> #elif COMPILE_TEMPLATE_ALTIVEC
> - flags |= SWS_CPU_CAPS_ALTIVEC;
> -#elif ARCH_BFIN
> - flags |= SWS_CPU_CAPS_BFIN;
> + flags |= AV_CPU_FLAG_ALTIVEC;
> #endif
> return flags;
> }
> @@ -1760,7 +1757,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
> c->swScale= uyvyToYuv422Wrapper;
>
> #if COMPILE_ALTIVEC
> - if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
> + if ((c->dsp_mask & AV_CPU_FLAG_ALTIVEC) &&
> !(c->flags & SWS_BITEXACT) &&
> srcFormat == PIX_FMT_YUV420P) {
> // unscaled YV12 -> packed YUV, we want speed
> @@ -1789,10 +1786,8 @@ void ff_get_unscaled_swscale(SwsContext *c)
> else /* Planar YUV or gray */
> c->swScale= planarCopyWrapper;
> }
> -#if ARCH_BFIN
> - if (flags & SWS_CPU_CAPS_BFIN)
> + if (ARCH_BFIN)
> ff_bfin_get_unscaled_swscale (c);
> -#endif
> }
>
> static void reset_ptr(const uint8_t* src[], int format)
> diff --git a/libswscale/swscale.h b/libswscale/swscale.h
> index aae1a6c..22dcb8e 100644
> --- a/libswscale/swscale.h
> +++ b/libswscale/swscale.h
> @@ -51,6 +51,10 @@
> #define FF_API_SWS_GETCONTEXT (LIBSWSCALE_VERSION_MAJOR < 2)
> #endif
>
> +#ifndef FF_API_SWS_CPUFLAGS
> +#define FF_API_SWS_CPUFLAGS (LIBSWSCALE_VERSION_MAJOR < 2)
> +#endif
> +
> /**
> * Returns the LIBSWSCALE_VERSION_INT constant.
> */
> @@ -95,12 +99,14 @@ const char *swscale_license(void);
> #define SWS_ACCURATE_RND 0x40000
> #define SWS_BITEXACT 0x80000
>
> +#if FF_API_SWS_CPUFLAGS
> #define SWS_CPU_CAPS_MMX 0x80000000
> #define SWS_CPU_CAPS_MMX2 0x20000000
> #define SWS_CPU_CAPS_3DNOW 0x40000000
> #define SWS_CPU_CAPS_ALTIVEC 0x10000000
> #define SWS_CPU_CAPS_BFIN 0x01000000
> #define SWS_CPU_CAPS_SSE2 0x02000000
> +#endif
>
> #define SWS_MAX_REDUCE_CUTOFF 0.002
>
> diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
> index 4c6ad2a..91fdf24 100644
> --- a/libswscale/swscale_internal.h
> +++ b/libswscale/swscale_internal.h
> @@ -316,6 +316,7 @@ typedef struct SwsContext {
>
> int needs_hcscale; ///< Set if there are chroma planes to be converted.
>
> + unsigned dsp_mask; ///< Flags passed by the user to select optimizations
Name/description mismatch.
The obvious name I can think of is "cpu_flags".
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel