From: Janne Grunau <[email protected]>

This patch ports swscale to AV_CPU_FLAG_* and uses av_get_cpu_flags() for
runtime CPU detection. It is still only used with --enable-runtime-cpudetect.
The next step is either getting rid of all #if !CONFIG_RUNTIME_CPUDETECT code
in libswscale or enabling runtime detection by default, to get the same
behaviour as in libavcodec.

Is there a way to enable deprecation warnings for defines?

APIChanges entry is missing.

Janne
---8<---
Use AV_CPU_FLAG_* internally and deprecate SWS_CPU_CAPS_*.
Add dsp_mask to SwsContext and AVOptions to fill it.
Auto detection is only done if no SWS_CPU_CAPS_* are set in flags and
dsp_mask does not contain AV_CPU_FLAG_FORCE.
---
 libswscale/bfin/swscale_bfin.c    |    1 -
 libswscale/colorspace-test.c      |    8 +-
 libswscale/options.c              |   11 +++
 libswscale/ppc/yuv2rgb_altivec.c  |    2 +-
 libswscale/rgb2rgb.c              |    5 +-
 libswscale/rgb2rgb.h              |    4 +-
 libswscale/swscale.c              |   27 +++----
 libswscale/swscale.h              |    6 ++
 libswscale/swscale_internal.h     |    1 +
 libswscale/utils.c                |  138 ++++++++++++++++++++++++++++---------
 libswscale/x86/rgb2rgb.c          |   11 ++--
 libswscale/x86/swscale_template.c |    7 +-
 libswscale/x86/yuv2rgb_mmx.c      |    5 +-
 libswscale/yuv2rgb.c              |    6 +-
 14 files changed, 158 insertions(+), 74 deletions(-)

diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c
index fa3c03b..02fcfb5 100644
--- a/libswscale/bfin/swscale_bfin.c
+++ b/libswscale/bfin/swscale_bfin.c
@@ -79,7 +79,6 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], 
int srcStride[], i
 void ff_bfin_get_unscaled_swscale(SwsContext *c)
 {
     SwsFunc swScale = c->swScale;
-    if (c->flags & SWS_CPU_CAPS_BFIN)
         if (c->dstFormat == PIX_FMT_YUV420P)
             if (c->srcFormat == PIX_FMT_UYVY422) {
                 av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized 
uyvytoyv12_unscaled\n");
diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
index 4e7116f..914b824 100644
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@@ -33,7 +33,7 @@
 
 #define FUNC(s,d,n) {s,d,#n,n}
 
-static int cpu_caps;
+static unsigned cpu_caps;
 
 static char *args_parse(int argc, char *argv[])
 {
@@ -42,13 +42,13 @@ static char *args_parse(int argc, char *argv[])
     while ((o = getopt(argc, argv, "m23")) != -1) {
         switch (o) {
         case 'm':
-            cpu_caps |= SWS_CPU_CAPS_MMX;
+            cpu_caps |= AV_CPU_FLAG_MMX;
             break;
         case '2':
-            cpu_caps |= SWS_CPU_CAPS_MMX2;
+            cpu_caps |= AV_CPU_FLAG_MMX2;
             break;
         case '3':
-            cpu_caps |= SWS_CPU_CAPS_3DNOW;
+            cpu_caps |= AV_CPU_FLAG_3DNOW;
             break;
         default:
             av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", o);
diff --git a/libswscale/options.c b/libswscale/options.c
index 59b9994..92b9789 100644
--- a/libswscale/options.c
+++ b/libswscale/options.c
@@ -19,6 +19,7 @@
  */
 
 #include "libavutil/avutil.h"
+#include "libavutil/cpu.h"
 #include "libavutil/pixfmt.h"
 #include "libavcodec/opt.h"
 #include "swscale.h"
@@ -48,12 +49,14 @@ static const AVOption options[] = {
     { "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, SWS_SPLINE, 
INT_MIN, INT_MAX, VE, "sws_flags" },
     { "print_info", "print info", 0, FF_OPT_TYPE_CONST, SWS_PRINT_INFO, 
INT_MIN, INT_MAX, VE, "sws_flags" },
     { "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, 
SWS_ACCURATE_RND, INT_MIN, INT_MAX, VE, "sws_flags" },
+#if FF_API_SWS_CPUFLAGS
     { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX, 
INT_MIN, INT_MAX, VE, "sws_flags" },
     { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, 
SWS_CPU_CAPS_MMX2, INT_MIN, INT_MAX, VE, "sws_flags" },
     { "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, 
SWS_CPU_CAPS_SSE2, INT_MIN, INT_MAX, VE, "sws_flags" },
     { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, 
SWS_CPU_CAPS_3DNOW, INT_MIN, INT_MAX, VE, "sws_flags" },
     { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, 
SWS_CPU_CAPS_ALTIVEC, INT_MIN, INT_MAX, VE, "sws_flags" },
     { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, 
SWS_CPU_CAPS_BFIN, INT_MIN, INT_MAX, VE, "sws_flags" },
+#endif /* FF_API_SWS_CPUFLAGS */
     { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, 
SWS_FULL_CHR_H_INT, INT_MIN, INT_MAX, VE, "sws_flags" },
     { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, 
SWS_FULL_CHR_H_INP, INT_MIN, INT_MAX, VE, "sws_flags" },
     { "bitexact", "", 0 , FF_OPT_TYPE_CONST, SWS_BITEXACT, INT_MIN, INT_MAX, 
VE, "sws_flags" },
@@ -69,6 +72,14 @@ static const AVOption options[] = {
     { "param0" , "scaler param 0" , OFFSET(param[0]) , FF_OPT_TYPE_DOUBLE, 
SWS_PARAM_DEFAULT, INT_MIN, INT_MAX, VE },
     { "param1" , "scaler param 1" , OFFSET(param[1]) , FF_OPT_TYPE_DOUBLE, 
SWS_PARAM_DEFAULT, INT_MIN, INT_MAX, VE },
 
+    { "sws_dsp_mask", "override cpu flags", OFFSET(dsp_mask), FF_OPT_TYPE_FLAGS, DEFAULT, 0, UINT_MAX, VE, "sws_dsp_mask" },
+    { "null", "no SIMD acceleration", 0, FF_OPT_TYPE_CONST, AV_CPU_FLAG_FORCE, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
+    { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, AV_CPU_FLAG_FORCE|AV_CPU_FLAG_MMX, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
+    { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, AV_CPU_FLAG_FORCE|AV_CPU_FLAG_MMX2, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
+    { "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, AV_CPU_FLAG_FORCE|AV_CPU_FLAG_SSE2, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
+    { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, AV_CPU_FLAG_FORCE|AV_CPU_FLAG_3DNOW, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
+    { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, AV_CPU_FLAG_FORCE|AV_CPU_FLAG_ALTIVEC, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
+
     { NULL }
 };
 
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 0113c8d..6d15cb9 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -692,7 +692,7 @@ static int altivec_uyvy_rgb32 (SwsContext *c,
 */
 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
 {
-    if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
+    if (!(c->dsp_mask & AV_CPU_FLAG_ALTIVEC))
         return NULL;
 
     /*
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 12d6287..92a4b37 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -25,6 +25,7 @@
 #include <inttypes.h>
 #include "config.h"
 #include "libavutil/bswap.h"
+#include "libavutil/cpu.h"
 #include "rgb2rgb.h"
 #include "swscale.h"
 #include "swscale_internal.h"
@@ -116,11 +117,11 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, 
uint8_t *vdst, const uint8_t
  32-bit C version, and and&add trick by Michael Niedermayer
 */
 
-void sws_rgb2rgb_init(int flags)
+void sws_rgb2rgb_init(unsigned dsp_mask)
 {
     rgb2rgb_init_c();
 #if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
-    rgb2rgb_init_x86(flags);
+    rgb2rgb_init_x86(dsp_mask);
 #endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
 }
 
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index bde1134..8028cc0 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -166,8 +166,8 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, 
uint8_t *vdst, const u
                             long width, long height,
                             long lumStride, long chromStride, long srcStride);
 
-void sws_rgb2rgb_init(int flags);
+void sws_rgb2rgb_init(unsigned dsp_mask);
 
-void rgb2rgb_init_x86(int flags);
+void rgb2rgb_init_x86(unsigned dsp_mask);
 
 #endif /* SWSCALE_RGB2RGB_H */
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index b77cc2e..2c22077 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -65,6 +65,7 @@ untested special converters
 #include "libavutil/avutil.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/bswap.h"
+#include "libavutil/cpu.h"
 #include "libavutil/pixdesc.h"
 
 #undef MOVNTQ
@@ -1200,24 +1201,22 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
     sws_init_swScale_c(c);
 
 #if CONFIG_RUNTIME_CPUDETECT
-    int flags = c->flags;
-
 #if ARCH_X86
     // ordered per speed fastest first
-    if (flags & SWS_CPU_CAPS_MMX2) {
+    if (c->dsp_mask & AV_CPU_FLAG_MMX2) {
         sws_init_swScale_MMX2(c);
         return swScale_MMX2;
-    } else if (flags & SWS_CPU_CAPS_3DNOW) {
+    } else if (c->dsp_mask & AV_CPU_FLAG_3DNOW) {
         sws_init_swScale_3DNow(c);
         return swScale_3DNow;
-    } else if (flags & SWS_CPU_CAPS_MMX) {
+    } else if (c->dsp_mask & AV_CPU_FLAG_MMX) {
         sws_init_swScale_MMX(c);
         return swScale_MMX;
     }
 
 #else
 #if COMPILE_ALTIVEC
-    if (flags & SWS_CPU_CAPS_ALTIVEC) {
+    if (c->dsp_mask & AV_CPU_FLAG_ALTIVEC) {
         sws_init_swScale_altivec(c);
         return swScale_altivec;
     }
@@ -1665,15 +1664,13 @@ int ff_hardcodedcpuflags(void)
 {
     int flags = 0;
 #if   COMPILE_TEMPLATE_MMX2
-    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
+    flags |= AV_CPU_FLAG_MMX|AV_CPU_FLAG_MMX2;
 #elif COMPILE_TEMPLATE_AMD3DNOW
-    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
+    flags |= AV_CPU_FLAG_MMX|AV_CPU_FLAG_3DNOW;
 #elif COMPILE_TEMPLATE_MMX
-    flags |= SWS_CPU_CAPS_MMX;
+    flags |= AV_CPU_FLAG_MMX;
 #elif COMPILE_TEMPLATE_ALTIVEC
-    flags |= SWS_CPU_CAPS_ALTIVEC;
-#elif ARCH_BFIN
-    flags |= SWS_CPU_CAPS_BFIN;
+    flags |= AV_CPU_FLAG_ALTIVEC;
 #endif
     return flags;
 }
@@ -1760,7 +1757,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
         c->swScale= uyvyToYuv422Wrapper;
 
 #if COMPILE_ALTIVEC
-    if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
+    if ((c->dsp_mask & AV_CPU_FLAG_ALTIVEC) &&
         !(c->flags & SWS_BITEXACT) &&
         srcFormat == PIX_FMT_YUV420P) {
         // unscaled YV12 -> packed YUV, we want speed
@@ -1789,10 +1786,8 @@ void ff_get_unscaled_swscale(SwsContext *c)
         else /* Planar YUV or gray */
             c->swScale= planarCopyWrapper;
     }
-#if ARCH_BFIN
-    if (flags & SWS_CPU_CAPS_BFIN)
+    if (ARCH_BFIN)
         ff_bfin_get_unscaled_swscale (c);
-#endif
 }
 
 static void reset_ptr(const uint8_t* src[], int format)
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index aae1a6c..22dcb8e 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -51,6 +51,10 @@
 #define FF_API_SWS_GETCONTEXT  (LIBSWSCALE_VERSION_MAJOR < 2)
 #endif
 
+#ifndef FF_API_SWS_CPUFLAGS
+#define FF_API_SWS_CPUFLAGS  (LIBSWSCALE_VERSION_MAJOR < 2)
+#endif
+
 /**
  * Returns the LIBSWSCALE_VERSION_INT constant.
  */
@@ -95,12 +99,14 @@ const char *swscale_license(void);
 #define SWS_ACCURATE_RND      0x40000
 #define SWS_BITEXACT          0x80000
 
+#if FF_API_SWS_CPUFLAGS
 #define SWS_CPU_CAPS_MMX      0x80000000
 #define SWS_CPU_CAPS_MMX2     0x20000000
 #define SWS_CPU_CAPS_3DNOW    0x40000000
 #define SWS_CPU_CAPS_ALTIVEC  0x10000000
 #define SWS_CPU_CAPS_BFIN     0x01000000
 #define SWS_CPU_CAPS_SSE2     0x02000000
+#endif
 
 #define SWS_MAX_REDUCE_CUTOFF 0.002
 
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 4c6ad2a..91fdf24 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -316,6 +316,7 @@ typedef struct SwsContext {
 
     int needs_hcscale; ///< Set if there are chroma planes to be converted.
 
+    unsigned dsp_mask; ///< Flags passed by the user to select optimizations
 } SwsContext;
 //FIXME check init (where 0)
 
diff --git a/libswscale/utils.c b/libswscale/utils.c
index bf61dfd..7f5a900 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -43,6 +43,7 @@
 #include "libavutil/x86_cpu.h"
 #include "libavutil/avutil.h"
 #include "libavutil/bswap.h"
+#include "libavutil/cpu.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 
@@ -173,6 +174,7 @@ static double getSplineCoeff(double a, double b, double c, 
double d, double dist
 
 static int initFilter(int16_t **outFilter, int16_t **filterPos, int 
*outFilterSize, int xInc,
                       int srcW, int dstW, int filterAlign, int one, int flags,
+                      unsigned dsp_mask,
                       SwsVector *srcFilter, SwsVector *dstFilter, double 
param[2])
 {
     int i;
@@ -184,7 +186,7 @@ static int initFilter(int16_t **outFilter, int16_t 
**filterPos, int *outFilterSi
     const int64_t fone= 1LL<<54;
     int ret= -1;
 #if ARCH_X86
-    if (flags & SWS_CPU_CAPS_MMX)
+    if (dsp_mask & AV_CPU_FLAG_MMX)
         __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be 
required but it IS (even for non-MMX versions)
 #endif
 
@@ -403,7 +405,7 @@ static int initFilter(int16_t **outFilter, int16_t 
**filterPos, int *outFilterSi
         if (min>minFilterSize) minFilterSize= min;
     }
 
-    if (flags & SWS_CPU_CAPS_ALTIVEC) {
+    if (dsp_mask & AV_CPU_FLAG_ALTIVEC) {
         // we can handle the special case 4,
         // so we don't want to go to the full 8
         if (minFilterSize < 5)
@@ -418,7 +420,7 @@ static int initFilter(int16_t **outFilter, int16_t 
**filterPos, int *outFilterSi
             filterAlign = 1;
     }
 
-    if (flags & SWS_CPU_CAPS_MMX) {
+    if (dsp_mask & AV_CPU_FLAG_MMX) {
         // special case for unscaled vertical filtering
         if (minFilterSize == 1 && filterAlign == 2)
             filterAlign= 1;
@@ -674,7 +676,11 @@ static void getSubSampleFactors(int *h, int *v, enum 
PixelFormat format)
     *v = av_pix_fmt_descriptors[format].log2_chroma_h;
 }
 
+#if FF_API_SWS_CPUFLAGS
 static int update_flags_cpu(int flags);
+static unsigned sws_cpu_caps2av_cpu_flags(int flags);
+static int strip_cpu_flags(int flags);
+#endif
 
 int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int 
srcRange, const int table[4], int dstRange, int brightness, int contrast, int 
saturation)
 {
@@ -690,13 +696,20 @@ int sws_setColorspaceDetails(SwsContext *c, const int 
inv_table[4], int srcRange
 
     c->dstFormatBpp = 
av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->dstFormat]);
     c->srcFormatBpp = 
av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->srcFormat]);
-    c->flags = update_flags_cpu(c->flags);
+
+    if (!(c->dsp_mask & AV_CPU_FLAG_FORCE))
+        c->dsp_mask = av_get_cpu_flags();
+#if FF_API_SWS_CPUFLAGS
+     // if any SWS_CPU_CAPS_* is set do not use auto detection
+    if (update_flags_cpu(c->flags) != strip_cpu_flags(c->flags))
+        c->dsp_mask = sws_cpu_caps2av_cpu_flags(update_flags_cpu(c->flags));
+#endif
 
     ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, 
saturation);
     //FIXME factorize
 
 #if HAVE_ALTIVEC
-    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
+    if (c->dsp_mask & AV_CPU_FLAG_ALTIVEC)
         ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, 
saturation);
 #endif
     return 0;
@@ -728,19 +741,27 @@ static int handle_jpeg(enum PixelFormat *format)
     }
 }
 
-static int update_flags_cpu(int flags)
+#if FF_API_SWS_CPUFLAGS
+static int strip_cpu_flags(int flags)
 {
-#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled 
variant if cpudetect is off
     flags &= ~( SWS_CPU_CAPS_MMX
                |SWS_CPU_CAPS_MMX2
                |SWS_CPU_CAPS_3DNOW
                |SWS_CPU_CAPS_SSE2
                |SWS_CPU_CAPS_ALTIVEC
                |SWS_CPU_CAPS_BFIN);
+    return flags;
+}
+
+static int update_flags_cpu(int flags)
+{
+#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled 
variant if cpudetect is off
+    flags  = strip_cpu_flags(flags);
     flags |= ff_hardcodedcpuflags();
 #endif /* CONFIG_RUNTIME_CPUDETECT */
     return flags;
 }
+#endif
 
 SwsContext *sws_alloc_context(void)
 {
@@ -752,6 +773,26 @@ SwsContext *sws_alloc_context(void)
     return c;
 }
 
+#if FF_API_SWS_CPUFLAGS
+static unsigned sws_cpu_caps2av_cpu_flags(int flags)
+{
+    unsigned cpuflags = 0;
+    /* map legacy SWS_CPU_CAPS_* bits to AV_CPU_FLAG_*; caps of other architectures are dropped */
+    if (ARCH_X86 && (flags & SWS_CPU_CAPS_MMX))
+        cpuflags |= AV_CPU_FLAG_MMX;
+    if (ARCH_X86 && (flags & SWS_CPU_CAPS_MMX2))
+        cpuflags |= AV_CPU_FLAG_MMX2;
+    if (ARCH_X86 && (flags & SWS_CPU_CAPS_3DNOW))
+        cpuflags |= AV_CPU_FLAG_3DNOW;
+    if (ARCH_X86 && (flags & SWS_CPU_CAPS_SSE2))
+        cpuflags |= AV_CPU_FLAG_SSE2;
+    if (ARCH_PPC && (flags & SWS_CPU_CAPS_ALTIVEC))
+        cpuflags |= AV_CPU_FLAG_ALTIVEC;
+
+    return cpuflags;
+}
+#endif
+
 int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 {
     int i;
@@ -763,15 +804,25 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, 
SwsFilter *dstFilter)
     int dstW= c->dstW;
     int dstH= c->dstH;
     int flags;
+    unsigned dsp_mask;
     enum PixelFormat srcFormat= c->srcFormat;
     enum PixelFormat dstFormat= c->dstFormat;
 
-    flags= c->flags = update_flags_cpu(c->flags);
+    if (!(c->dsp_mask & AV_CPU_FLAG_FORCE))
+        c->dsp_mask = av_get_cpu_flags();
+    dsp_mask = c->dsp_mask;
+
+#if FF_API_SWS_CPUFLAGS
+    flags = c->flags = strip_cpu_flags(c->flags);
+#else
+    flags = c->flags;
+#endif /* FF_API_SWS_CPUFLAGS */
+
 #if ARCH_X86
-    if (flags & SWS_CPU_CAPS_MMX)
+    if (dsp_mask & AV_CPU_FLAG_MMX)
         __asm__ volatile("emms\n\t"::: "memory");
 #endif
-    if (!rgb15to16) sws_rgb2rgb_init(flags);
+    if (!rgb15to16) sws_rgb2rgb_init(dsp_mask);
 
     unscaled = (srcW == dstW && srcH == dstH);
 
@@ -864,7 +915,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, 
SwsFilter *dstFilter)
         }
     }
 
-    if (flags & SWS_CPU_CAPS_MMX2) {
+    if (dsp_mask & AV_CPU_FLAG_MMX2) {
         c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 
0;
         if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && 
(flags&SWS_FAST_BILINEAR)) {
             if (flags&SWS_PRINT_INFO)
@@ -890,7 +941,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, 
SwsFilter *dstFilter)
             c->chrXInc+= 20;
         }
         //we don't use the x86 asm scaler if MMX is available
-        else if (flags & SWS_CPU_CAPS_MMX) {
+        else if (dsp_mask & AV_CPU_FLAG_MMX) {
             c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
             c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
         }
@@ -933,18 +984,20 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, 
SwsFilter *dstFilter)
 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
         {
             const int filterAlign=
-                (flags & SWS_CPU_CAPS_MMX) ? 4 :
-                (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+                (dsp_mask & AV_CPU_FLAG_MMX) ? 4 :
+                (dsp_mask & AV_CPU_FLAG_ALTIVEC) ? 8 :
                 1;
 
             if (initFilter(&c->hLumFilter, &c->hLumFilterPos, 
&c->hLumFilterSize, c->lumXInc,
                            srcW      ,       dstW, filterAlign, 1<<14,
                            (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
+                           dsp_mask,
                            srcFilter->lumH, dstFilter->lumH, c->param) < 0)
                 goto fail;
             if (initFilter(&c->hChrFilter, &c->hChrFilterPos, 
&c->hChrFilterSize, c->chrXInc,
                            c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
                            (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+                           dsp_mask,
                            srcFilter->chrH, dstFilter->chrH, c->param) < 0)
                 goto fail;
         }
@@ -953,18 +1006,20 @@ int sws_init_context(SwsContext *c, SwsFilter 
*srcFilter, SwsFilter *dstFilter)
     /* precalculate vertical scaler filter coefficients */
     {
         const int filterAlign=
-            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
-            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+            (dsp_mask & AV_CPU_FLAG_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
+            (dsp_mask & AV_CPU_FLAG_ALTIVEC) ? 8 :
             1;
 
         if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, 
c->lumYInc,
                        srcH      ,        dstH, filterAlign, (1<<12),
                        (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
+                       dsp_mask,
                        srcFilter->lumV, dstFilter->lumV, c->param) < 0)
             goto fail;
         if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, 
c->chrYInc,
                        c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
                        (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+                       dsp_mask,
                        srcFilter->chrV, dstFilter->chrV, c->param) < 0)
             goto fail;
 
@@ -1058,13 +1113,13 @@ int sws_init_context(SwsContext *c, SwsFilter 
*srcFilter, SwsFilter *dstFilter)
 #endif
                sws_format_name(dstFormat));
 
-        if      (flags & SWS_CPU_CAPS_MMX2)    av_log(c, AV_LOG_INFO, "using 
MMX2\n");
-        else if (flags & SWS_CPU_CAPS_3DNOW)   av_log(c, AV_LOG_INFO, "using 
3DNOW\n");
-        else if (flags & SWS_CPU_CAPS_MMX)     av_log(c, AV_LOG_INFO, "using 
MMX\n");
-        else if (flags & SWS_CPU_CAPS_ALTIVEC) av_log(c, AV_LOG_INFO, "using 
AltiVec\n");
-        else                                   av_log(c, AV_LOG_INFO, "using 
C\n");
+        if      (dsp_mask & AV_CPU_FLAG_MMX2)    av_log(c, AV_LOG_INFO, "using 
MMX2\n");
+        else if (dsp_mask & AV_CPU_FLAG_3DNOW)   av_log(c, AV_LOG_INFO, "using 
3DNOW\n");
+        else if (dsp_mask & AV_CPU_FLAG_MMX)     av_log(c, AV_LOG_INFO, "using 
MMX\n");
+        else if (dsp_mask & AV_CPU_FLAG_ALTIVEC) av_log(c, AV_LOG_INFO, "using 
AltiVec\n");
+        else                                     av_log(c, AV_LOG_INFO, "using 
C\n");
 
-        if (flags & SWS_CPU_CAPS_MMX) {
+        if (dsp_mask & AV_CPU_FLAG_MMX) {
             if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
                 av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for 
horizontal scaling\n");
             else {
@@ -1094,31 +1149,31 @@ int sws_init_context(SwsContext *c, SwsFilter 
*srcFilter, SwsFilter *dstFilter)
         }
         if (isPlanarYUV(dstFormat)) {
             if (c->vLumFilterSize==1)
-                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for 
vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for 
vertical scaling (YV12 like)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
             else
-                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical 
scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical 
scaling (YV12 like)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         } else {
             if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
                 av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for 
vertical luminance scaling (BGR)\n"
-                       "      2-tap scaler for vertical chrominance scaling 
(BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                       "      2-tap scaler for vertical chrominance scaling 
(BGR)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
             else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
-                av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for 
vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for 
vertical scaling (BGR)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
             else
-                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical 
scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical 
scaling (BGR)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         }
 
         if (dstFormat==PIX_FMT_BGR24)
             av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
-                   (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & 
SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
+                   (dsp_mask & AV_CPU_FLAG_MMX2) ? "MMX2" : ((dsp_mask & 
AV_CPU_FLAG_MMX) ? "MMX" : "C"));
         else if (dstFormat==PIX_FMT_RGB32)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", 
(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", 
(dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         else if (dstFormat==PIX_FMT_BGR565)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", 
(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", 
(dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         else if (dstFormat==PIX_FMT_BGR555)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", 
(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", 
(dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         else if (dstFormat == PIX_FMT_RGB444BE || dstFormat == 
PIX_FMT_RGB444LE ||
                  dstFormat == PIX_FMT_BGR444BE || dstFormat == 
PIX_FMT_BGR444LE)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n", 
(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n", 
(dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
 
         av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
         av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d 
yInc=%d\n",
@@ -1143,7 +1198,14 @@ SwsContext *sws_getContext(int srcW, int srcH, enum 
PixelFormat srcFormat,
     if(!(c=sws_alloc_context()))
         return NULL;
 
-    c->flags= flags;
+#if FF_API_SWS_CPUFLAGS
+    /* if the caller's flags carry any SWS_CPU_CAPS_*, force that set instead of auto detection */
+    if (update_flags_cpu(flags) != strip_cpu_flags(flags))
+        c->dsp_mask = AV_CPU_FLAG_FORCE|sws_cpu_caps2av_cpu_flags(update_flags_cpu(flags));
+    c->flags= strip_cpu_flags(flags);
+#else
+    c->flags = flags;
+#endif /* FF_API_SWS_CPUFLAGS */
     c->srcW= srcW;
     c->srcH= srcH;
     c->dstW= dstW;
@@ -1529,11 +1591,17 @@ struct SwsContext *sws_getCachedContext(struct 
SwsContext *context,
                                         SwsFilter *srcFilter, SwsFilter 
*dstFilter, const double *param)
 {
     static const double default_param[2] = {SWS_PARAM_DEFAULT, 
SWS_PARAM_DEFAULT};
+    unsigned dsp_mask = av_get_cpu_flags();;
 
     if (!param)
         param = default_param;
 
-    flags = update_flags_cpu(flags);
+#if FF_API_SWS_CPUFLAGS
+    // use SWS_CPU_CAP* if set
+    if (update_flags_cpu(flags) != strip_cpu_flags(flags))
+        dsp_mask = 
AV_CPU_FLAG_FORCE|sws_cpu_caps2av_cpu_flags(update_flags_cpu(flags));
+    flags = strip_cpu_flags(flags);
+#endif /* FF_API_SWS_CPUFLAGS */
 
     if (context &&
         (context->srcW      != srcW      ||
@@ -1543,6 +1611,7 @@ struct SwsContext *sws_getCachedContext(struct SwsContext 
*context,
          context->dstH      != dstH      ||
          context->dstFormat != dstFormat ||
          context->flags     != flags     ||
+         context->dsp_mask  != dsp_mask  ||
          context->param[0]  != param[0]  ||
          context->param[1]  != param[1])) {
         sws_freeContext(context);
@@ -1561,6 +1630,7 @@ struct SwsContext *sws_getCachedContext(struct SwsContext 
*context,
         context->dstRange  = handle_jpeg(&dstFormat);
         context->dstFormat = dstFormat;
         context->flags     = flags;
+        context->dsp_mask  = dsp_mask;
         context->param[0]  = param[0];
         context->param[1]  = param[1];
         sws_setColorspaceDetails(context, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], 
context->srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, 
context->dstRange, 0, 1<<16, 1<<16);
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index e84bc1b..2085acc 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -27,6 +27,7 @@
 
 #include "config.h"
 #include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
 #include "libavutil/bswap.h"
 #include "libswscale/rgb2rgb.h"
 #include "libswscale/swscale.h"
@@ -122,16 +123,16 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 
0x0000001f0000001fULL;
  32-bit C version, and and&add trick by Michael Niedermayer
 */
 
-void rgb2rgb_init_x86(int flags)
+void rgb2rgb_init_x86(unsigned dsp_mask)
 {
 #if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
-    if (flags & SWS_CPU_CAPS_SSE2)
+    if (dsp_mask & AV_CPU_FLAG_SSE2)
         rgb2rgb_init_SSE2();
-    else if (flags & SWS_CPU_CAPS_MMX2)
+    else if (dsp_mask & AV_CPU_FLAG_MMX2)
         rgb2rgb_init_MMX2();
-    else if (flags & SWS_CPU_CAPS_3DNOW)
+    else if (dsp_mask & AV_CPU_FLAG_3DNOW)
         rgb2rgb_init_3DNOW();
-    else if (flags & SWS_CPU_CAPS_MMX)
+    else if (dsp_mask & AV_CPU_FLAG_MMX)
         rgb2rgb_init_MMX();
 #endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
 }
diff --git a/libswscale/x86/swscale_template.c 
b/libswscale/x86/swscale_template.c
index 59ea2be..d478b2c 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2386,6 +2386,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* 
src[], int srcStride[],
     const int chrXInc= c->chrXInc;
     const enum PixelFormat dstFormat= c->dstFormat;
     const int flags= c->flags;
+    const unsigned dsp_mask= c->dsp_mask;
     int16_t *vLumFilterPos= c->vLumFilterPos;
     int16_t *vChrFilterPos= c->vChrFilterPos;
     int16_t *hLumFilterPos= c->hLumFilterPos;
@@ -2719,10 +2720,10 @@ static int RENAME(swScale)(SwsContext *c, const 
uint8_t* src[], int srcStride[],
     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
 
-    if (flags & SWS_CPU_CAPS_MMX2 )  __asm__ volatile("sfence":::"memory");
+    if (dsp_mask & AV_CPU_FLAG_MMX2)  __asm__ volatile("sfence":::"memory");
     /* On K6 femms is faster than emms. On K7 femms is directly mapped to 
emms. */
-    if (flags & SWS_CPU_CAPS_3DNOW)  __asm__ volatile("femms" :::"memory");
-    else                             __asm__ volatile("emms"  :::"memory");
+    if (dsp_mask & AV_CPU_FLAG_3DNOW) __asm__ volatile("femms" :::"memory");
+    else                              __asm__ volatile("emms"  :::"memory");
     /* store changed local vars back in the context */
     c->dstY= dstY;
     c->lumBufIndex= lumBufIndex;
diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c
index ff3a93d..6f69b3f 100644
--- a/libswscale/x86/yuv2rgb_mmx.c
+++ b/libswscale/x86/yuv2rgb_mmx.c
@@ -34,6 +34,7 @@
 #include "libswscale/swscale.h"
 #include "libswscale/swscale_internal.h"
 #include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
 
 #define DITHER1XBPP // only for MMX
 
@@ -63,7 +64,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
 {
-    if (c->flags & SWS_CPU_CAPS_MMX2) {
+    if (c->dsp_mask & AV_CPU_FLAG_MMX2) {
         switch (c->dstFormat) {
         case PIX_FMT_RGB32:
             if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
@@ -81,7 +82,7 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
         case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
         }
     }
-    if (c->flags & SWS_CPU_CAPS_MMX) {
+    if (c->dsp_mask & AV_CPU_FLAG_MMX) {
         switch (c->dstFormat) {
         case PIX_FMT_RGB32:
             if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 8ea41af..a983eda 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -553,14 +553,12 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
     t = ff_yuv2rgb_init_mlib(c);
 #endif
 #if HAVE_ALTIVEC
-    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
+    if (c->dsp_mask & AV_CPU_FLAG_ALTIVEC)
         t = ff_yuv2rgb_init_altivec(c);
 #endif
 
-#if ARCH_BFIN
-    if (c->flags & SWS_CPU_CAPS_BFIN)
+    if (ARCH_BFIN)
         t = ff_yuv2rgb_get_func_ptr_bfin(c);
-#endif
 
     if (t)
         return t;
-- 
1.7.4

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to