From: Janne Grunau <[email protected]>
Port swscale to AV_CPU_FLAG_* and use av_get_cpu_flags() for runtime CPU
detection. This is still only used with --enable-runtime-cpudetect. The next
step is either getting rid of all #if !CONFIG_RUNTIME_CPUDETECT code in
libswscale or enabling runtime detection by default, to get the same
behaviour as in libavcodec.
Is there a way to enable deprecation warnings for defines?
An APIchanges entry is still missing.
Janne
---8<---
Use AV_CPU_FLAG_* internally and deprecate SWS_CPU_CAPS_*.
Add dsp_mask to SwsContext and AVOptions to fill it.
Auto-detection is only done if no SWS_CPU_CAPS_* flag is set in flags and
dsp_mask does not contain AV_CPU_FLAG_FORCE.
---
libswscale/bfin/swscale_bfin.c | 1 -
libswscale/colorspace-test.c | 8 +-
libswscale/options.c | 11 +++
libswscale/ppc/yuv2rgb_altivec.c | 2 +-
libswscale/rgb2rgb.c | 5 +-
libswscale/rgb2rgb.h | 4 +-
libswscale/swscale.c | 27 +++----
libswscale/swscale.h | 6 ++
libswscale/swscale_internal.h | 1 +
libswscale/utils.c | 138 ++++++++++++++++++++++++++++---------
libswscale/x86/rgb2rgb.c | 11 ++--
libswscale/x86/swscale_template.c | 7 +-
libswscale/x86/yuv2rgb_mmx.c | 5 +-
libswscale/yuv2rgb.c | 6 +-
14 files changed, 158 insertions(+), 74 deletions(-)
diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c
index fa3c03b..02fcfb5 100644
--- a/libswscale/bfin/swscale_bfin.c
+++ b/libswscale/bfin/swscale_bfin.c
@@ -79,7 +79,6 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[],
int srcStride[], i
void ff_bfin_get_unscaled_swscale(SwsContext *c)
{
SwsFunc swScale = c->swScale;
- if (c->flags & SWS_CPU_CAPS_BFIN)
if (c->dstFormat == PIX_FMT_YUV420P)
if (c->srcFormat == PIX_FMT_UYVY422) {
av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized
uyvytoyv12_unscaled\n");
diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
index 4e7116f..914b824 100644
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@@ -33,7 +33,7 @@
#define FUNC(s,d,n) {s,d,#n,n}
-static int cpu_caps;
+static unsigned cpu_caps;
static char *args_parse(int argc, char *argv[])
{
@@ -42,13 +42,13 @@ static char *args_parse(int argc, char *argv[])
while ((o = getopt(argc, argv, "m23")) != -1) {
switch (o) {
case 'm':
- cpu_caps |= SWS_CPU_CAPS_MMX;
+ cpu_caps |= AV_CPU_FLAG_MMX;
break;
case '2':
- cpu_caps |= SWS_CPU_CAPS_MMX2;
+ cpu_caps |= AV_CPU_FLAG_MMX2;
break;
case '3':
- cpu_caps |= SWS_CPU_CAPS_3DNOW;
+ cpu_caps |= AV_CPU_FLAG_3DNOW;
break;
default:
av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", o);
diff --git a/libswscale/options.c b/libswscale/options.c
index 59b9994..92b9789 100644
--- a/libswscale/options.c
+++ b/libswscale/options.c
@@ -19,6 +19,7 @@
*/
#include "libavutil/avutil.h"
+#include "libavutil/cpu.h"
#include "libavutil/pixfmt.h"
#include "libavcodec/opt.h"
#include "swscale.h"
@@ -48,12 +49,14 @@ static const AVOption options[] = {
{ "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, SWS_SPLINE,
INT_MIN, INT_MAX, VE, "sws_flags" },
{ "print_info", "print info", 0, FF_OPT_TYPE_CONST, SWS_PRINT_INFO,
INT_MIN, INT_MAX, VE, "sws_flags" },
{ "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST,
SWS_ACCURATE_RND, INT_MIN, INT_MAX, VE, "sws_flags" },
+#if FF_API_SWS_CPUFLAGS
{ "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX,
INT_MIN, INT_MAX, VE, "sws_flags" },
{ "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST,
SWS_CPU_CAPS_MMX2, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST,
SWS_CPU_CAPS_SSE2, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST,
SWS_CPU_CAPS_3DNOW, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST,
SWS_CPU_CAPS_ALTIVEC, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST,
SWS_CPU_CAPS_BFIN, INT_MIN, INT_MAX, VE, "sws_flags" },
+#endif /* FF_API_SWS_CPUFLAGS */
{ "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST,
SWS_FULL_CHR_H_INT, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST,
SWS_FULL_CHR_H_INP, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "bitexact", "", 0 , FF_OPT_TYPE_CONST, SWS_BITEXACT, INT_MIN, INT_MAX,
VE, "sws_flags" },
@@ -69,6 +72,14 @@ static const AVOption options[] = {
{ "param0" , "scaler param 0" , OFFSET(param[0]) , FF_OPT_TYPE_DOUBLE,
SWS_PARAM_DEFAULT, INT_MIN, INT_MAX, VE },
{ "param1" , "scaler param 1" , OFFSET(param[1]) , FF_OPT_TYPE_DOUBLE,
SWS_PARAM_DEFAULT, INT_MIN, INT_MAX, VE },
+ { "sws_dsp_mask", "override cpu flags", OFFSET(dsp_mask),
FF_OPT_TYPE_FLAGS, DEFAULT, 0, UINT_MAX, VE, "sws_dsp_mask" },
+ { "null", "no SIMD acceleration", 0, FF_OPT_TYPE_CONST, AV_CPU_FLAG_FORCE,
INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
+ { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST,
AV_CPU_FLAG_FORCE|AV_CPU_FLAG_MMX, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
+ { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST,
AV_CPU_FLAG_FORCE|AV_CPU_FLAG_MMX2, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
+ { "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST,
AV_CPU_FLAG_FORCE|AV_CPU_FLAG_SSE2, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
+ { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST,
AV_CPU_FLAG_FORCE|AV_CPU_FLAG_3DNOW, INT_MIN, INT_MAX, VE, "sws_dsp_mask" },
+ { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST,
AV_CPU_FLAG_FORCE|AV_CPU_FLAG_ALTIVEC, INT_MIN, UINT_MAX, VE, "sws_dsp_mask" },
+
{ NULL }
};
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 0113c8d..6d15cb9 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -692,7 +692,7 @@ static int altivec_uyvy_rgb32 (SwsContext *c,
*/
SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
{
- if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
+ if (!(c->dsp_mask & AV_CPU_FLAG_ALTIVEC))
return NULL;
/*
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 12d6287..92a4b37 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -25,6 +25,7 @@
#include <inttypes.h>
#include "config.h"
#include "libavutil/bswap.h"
+#include "libavutil/cpu.h"
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
@@ -116,11 +117,11 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst,
uint8_t *vdst, const uint8_t
32-bit C version, and and&add trick by Michael Niedermayer
*/
-void sws_rgb2rgb_init(int flags)
+void sws_rgb2rgb_init(unsigned dsp_mask)
{
rgb2rgb_init_c();
#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
- rgb2rgb_init_x86(flags);
+ rgb2rgb_init_x86(dsp_mask);
#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
}
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index bde1134..8028cc0 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -166,8 +166,8 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst,
uint8_t *vdst, const u
long width, long height,
long lumStride, long chromStride, long srcStride);
-void sws_rgb2rgb_init(int flags);
+void sws_rgb2rgb_init(unsigned dsp_mask);
-void rgb2rgb_init_x86(int flags);
+void rgb2rgb_init_x86(unsigned dsp_mask);
#endif /* SWSCALE_RGB2RGB_H */
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index b77cc2e..2c22077 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -65,6 +65,7 @@ untested special converters
#include "libavutil/avutil.h"
#include "libavutil/mathematics.h"
#include "libavutil/bswap.h"
+#include "libavutil/cpu.h"
#include "libavutil/pixdesc.h"
#undef MOVNTQ
@@ -1200,24 +1201,22 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
sws_init_swScale_c(c);
#if CONFIG_RUNTIME_CPUDETECT
- int flags = c->flags;
-
#if ARCH_X86
// ordered per speed fastest first
- if (flags & SWS_CPU_CAPS_MMX2) {
+ if (c->dsp_mask & AV_CPU_FLAG_MMX2) {
sws_init_swScale_MMX2(c);
return swScale_MMX2;
- } else if (flags & SWS_CPU_CAPS_3DNOW) {
+ } else if (c->dsp_mask & AV_CPU_FLAG_3DNOW) {
sws_init_swScale_3DNow(c);
return swScale_3DNow;
- } else if (flags & SWS_CPU_CAPS_MMX) {
+ } else if (c->dsp_mask & AV_CPU_FLAG_MMX) {
sws_init_swScale_MMX(c);
return swScale_MMX;
}
#else
#if COMPILE_ALTIVEC
- if (flags & SWS_CPU_CAPS_ALTIVEC) {
+ if (c->dsp_mask & AV_CPU_FLAG_ALTIVEC) {
sws_init_swScale_altivec(c);
return swScale_altivec;
}
@@ -1665,15 +1664,13 @@ int ff_hardcodedcpuflags(void)
{
int flags = 0;
#if COMPILE_TEMPLATE_MMX2
- flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
+ flags |= AV_CPU_FLAG_MMX|AV_CPU_FLAG_MMX2;
#elif COMPILE_TEMPLATE_AMD3DNOW
- flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
+ flags |= AV_CPU_FLAG_MMX|AV_CPU_FLAG_3DNOW;
#elif COMPILE_TEMPLATE_MMX
- flags |= SWS_CPU_CAPS_MMX;
+ flags |= AV_CPU_FLAG_MMX;
#elif COMPILE_TEMPLATE_ALTIVEC
- flags |= SWS_CPU_CAPS_ALTIVEC;
-#elif ARCH_BFIN
- flags |= SWS_CPU_CAPS_BFIN;
+ flags |= AV_CPU_FLAG_ALTIVEC;
#endif
return flags;
}
@@ -1760,7 +1757,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
c->swScale= uyvyToYuv422Wrapper;
#if COMPILE_ALTIVEC
- if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
+ if ((c->dsp_mask & AV_CPU_FLAG_ALTIVEC) &&
!(c->flags & SWS_BITEXACT) &&
srcFormat == PIX_FMT_YUV420P) {
// unscaled YV12 -> packed YUV, we want speed
@@ -1789,10 +1786,8 @@ void ff_get_unscaled_swscale(SwsContext *c)
else /* Planar YUV or gray */
c->swScale= planarCopyWrapper;
}
-#if ARCH_BFIN
- if (flags & SWS_CPU_CAPS_BFIN)
+ if (ARCH_BFIN)
ff_bfin_get_unscaled_swscale (c);
-#endif
}
static void reset_ptr(const uint8_t* src[], int format)
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index aae1a6c..22dcb8e 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -51,6 +51,10 @@
#define FF_API_SWS_GETCONTEXT (LIBSWSCALE_VERSION_MAJOR < 2)
#endif
+#ifndef FF_API_SWS_CPUFLAGS
+#define FF_API_SWS_CPUFLAGS (LIBSWSCALE_VERSION_MAJOR < 2)
+#endif
+
/**
* Returns the LIBSWSCALE_VERSION_INT constant.
*/
@@ -95,12 +99,14 @@ const char *swscale_license(void);
#define SWS_ACCURATE_RND 0x40000
#define SWS_BITEXACT 0x80000
+#if FF_API_SWS_CPUFLAGS
#define SWS_CPU_CAPS_MMX 0x80000000
#define SWS_CPU_CAPS_MMX2 0x20000000
#define SWS_CPU_CAPS_3DNOW 0x40000000
#define SWS_CPU_CAPS_ALTIVEC 0x10000000
#define SWS_CPU_CAPS_BFIN 0x01000000
#define SWS_CPU_CAPS_SSE2 0x02000000
+#endif
#define SWS_MAX_REDUCE_CUTOFF 0.002
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 4c6ad2a..91fdf24 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -316,6 +316,7 @@ typedef struct SwsContext {
int needs_hcscale; ///< Set if there are chroma planes to be converted.
+ unsigned dsp_mask; ///< Flags passed by the user to select optimizations
} SwsContext;
//FIXME check init (where 0)
diff --git a/libswscale/utils.c b/libswscale/utils.c
index bf61dfd..7f5a900 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -43,6 +43,7 @@
#include "libavutil/x86_cpu.h"
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
+#include "libavutil/cpu.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
@@ -173,6 +174,7 @@ static double getSplineCoeff(double a, double b, double c,
double d, double dist
static int initFilter(int16_t **outFilter, int16_t **filterPos, int
*outFilterSize, int xInc,
int srcW, int dstW, int filterAlign, int one, int flags,
+ unsigned dsp_mask,
SwsVector *srcFilter, SwsVector *dstFilter, double
param[2])
{
int i;
@@ -184,7 +186,7 @@ static int initFilter(int16_t **outFilter, int16_t
**filterPos, int *outFilterSi
const int64_t fone= 1LL<<54;
int ret= -1;
#if ARCH_X86
- if (flags & SWS_CPU_CAPS_MMX)
+ if (dsp_mask & AV_CPU_FLAG_MMX)
__asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be
required but it IS (even for non-MMX versions)
#endif
@@ -403,7 +405,7 @@ static int initFilter(int16_t **outFilter, int16_t
**filterPos, int *outFilterSi
if (min>minFilterSize) minFilterSize= min;
}
- if (flags & SWS_CPU_CAPS_ALTIVEC) {
+ if (dsp_mask & AV_CPU_FLAG_ALTIVEC) {
// we can handle the special case 4,
// so we don't want to go to the full 8
if (minFilterSize < 5)
@@ -418,7 +420,7 @@ static int initFilter(int16_t **outFilter, int16_t
**filterPos, int *outFilterSi
filterAlign = 1;
}
- if (flags & SWS_CPU_CAPS_MMX) {
+ if (dsp_mask & AV_CPU_FLAG_MMX) {
// special case for unscaled vertical filtering
if (minFilterSize == 1 && filterAlign == 2)
filterAlign= 1;
@@ -674,7 +676,11 @@ static void getSubSampleFactors(int *h, int *v, enum
PixelFormat format)
*v = av_pix_fmt_descriptors[format].log2_chroma_h;
}
+#if FF_API_SWS_CPUFLAGS
static int update_flags_cpu(int flags);
+static unsigned sws_cpu_caps2av_cpu_flags(int flags);
+static int strip_cpu_flags(int flags);
+#endif
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int
srcRange, const int table[4], int dstRange, int brightness, int contrast, int
saturation)
{
@@ -690,13 +696,20 @@ int sws_setColorspaceDetails(SwsContext *c, const int
inv_table[4], int srcRange
c->dstFormatBpp =
av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->dstFormat]);
c->srcFormatBpp =
av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->srcFormat]);
- c->flags = update_flags_cpu(c->flags);
+
+ if (!(c->dsp_mask & AV_CPU_FLAG_FORCE))
+ c->dsp_mask = av_get_cpu_flags();
+#if FF_API_SWS_CPUFLAGS
+ // if any SWS_CPU_CAPS_* is set do not use auto detection
+ if (update_flags_cpu(c->flags) != strip_cpu_flags(c->flags))
+ c->dsp_mask = sws_cpu_caps2av_cpu_flags(update_flags_cpu(c->flags));
+#endif
ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast,
saturation);
//FIXME factorize
#if HAVE_ALTIVEC
- if (c->flags & SWS_CPU_CAPS_ALTIVEC)
+ if (c->dsp_mask & AV_CPU_FLAG_ALTIVEC)
ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast,
saturation);
#endif
return 0;
@@ -728,19 +741,27 @@ static int handle_jpeg(enum PixelFormat *format)
}
}
-static int update_flags_cpu(int flags)
+#if FF_API_SWS_CPUFLAGS
+static int strip_cpu_flags(int flags)
{
-#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled
variant if cpudetect is off
flags &= ~( SWS_CPU_CAPS_MMX
|SWS_CPU_CAPS_MMX2
|SWS_CPU_CAPS_3DNOW
|SWS_CPU_CAPS_SSE2
|SWS_CPU_CAPS_ALTIVEC
|SWS_CPU_CAPS_BFIN);
+ return flags;
+}
+
+static int update_flags_cpu(int flags)
+{
+#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled
variant if cpudetect is off
+ flags = strip_cpu_flags(flags);
flags |= ff_hardcodedcpuflags();
#endif /* CONFIG_RUNTIME_CPUDETECT */
return flags;
}
+#endif
SwsContext *sws_alloc_context(void)
{
@@ -752,6 +773,26 @@ SwsContext *sws_alloc_context(void)
return c;
}
+#if FF_API_SWS_CPUFLAGS
+/**
+ * Translate deprecated SWS_CPU_CAPS_* bits into AV_CPU_FLAG_* bits.
+ *
+ * x86 capabilities (MMX, MMX2, 3DNow!, SSE2) are only translated when
+ * ARCH_X86 is non-zero, and AltiVec only when ARCH_PPC is non-zero.
+ * Bits with no branch below (e.g. SWS_CPU_CAPS_BFIN) are not carried over.
+ *
+ * @param flags swscale flags possibly containing SWS_CPU_CAPS_* bits
+ * @return the corresponding AV_CPU_FLAG_* mask, 0 if nothing matched
+ */
+static unsigned sws_cpu_caps2av_cpu_flags(int flags)
+{
+    unsigned cpuflags = 0;
+
+    if (ARCH_X86 && (flags & SWS_CPU_CAPS_MMX))
+        cpuflags |= AV_CPU_FLAG_MMX;
+    if (ARCH_X86 && (flags & SWS_CPU_CAPS_MMX2))
+        cpuflags |= AV_CPU_FLAG_MMX2;
+    if (ARCH_X86 && (flags & SWS_CPU_CAPS_3DNOW))
+        cpuflags |= AV_CPU_FLAG_3DNOW;
+    if (ARCH_X86 && (flags & SWS_CPU_CAPS_SSE2))
+        cpuflags |= AV_CPU_FLAG_SSE2;
+    if (ARCH_PPC && (flags & SWS_CPU_CAPS_ALTIVEC))
+        cpuflags |= AV_CPU_FLAG_ALTIVEC;
+
+    return cpuflags;
+}
+#endif /* FF_API_SWS_CPUFLAGS */
+
int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
{
int i;
@@ -763,15 +804,25 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
SwsFilter *dstFilter)
int dstW= c->dstW;
int dstH= c->dstH;
int flags;
+ unsigned dsp_mask;
enum PixelFormat srcFormat= c->srcFormat;
enum PixelFormat dstFormat= c->dstFormat;
- flags= c->flags = update_flags_cpu(c->flags);
+ if (!(c->dsp_mask & AV_CPU_FLAG_FORCE))
+ c->dsp_mask = av_get_cpu_flags();
+ dsp_mask = c->dsp_mask;
+
+#if FF_API_SWS_CPUFLAGS
+ flags = c->flags = strip_cpu_flags(c->flags);
+#else
+ flags = c->flags;
+#endif /* FF_API_SWS_CPUFLAGS */
+
#if ARCH_X86
- if (flags & SWS_CPU_CAPS_MMX)
+ if (dsp_mask & AV_CPU_FLAG_MMX)
__asm__ volatile("emms\n\t"::: "memory");
#endif
- if (!rgb15to16) sws_rgb2rgb_init(flags);
+ if (!rgb15to16) sws_rgb2rgb_init(dsp_mask);
unscaled = (srcW == dstW && srcH == dstH);
@@ -864,7 +915,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
SwsFilter *dstFilter)
}
}
- if (flags & SWS_CPU_CAPS_MMX2) {
+ if (dsp_mask & AV_CPU_FLAG_MMX2) {
c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 :
0;
if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 &&
(flags&SWS_FAST_BILINEAR)) {
if (flags&SWS_PRINT_INFO)
@@ -890,7 +941,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
SwsFilter *dstFilter)
c->chrXInc+= 20;
}
//we don't use the x86 asm scaler if MMX is available
- else if (flags & SWS_CPU_CAPS_MMX) {
+ else if (dsp_mask & AV_CPU_FLAG_MMX) {
c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
}
@@ -933,18 +984,20 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
SwsFilter *dstFilter)
#endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
{
const int filterAlign=
- (flags & SWS_CPU_CAPS_MMX) ? 4 :
- (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+ (dsp_mask & AV_CPU_FLAG_MMX) ? 4 :
+ (dsp_mask & AV_CPU_FLAG_ALTIVEC) ? 8 :
1;
if (initFilter(&c->hLumFilter, &c->hLumFilterPos,
&c->hLumFilterSize, c->lumXInc,
srcW , dstW, filterAlign, 1<<14,
(flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
+ dsp_mask,
srcFilter->lumH, dstFilter->lumH, c->param) < 0)
goto fail;
if (initFilter(&c->hChrFilter, &c->hChrFilterPos,
&c->hChrFilterSize, c->chrXInc,
c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+ dsp_mask,
srcFilter->chrH, dstFilter->chrH, c->param) < 0)
goto fail;
}
@@ -953,18 +1006,20 @@ int sws_init_context(SwsContext *c, SwsFilter
*srcFilter, SwsFilter *dstFilter)
/* precalculate vertical scaler filter coefficients */
{
const int filterAlign=
- (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
- (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+ (dsp_mask & AV_CPU_FLAG_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
+ (dsp_mask & AV_CPU_FLAG_ALTIVEC) ? 8 :
1;
if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
c->lumYInc,
srcH , dstH, filterAlign, (1<<12),
(flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
+ dsp_mask,
srcFilter->lumV, dstFilter->lumV, c->param) < 0)
goto fail;
if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
c->chrYInc,
c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+ dsp_mask,
srcFilter->chrV, dstFilter->chrV, c->param) < 0)
goto fail;
@@ -1058,13 +1113,13 @@ int sws_init_context(SwsContext *c, SwsFilter
*srcFilter, SwsFilter *dstFilter)
#endif
sws_format_name(dstFormat));
- if (flags & SWS_CPU_CAPS_MMX2) av_log(c, AV_LOG_INFO, "using
MMX2\n");
- else if (flags & SWS_CPU_CAPS_3DNOW) av_log(c, AV_LOG_INFO, "using
3DNOW\n");
- else if (flags & SWS_CPU_CAPS_MMX) av_log(c, AV_LOG_INFO, "using
MMX\n");
- else if (flags & SWS_CPU_CAPS_ALTIVEC) av_log(c, AV_LOG_INFO, "using
AltiVec\n");
- else av_log(c, AV_LOG_INFO, "using
C\n");
+ if (dsp_mask & AV_CPU_FLAG_MMX2) av_log(c, AV_LOG_INFO, "using
MMX2\n");
+ else if (dsp_mask & AV_CPU_FLAG_3DNOW) av_log(c, AV_LOG_INFO, "using
3DNOW\n");
+ else if (dsp_mask & AV_CPU_FLAG_MMX) av_log(c, AV_LOG_INFO, "using
MMX\n");
+ else if (dsp_mask & AV_CPU_FLAG_ALTIVEC) av_log(c, AV_LOG_INFO, "using
AltiVec\n");
+ else av_log(c, AV_LOG_INFO, "using
C\n");
- if (flags & SWS_CPU_CAPS_MMX) {
+ if (dsp_mask & AV_CPU_FLAG_MMX) {
if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for
horizontal scaling\n");
else {
@@ -1094,31 +1149,31 @@ int sws_init_context(SwsContext *c, SwsFilter
*srcFilter, SwsFilter *dstFilter)
}
if (isPlanarYUV(dstFormat)) {
if (c->vLumFilterSize==1)
- av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for
vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for
vertical scaling (YV12 like)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else
- av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical
scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical
scaling (YV12 like)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
} else {
if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for
vertical luminance scaling (BGR)\n"
- " 2-tap scaler for vertical chrominance scaling
(BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ " 2-tap scaler for vertical chrominance scaling
(BGR)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
- av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for
vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for
vertical scaling (BGR)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else
- av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical
scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical
scaling (BGR)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
}
if (dstFormat==PIX_FMT_BGR24)
av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
- (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags &
SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
+ (dsp_mask & AV_CPU_FLAG_MMX2) ? "MMX2" : ((dsp_mask &
AV_CPU_FLAG_MMX) ? "MMX" : "C"));
else if (dstFormat==PIX_FMT_RGB32)
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n",
(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n",
(dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else if (dstFormat==PIX_FMT_BGR565)
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n",
(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n",
(dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else if (dstFormat==PIX_FMT_BGR555)
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n",
(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n",
(dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else if (dstFormat == PIX_FMT_RGB444BE || dstFormat ==
PIX_FMT_RGB444LE ||
dstFormat == PIX_FMT_BGR444BE || dstFormat ==
PIX_FMT_BGR444LE)
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n",
(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n",
(dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d
yInc=%d\n",
@@ -1143,7 +1198,14 @@ SwsContext *sws_getContext(int srcW, int srcH, enum
PixelFormat srcFormat,
if(!(c=sws_alloc_context()))
return NULL;
- c->flags= flags;
+#if FF_API_SWS_CPUFLAGS
+ /* if any SWS_CPU_CAPS_* is set do not use auto detection */
+ if (update_flags_cpu(c->flags) != strip_cpu_flags(c->flags))
+ c->dsp_mask =
AV_CPU_FLAG_FORCE|sws_cpu_caps2av_cpu_flags(update_flags_cpu(flags));
+ c->flags= strip_cpu_flags(flags);
+#else
+ c->flags = flags;
+#endif /* FF_API_SWS_CPUFLAGS */
c->srcW= srcW;
c->srcH= srcH;
c->dstW= dstW;
@@ -1529,11 +1591,17 @@ struct SwsContext *sws_getCachedContext(struct
SwsContext *context,
SwsFilter *srcFilter, SwsFilter
*dstFilter, const double *param)
{
static const double default_param[2] = {SWS_PARAM_DEFAULT,
SWS_PARAM_DEFAULT};
+ unsigned dsp_mask = av_get_cpu_flags();;
if (!param)
param = default_param;
- flags = update_flags_cpu(flags);
+#if FF_API_SWS_CPUFLAGS
+ // use SWS_CPU_CAP* if set
+ if (update_flags_cpu(flags) != strip_cpu_flags(flags))
+ dsp_mask =
AV_CPU_FLAG_FORCE|sws_cpu_caps2av_cpu_flags(update_flags_cpu(flags));
+ flags = strip_cpu_flags(flags);
+#endif /* FF_API_SWS_CPUFLAGS */
if (context &&
(context->srcW != srcW ||
@@ -1543,6 +1611,7 @@ struct SwsContext *sws_getCachedContext(struct SwsContext
*context,
context->dstH != dstH ||
context->dstFormat != dstFormat ||
context->flags != flags ||
+ context->dsp_mask != dsp_mask ||
context->param[0] != param[0] ||
context->param[1] != param[1])) {
sws_freeContext(context);
@@ -1561,6 +1630,7 @@ struct SwsContext *sws_getCachedContext(struct SwsContext
*context,
context->dstRange = handle_jpeg(&dstFormat);
context->dstFormat = dstFormat;
context->flags = flags;
+ context->dsp_mask = dsp_mask;
context->param[0] = param[0];
context->param[1] = param[1];
sws_setColorspaceDetails(context, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT],
context->srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/,
context->dstRange, 0, 1<<16, 1<<16);
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index e84bc1b..2085acc 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -27,6 +27,7 @@
#include "config.h"
#include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
#include "libavutil/bswap.h"
#include "libswscale/rgb2rgb.h"
#include "libswscale/swscale.h"
@@ -122,16 +123,16 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) =
0x0000001f0000001fULL;
32-bit C version, and and&add trick by Michael Niedermayer
*/
-void rgb2rgb_init_x86(int flags)
+void rgb2rgb_init_x86(unsigned dsp_mask)
{
#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
- if (flags & SWS_CPU_CAPS_SSE2)
+ if (dsp_mask & AV_CPU_FLAG_SSE2)
rgb2rgb_init_SSE2();
- else if (flags & SWS_CPU_CAPS_MMX2)
+ else if (dsp_mask & AV_CPU_FLAG_MMX2)
rgb2rgb_init_MMX2();
- else if (flags & SWS_CPU_CAPS_3DNOW)
+ else if (dsp_mask & AV_CPU_FLAG_3DNOW)
rgb2rgb_init_3DNOW();
- else if (flags & SWS_CPU_CAPS_MMX)
+ else if (dsp_mask & AV_CPU_FLAG_MMX)
rgb2rgb_init_MMX();
#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
}
diff --git a/libswscale/x86/swscale_template.c
b/libswscale/x86/swscale_template.c
index 59ea2be..d478b2c 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2386,6 +2386,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t*
src[], int srcStride[],
const int chrXInc= c->chrXInc;
const enum PixelFormat dstFormat= c->dstFormat;
const int flags= c->flags;
+ const unsigned dsp_mask= c->dsp_mask;
int16_t *vLumFilterPos= c->vLumFilterPos;
int16_t *vChrFilterPos= c->vChrFilterPos;
int16_t *hLumFilterPos= c->hLumFilterPos;
@@ -2719,10 +2720,10 @@ static int RENAME(swScale)(SwsContext *c, const
uint8_t* src[], int srcStride[],
if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
- if (flags & SWS_CPU_CAPS_MMX2 ) __asm__ volatile("sfence":::"memory");
+ if (dsp_mask & AV_CPU_FLAG_MMX2) __asm__ volatile("sfence":::"memory");
/* On K6 femms is faster than emms. On K7 femms is directly mapped to
emms. */
- if (flags & SWS_CPU_CAPS_3DNOW) __asm__ volatile("femms" :::"memory");
- else __asm__ volatile("emms" :::"memory");
+ if (dsp_mask & AV_CPU_FLAG_3DNOW) __asm__ volatile("femms" :::"memory");
+ else __asm__ volatile("emms" :::"memory");
/* store changed local vars back in the context */
c->dstY= dstY;
c->lumBufIndex= lumBufIndex;
diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c
index ff3a93d..6f69b3f 100644
--- a/libswscale/x86/yuv2rgb_mmx.c
+++ b/libswscale/x86/yuv2rgb_mmx.c
@@ -34,6 +34,7 @@
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
#define DITHER1XBPP // only for MMX
@@ -63,7 +64,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
{
- if (c->flags & SWS_CPU_CAPS_MMX2) {
+ if (c->dsp_mask & AV_CPU_FLAG_MMX2) {
switch (c->dstFormat) {
case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
@@ -81,7 +82,7 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
}
}
- if (c->flags & SWS_CPU_CAPS_MMX) {
+ if (c->dsp_mask & AV_CPU_FLAG_MMX) {
switch (c->dstFormat) {
case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 8ea41af..a983eda 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -553,14 +553,12 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
t = ff_yuv2rgb_init_mlib(c);
#endif
#if HAVE_ALTIVEC
- if (c->flags & SWS_CPU_CAPS_ALTIVEC)
+ if (c->dsp_mask & AV_CPU_FLAG_ALTIVEC)
t = ff_yuv2rgb_init_altivec(c);
#endif
-#if ARCH_BFIN
- if (c->flags & SWS_CPU_CAPS_BFIN)
+ if (ARCH_BFIN)
t = ff_yuv2rgb_get_func_ptr_bfin(c);
-#endif
if (t)
return t;
--
1.7.4
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel