From 1d43d3143a36022c5eed7ac82bb95926d14729a0 Mon Sep 17 00:00:00 2001
From: Zhislina <Victoria.Zhislina@intel.com>
Date: Mon, 3 May 2021 21:53:54 +0300
Subject: [PATCH] Intel IPP library based x86 optimized video scaling.

Uses vector instructions up to AVX-512 on suitable CPUs, plus multithreading,
to provide up to a 16x performance increase over libswscale.
Introduces the supersampling interpolation method for video downscaling.
Adds an antialiasing option for linear, cubic and lanczos interpolation.

Signed-off-by: Zhislina <Victoria.Zhislina@intel.com>
---
 configure                  |   15 +-
 doc/filters.texi           |   48 ++
 libavfilter/Makefile       |    1 +
 libavfilter/allfilters.c   |    1 +
 libavfilter/vf_scale_ipp.c | 1275 ++++++++++++++++++++++++++++++++++++
 5 files changed, 1339 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_scale_ipp.c

diff --git a/configure b/configure
index 6580859ef2..e28f4ba8a9 100755
--- a/configure
+++ b/configure
@@ -240,6 +240,7 @@ External library support:
   --enable-libgsm          enable GSM de/encoding via libgsm [no]
   --enable-libiec61883     enable iec61883 via libiec61883 [no]
   --enable-libilbc         enable iLBC de/encoding via libilbc [no]
+  --enable-libipp          enable Intel IPP library based scaling [no]
   --enable-libjack         enable JACK audio sound server [no]
   --enable-libklvanc       enable Kernel Labs VANC processing [no]
   --enable-libkvazaar      enable HEVC encoding via libkvazaar [no]
@@ -1766,6 +1767,7 @@ EXTERNAL_LIBRARY_NONFREE_LIST="
     libfdk_aac
     openssl
     libtls
+    libipp
 "
 
 EXTERNAL_LIBRARY_VERSION3_LIST="
@@ -3643,6 +3645,7 @@ rubberband_filter_deps="librubberband"
 sab_filter_deps="gpl swscale"
 scale2ref_filter_deps="swscale"
 scale_filter_deps="swscale"
+scale_ipp_filter_deps="libipp"
 scale_qsv_filter_deps="libmfx"
 scdet_filter_select="scene_sad"
 select_filter_select="scene_sad"
@@ -6412,6 +6415,17 @@ if enabled libmfx; then
    check_cc MFX_CODEC_VP9 "mfx/mfxvp9.h mfx/mfxstructures.h" "MFX_CODEC_VP9"
 fi
 
+if enabled libipp; then
+   ipp_header_for_check='ippcore.h'
+   case $target_os in
+       mingw32*|mingw64*)
+           ipp_header_for_check='_mingw.h ippcore.h'
+           ;;
+   esac
+   check_lib libipp "$ipp_header_for_check" ippInit -Wl,--start-group -lippi -lipps -lippcore -lippvm -Wl,--end-group ||
+   die "ERROR: Intel IPP not found"
+fi
+
 enabled libmodplug        && require_pkg_config libmodplug libmodplug libmodplug/modplug.h ModPlug_Load
 enabled libmp3lame        && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_quality -lmp3lame $libm_extralibs
 enabled libmysofa         && { check_pkg_config libmysofa libmysofa mysofa.h mysofa_neighborhood_init_withstepdefine ||
@@ -6490,7 +6504,6 @@ enabled libvpx            && {
         die "libvpx enabled but no supported decoders found"
     fi
 }
-
 enabled libwebp           && {
     enabled libwebp_encoder      && require_pkg_config libwebp "libwebp >= 0.2.0" webp/encode.h WebPGetEncoderVersion
     enabled libwebp_anim_encoder && check_pkg_config libwebp_anim_encoder "libwebpmux >= 0.4.0" webp/mux.h WebPAnimEncoderOptionsInit; }
diff --git a/doc/filters.texi b/doc/filters.texi
index 36e35a175b..85480b6013 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -17706,6 +17706,54 @@ If the specified expression is not valid, it is kept at its current
 value.
 @end table
 
+@section scale_ipp
+
+Use the Intel Integrated Performance Primitives library (libipp) to perform x86-optimized frame scaling.
+Setting the output width, height and the output display aspect ratio
+works in the same way as for the @ref{scale} filter.
+
+The filter supports only the YUV420 (AV_PIX_FMT_YUV420P) and YUV420p10 (AV_PIX_FMT_YUV420P10LE) pixel formats;
+no input-to-output format conversion is provided.
+
+Scaling of interlaced images is not supported.
+
+The following additional options are accepted:
+@table @option
+
+@item interpolation, flags
+The interpolation algorithm used for resizing. One of the following:
+@table @option
+@item nn
+Nearest neighbour.
+
+@item linear
+@item cubic
+2-parameter cubic (B=0, C=1/2)
+
+@item super
+Supersampling (can be used for downscaling only).
+This is the default interpolation method.
+
+@item lanczos
+@end table
+
+@item ipp_antialiasing
+Enables internal IPP anti-aliasing (@code{0} by default).
+Smooths jagged edges, but decreases performance.
+Can be used with the linear, cubic and lanczos interpolation algorithms only.
+
+@item ipp_threading
+Enables multithreading in the IPP scaling filter (@code{1} by default).
+By default the number of threads is selected automatically based on the number of logical processors available,
+the output image size and the interpolation used for scaling.
+Set @var{ipp_threading} to @code{0} to switch IPP scaling filter threading off.
+
+@item threads
+The maximum number of threads allowed to execute the IPP scaling filter. The actual number of threads used is equal to
+@var{threads} or, in some cases, less, based on the number of logical processors available,
+the output image size and the interpolation used for scaling.
+@end table
+
 @section scale_npp
 
 Use the NVIDIA Performance Primitives (libnpp) to perform scaling and/or pixel
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 5a287364b0..101719c192 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -393,6 +393,7 @@ OBJS-$(CONFIG_SAB_FILTER)                    += vf_sab.o
 OBJS-$(CONFIG_SCALE_FILTER)                  += vf_scale.o scale_eval.o
 OBJS-$(CONFIG_SCALE_CUDA_FILTER)             += vf_scale_cuda.o scale_eval.o \
                                                 vf_scale_cuda.ptx.o vf_scale_cuda_bicubic.ptx.o
+OBJS-$(CONFIG_SCALE_IPP_FILTER)              += vf_scale_ipp.o scale_eval.o
 OBJS-$(CONFIG_SCALE_NPP_FILTER)              += vf_scale_npp.o scale_eval.o
 OBJS-$(CONFIG_SCALE_QSV_FILTER)              += vf_scale_qsv.o
 OBJS-$(CONFIG_SCALE_VAAPI_FILTER)            += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 931d7dbb0d..2b5014da0c 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -375,6 +375,7 @@ extern const AVFilter ff_vf_rotate;
 extern const AVFilter ff_vf_sab;
 extern const AVFilter ff_vf_scale;
 extern const AVFilter ff_vf_scale_cuda;
+extern const AVFilter ff_vf_scale_ipp;
 extern const AVFilter ff_vf_scale_npp;
 extern const AVFilter ff_vf_scale_qsv;
 extern const AVFilter ff_vf_scale_vaapi;
diff --git a/libavfilter/vf_scale_ipp.c b/libavfilter/vf_scale_ipp.c
new file mode 100644
index 0000000000..8632af0da6
--- /dev/null
+++ b/libavfilter/vf_scale_ipp.c
@@ -0,0 +1,1275 @@
+/*
+ * Copyright (c) 2021 Intel
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Intel IPP library based x86 optimized scale video filter.
+ * Uses SIMD instructions up to AVX512 for suitable CPUs
+ * and multithreading for scaling optimization
+ */
+#include <stdio.h>
+#include <string.h>
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/eval.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/x86/cpu.h"
+#include "scale_eval.h"
+#include "video.h"
+
+#include "ippcore.h"
+#include "ippi.h"
+#include "ipps.h"
+
+
+static const char *const var_names[] = {"in_w",  "iw", "in_h",  "ih",
+                                        "out_w", "ow", "out_h", "oh",
+                                        "a", // antialiasing
+                                        NULL};
+
+enum var_name {
+    VAR_IN_W,
+    VAR_IW,
+    VAR_IN_H,
+    VAR_IH,
+    VAR_OUT_W,
+    VAR_OW,
+    VAR_OUT_H,
+    VAR_OH,
+    VAR_A,
+    VARS_NB
+};
+
+// not in use currently
+enum ScaleStage {
+    STAGE_DEINTERLEAVE,
+    STAGE_RESIZE,
+    STAGE_INTERLEAVE,
+    STAGE_NB,
+};
+
+// not in use currently
+typedef struct IPPScaleStageContext {
+    int stage_needed;
+    enum AVPixelFormat in_fmt;
+    enum AVPixelFormat out_fmt;
+
+    struct {
+        int width;
+        int height;
+    } planes_in[3], planes_out[3];
+
+    AVBufferRef *frames_ctx;
+    AVFrame *frame;
+} IPPScaleStageContext;
+
+//forward declaration
+struct IPPparallelResizeInfo;
+struct IPPScaleContext;
+// IPP wrapper function ptr
+typedef IppStatus (*ippResize_YUV420_ptr)(struct IPPScaleContext *scale,  const Ipp8u* pSrc[], int* srcStep, Ipp8u* pDst[], int* dstStep);
+//Exact ippi resize function
+typedef IppStatus (IPP_STDCALL *ippiResize_C1R_L_ptr)(const Ipp8u* pSrc, IppSizeL srcStep, Ipp8u* pDst, IppSizeL dstStep, IppiPointL dstOffset, IppiSizeL dstSize, const IppiResizeSpec* pSpec, Ipp8u* pBufferY);
+typedef IppStatus (IPP_STDCALL *ippiResize_border_C1R_L_ptr)(const Ipp8u* pSrc, IppSizeL srcStep, Ipp8u* pDst, IppSizeL dstStep, IppiPointL dstOffset, IppiSizeL dstSize, IppiBorderType border, const Ipp8u* pBorderValue, const IppiResizeSpec* pSpec, Ipp8u* pBufferY);
+
+
+typedef IppStatus(*functype_l)(IppSizeL i, void* arg);
+
+typedef struct
+{
+    IppiPointL        split;              // number of tiles along x and along y
+    IppiSizeL         tileSize;           // size of a regular tile
+    IppiSizeL         lastTileSize;       // size of the last (tail) tile; may be larger than tileSize
+    IppSizeL          tileBufferSize;     // work buffer size for a regular tile, rounded up to a multiple of 64
+    IppSizeL          lastTileBufferSize; // work buffer size for the last tile, rounded up to a multiple of 64
+    IppSizeL          numTiles;           // number of tiles; one work buffer is allocated per tile
+} IppResizeInfo_LT;
+
+// currently the AV_PIX_FMT_YUV420P and  AV_PIX_FMT_YUV420P10LE are supported only
+static const enum AVPixelFormat supported_formats[] = {
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV420P10LE,
+    AV_PIX_FMT_NONE
+};
+
+typedef struct IPPScaleContext {
+  const AVClass *class;
+  AVDictionary *opts;
+
+// Output sw format. AV_PIX_FMT_NONE for no conversion.
+  enum AVPixelFormat format;  // not in use currently
+  /**
+   * New dimensions. Special values are:
+   *   0 = original width/height
+   *  -1 = keep original aspect
+   *  -N = try to keep aspect but make sure it is divisible by N
+   */
+  int w, h;
+  char *size_str;
+  unsigned int flags; /// scaling flags
+
+  int interlaced; // not in use currently
+
+  char *w_expr; ///< width  expression string
+  char *h_expr; ///< height expression string
+  AVExpr *w_pexpr;
+  AVExpr *h_pexpr;
+  double var_values[VARS_NB];
+
+  char *flags_str;
+
+  int force_original_aspect_ratio;
+  int force_divisible_by;
+
+  int interpolation;
+  int ipp_antialiasing;
+
+  int ipp_threading;
+  Ipp32u ipp_threads;
+  int ippDataType; //ipp8u or ipp16u
+  IppiBorderSize   borderSize; //not in use  currently
+
+  //ipp functions pointers to switch between interpolation types and  8/16 bits data
+  ippResize_YUV420_ptr ippResize_YUV420_func;
+  ippiResize_C1R_L_ptr ippiResize_C1R_L_func;
+  ippiResize_border_C1R_L_ptr ippiResize_border_C1R_L_func;
+
+  //typedef int (avfilter_action_func)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
+  avfilter_action_func *ippiResize_Y420_LT_ptr;
+  avfilter_action_func *ippiResize_UV420_LT_ptr;
+
+  IppiResizeSpec *pSpec;
+  IppiResizeSpec *pSpecUV; //for UV data resize, 2 times smaller components
+
+  Ipp8u* pSrc[3]; //all components source ptr
+  Ipp8u* pDst[3]; //all components dst ptr
+  int srcStep[3];
+  int dstStep[3];
+
+  IppiSizeL srcSize;
+  IppiSizeL srcSizeUV;
+  IppiSizeL dstSize;
+  IppiSizeL dstSizeUV;
+  IppResizeInfo_LT tileResizeInfo;
+  IppResizeInfo_LT tileResizeInfoUV;
+
+  unsigned char *pInitBuffer;
+  unsigned char **pBufferY;
+  unsigned char **pBufferU;
+  unsigned char **pBufferV;
+} IPPScaleContext;
+
+AVFilter ff_vf_scale_ipp;
+
+#define IPP_ALIGNED_SIZE(size, align) (((size)+(align)-1)&~((align)-1))
+/* Heuristic constants for image tiling */
+#define RESIZE_NEAREST_MIN_PIX_NUMBER     (256 * 128)
+#define RESIZE_LINEAR_MIN_PIX_NUMBER      (256 * 224)
+#define RESIZE_CUBIC_MIN_PIX_NUMBER       (256 * 224)
+#define RESIZE_LANCZOS_MIN_PIX_NUMBER     RESIZE_CUBIC_MIN_PIX_NUMBER
+#define RESIZE_SUPER_MIN_PIX_NUMBER       (256 * 192)
+
+static const IppiSizeL RESIZE_NEAREST_MIN_TILE_SIZE = { 1,1 };
+static const IppiSizeL RESIZE_LINEAR_MIN_TILE_SIZE = { 8,8 };
+static const IppiSizeL RESIZE_CUBIC_MIN_TILE_SIZE = { 16,16 };
+static const IppiSizeL RESIZE_LANCZOS_MIN_TILE_SIZE = { 24,24 };
+static const IppiSizeL RESIZE_SUPER_MIN_TILE_SIZE = { 16,16 };
+
+/* Fill tileBufferSize/lastTileBufferSize (rounded up to 64) for the tiling in tileResizeInfo; returns the first non-OK IPP status. */
+static IppStatus ippResizeGetTileBufferSize(IppiResizeSpec *pSpec, IppResizeInfo_LT* tileResizeInfo)
+{
+    IppStatus status, lastStatus;
+    IppSizeL  s0 = 0, s1 = 0;
+    /* When the image is split by both rows and columns, the last column can be wider and/or
+       the last row taller, so the regular tiles are sized for the maximum of both tile sizes. */
+    if ((tileResizeInfo->split.y > 1) && (tileResizeInfo->split.x > 1)) {
+        IppiSizeL maxTileSize = {IPP_MAX(tileResizeInfo->tileSize.width, tileResizeInfo->lastTileSize.width),
+                             IPP_MAX(tileResizeInfo->tileSize.height, tileResizeInfo->lastTileSize.height)};
+        status = ippiResizeGetBufferSize_L(pSpec, maxTileSize, 1, &s0);
+    } else {
+        /* the single row or column */
+        status = ippiResizeGetBufferSize_L(pSpec, tileResizeInfo->tileSize, 1, &s0);
+    }
+    lastStatus = ippiResizeGetBufferSize_L(pSpec, tileResizeInfo->lastTileSize, 1, &s1);
+
+    tileResizeInfo->tileBufferSize = IPP_ALIGNED_SIZE(s0, 64);
+    tileResizeInfo->lastTileBufferSize = IPP_ALIGNED_SIZE(s1, 64);
+    return status != ippStsNoErr ? status : lastStatus; /* do not let the second query hide a failure of the first */
+}
+
+/* Raise the piece count *numTiles while (*tileLength * multiplier) exceeds threshold; all three outputs are updated in place. */
+static void split(IppSizeL splitSize, IppSizeL multiplier, IppSizeL threshold,  IppSizeL *tileLength, IppSizeL *residual, IppSizeL *numTiles)
+{
+    IppSizeL k, size, res;
+    size = *tileLength;
+    res = *residual;
+    if (size * multiplier > threshold) /* otherwise the outputs are left untouched */
+    {
+        k = *numTiles;
+        while (size * multiplier > threshold)
+        {
+            if (splitSize / (k + 1) == 0) /* one more piece would make the piece length zero */
+                break;
+            k++;
+            size = splitSize / k;
+            res = splitSize % k;
+        }
+        if (res > size) { /* the remainder is longer than one piece: convert it into extra pieces */
+            k += res / size;
+            size = splitSize / k;
+            res = splitSize % k;
+        }
+        *tileLength = size;
+        *residual = res;
+        *numTiles = k;
+    }
+}
+
+/* Pick a tile size for roiSize: the whole ROI when it is small or numThreads is 1, otherwise split by rows, then by columns. */
+static void get_tilesize_simple(IppiSizeL roiSize, IppSizeL minItemNumber, IppiSizeL minTileSize, IppiSizeL *pTileSize, Ipp32u numThreads)
+{
+    /* not splitting the destination image into the tiles */
+    if (roiSize.width * roiSize.height <= minItemNumber || numThreads == 1) {
+        pTileSize->width = roiSize.width;
+        pTileSize->height = roiSize.height;
+    } else {
+        IppiSizeL tileSize;
+        IppiSizeL residualSize = { 0 };
+        IppSizeL  cols = 1, rows = 1;
+        IppSizeL  desiredItemNumber;
+        /* start from a single tile covering the whole ROI */
+        tileSize.width = roiSize.width;
+        tileSize.height = roiSize.height;
+        /* target item count per tile: an even share per thread, but not below minItemNumber or minTileSize.height full rows */
+        desiredItemNumber = tileSize.width * tileSize.height / numThreads;
+        desiredItemNumber = IPP_MAX(minItemNumber, desiredItemNumber);
+        desiredItemNumber = IPP_MAX(tileSize.width * minTileSize.height, desiredItemNumber);
+
+        /* splitting by row */
+        split(roiSize.height, tileSize.width, desiredItemNumber,  &tileSize.height, &residualSize.height, &rows);
+        /* recompute the target for the column pass from the result of the row pass */
+        desiredItemNumber = tileSize.width * tileSize.height * rows / numThreads;
+        desiredItemNumber = IPP_MAX(minItemNumber, desiredItemNumber);
+        desiredItemNumber = IPP_MAX(tileSize.height * minTileSize.width, desiredItemNumber);
+
+        /* splitting by col */
+        split(roiSize.width, tileSize.height, desiredItemNumber, &tileSize.width, &residualSize.width, &cols);
+        /* only the tile size is reported; rows, cols and the residuals are not returned */
+        pTileSize->width = tileSize.width;
+        pTileSize->height = tileSize.height;
+    }
+    return;
+}
+
+/* Derive the tile grid (*pSplit), the regular tile size and the last tile size from a requested tileSize. */
+static void split_to_tiles(IppiSizeL roiSize, IppiSizeL tileSize, IppiPointL *pSplit, IppiSizeL *pTileSize, IppiSizeL *pLastSize)
+{
+    IppSizeL width = roiSize.width;
+    IppSizeL height = roiSize.height;
+    IppSizeL widthT = tileSize.width;
+    IppSizeL heightT = tileSize.height;
+    IppSizeL widthL, heightL;
+    IppSizeL additionX, additionY, additionLastX, additionLastY;
+    if (widthT > width)   widthT = width; /* clamp the requested tile to the ROI */
+    if (heightT > height) heightT = height;
+    widthL = width % widthT; /* leftover columns / rows after the whole tiles */
+    heightL = height % heightT;
+    (*pSplit).x = (IppSizeL)(width / widthT);
+    (*pSplit).y = (IppSizeL)(height / heightT);
+    (*pTileSize).height = heightT;
+    if ((heightL < (*pSplit).y) && heightL) { /* fewer leftover rows than tile rows: keep the clamped tile size */
+        (*pTileSize).width = widthT;
+        (*pTileSize).height = heightT;
+        (*pLastSize).width = widthT + widthL; /* last size = regular tile plus the whole leftover */
+        (*pLastSize).height = heightT + heightL;
+    } else { /* grow every tile by an equal share of the leftover; the last tile also takes what remains */
+        additionX = widthL / (*pSplit).x;
+        additionY = heightL / (*pSplit).y;
+        (*pTileSize).width = widthT + additionX;
+        (*pTileSize).height = heightT + additionY;
+        additionLastX = widthL % ((*pSplit).x);
+        additionLastY = heightL % ((*pSplit).y);
+        (*pLastSize).width = (*pTileSize).width + additionLastX;
+        (*pLastSize).height = heightT + additionY + additionLastY;
+    }
+}
+
+/* Compute offset and size of tile `index` (row-major) in a splitImage.x by splitImage.y grid; ippStsSizeErr if the row is out of range. */
+static IppStatus get_tile_params_by_index(IppSizeL index, IppiPointL splitImage, IppiSizeL tileSize, IppiSizeL tailSize, IppiPointL *pTileOffset, IppiSizeL *pTileSize)
+{
+    IppSizeL i, j;
+    IppSizeL firstGreaterIndex = 1;
+    int k;
+    int add;
+    if (pTileOffset == NULL || pTileSize == NULL) return ippStsNullPtrErr;
+    i = index / splitImage.x; /* tile row */
+    j = index % splitImage.x; /* tile column */
+    if (i >= splitImage.y)
+        return ippStsSizeErr;
+
+    (*pTileOffset).x = j * tileSize.width;
+    (*pTileOffset).y = i * tileSize.height;
+    (*pTileSize).width = (j < splitImage.x - 1) ? tileSize.width : tailSize.width; /* the last column uses the tail width */
+    firstGreaterIndex = tailSize.height - tileSize.height; /* extra rows recorded in the tail tile height */
+    k = splitImage.y - index;
+    add = splitImage.y - firstGreaterIndex; /* first tile row that is one row taller */
+    if ((firstGreaterIndex < splitImage.y) && (tailSize.height > tileSize.height) && (firstGreaterIndex > 0)) {
+        if (i < splitImage.y - firstGreaterIndex) {
+            (*pTileSize).height = tileSize.height;
+        } else { /* the extra rows are handed out one per tile row, starting at tile row `add` */
+            (*pTileSize).height = (tileSize.height + 1);
+            if (k < firstGreaterIndex)
+                (*pTileOffset).y = i * tileSize.height + (i - add) * 1; /* shift by the taller tile rows above */
+            else
+                (*pTileOffset).y = i * tileSize.height;
+        }
+    } else {
+        (*pTileSize).height = (i < splitImage.y - 1) ? tileSize.height : tailSize.height; /* the last row uses the tail height */
+    }
+    return ippStsNoErr;
+}
+
+static void get_tile_by_index(int index, IppiPointL splitImage, IppiSizeL tileSize, IppiSizeL tailSize, IppiPointL *pTileOffset, IppiSizeL *pTileSize)
+{
+    IppiPointL tileOffsetL = { 0 }; /* stays zero if the lookup below rejects the index or a pointer */
+    get_tile_params_by_index(index, splitImage, tileSize, tailSize, &tileOffsetL, pTileSize);
+    pTileOffset->x = tileOffsetL.x; /* copied as-is: source and destination share a type, so no narrowing through int */
+    pTileOffset->y = tileOffsetL.y;
+}
+
+/* Address of sample (w, h): w is counted in samples of the given data type, rows are dataStep bytes apart. */
+static Ipp8u* get_image_pointer_8u(const Ipp8u* pData, IppSizeL dataStep, IppSizeL w, IppSizeL h, int ippDataType)
+{
+    const Ipp8u *pSample;
+    if (ippDataType != ipp8u) /* every other data type is addressed as Ipp16u */
+        pSample = (const Ipp8u*)((const Ipp16u*)pData + w);
+    else
+        pSample = pData + w;
+    return (Ipp8u*)(pSample + h * dataStep);
+}
+
+static int alloc_internal_ipp_buffers(unsigned char **pBuffer[], IppResizeInfo_LT* tileResizeInfo, int ippDataType)
+{
+    int iBufferSize = tileResizeInfo->tileBufferSize;
+    /* one work buffer per tile; returns 0 or AVERROR(ENOMEM), buffers allocated so far stay in *pBuffer */
+    *pBuffer = av_calloc(tileResizeInfo->numTiles, sizeof(unsigned char*));
+    if (!*pBuffer) /* the table itself could not be allocated: do not index into it */
+        return AVERROR(ENOMEM);
+    for (int i = 0; i < tileResizeInfo->numTiles; i++) {
+        if (i == (tileResizeInfo->numTiles - 1)) /* the last tile has its own buffer size */
+            iBufferSize = tileResizeInfo->lastTileBufferSize;
+        (*pBuffer)[i] = (ippDataType == ipp8u) ? ippsMalloc_8u(iBufferSize)
+                                               : (unsigned char*)ippsMalloc_16u(iBufferSize);
+        if ((*pBuffer)[i] == 0)
+            return AVERROR(ENOMEM);
+    }
+    return 0;
+}
+
+/* avfilter_action_func job for chroma: jobs below tileResizeInfoUV.numTiles resize one U tile, the following ones one V tile. */
+static int ippResize_UV420_LT_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    IPPScaleContext *scale = ctx->priv;
+    IppStatus threadStatus;
+    const Ipp8u *pSrcRoiUV = 0;
+    Ipp8u       *pDstRoiUV = 0;
+    IppiSizeL     roiSizeUV = { 0 };
+    IppiPointL    roiOffsetUV, srcRoiOffsetUV;
+    if (jobnr < scale->tileResizeInfoUV.numTiles) { //U
+        get_tile_by_index(jobnr, scale->tileResizeInfoUV.split, scale->tileResizeInfoUV.tileSize, scale->tileResizeInfoUV.lastTileSize, &roiOffsetUV, &roiSizeUV);
+        threadStatus = ippiResizeGetSrcOffset_L(scale->pSpecUV, roiOffsetUV, &srcRoiOffsetUV);
+        if (threadStatus < 0)
+            return threadStatus;
+        /* plane 1, per-tile work buffer pBufferU[jobnr] */
+        pSrcRoiUV = get_image_pointer_8u(scale->pSrc[1], scale->srcStep[1], srcRoiOffsetUV.x, srcRoiOffsetUV.y, scale->ippDataType);
+        pDstRoiUV = get_image_pointer_8u(scale->pDst[1], scale->dstStep[1], roiOffsetUV.x, roiOffsetUV.y, scale->ippDataType);
+        threadStatus = scale->ippiResize_C1R_L_func(pSrcRoiUV, (IppSizeL)scale->srcStep[1], pDstRoiUV, (IppSizeL)scale->dstStep[1], roiOffsetUV, roiSizeUV, scale->pSpecUV, scale->pBufferU[jobnr]);
+    } else { //V
+        get_tile_by_index(jobnr - scale->tileResizeInfoUV.numTiles, scale->tileResizeInfoUV.split, scale->tileResizeInfoUV.tileSize, scale->tileResizeInfoUV.lastTileSize, &roiOffsetUV, &roiSizeUV);
+        threadStatus = ippiResizeGetSrcOffset_L(scale->pSpecUV, roiOffsetUV, &srcRoiOffsetUV);
+        if (threadStatus < 0)
+            return threadStatus;
+        /* plane 2, tile index and work buffer index are both rebased by numTiles */
+        pSrcRoiUV = get_image_pointer_8u(scale->pSrc[2], scale->srcStep[2], srcRoiOffsetUV.x, srcRoiOffsetUV.y, scale->ippDataType);
+        pDstRoiUV = get_image_pointer_8u(scale->pDst[2], scale->dstStep[2], roiOffsetUV.x, roiOffsetUV.y, scale->ippDataType);
+        threadStatus = scale->ippiResize_C1R_L_func(pSrcRoiUV, (IppSizeL)scale->srcStep[2], pDstRoiUV, (IppSizeL)scale->dstStep[2], roiOffsetUV, roiSizeUV, scale->pSpecUV, scale->pBufferV[jobnr- scale->tileResizeInfoUV.numTiles]);
+    }
+    return threadStatus;
+}
+
+/* avfilter_action_func job: jobs below tileResizeInfo.numTiles resize one luma tile, the others are forwarded to the chroma job. */
+static int ippResize_Y420_LT_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    IPPScaleContext *scale = ctx->priv;
+    IppStatus threadStatus;
+    if (jobnr < scale->tileResizeInfo.numTiles) {
+        /* Pointers to the source and destination tiles */
+        const Ipp8u *pSrcRoi = 0;
+        Ipp8u       *pDstRoi = 0;
+        /* Source and destination tile ROI parameters */
+        IppiPointL     roiOffset = { 0 }, srcRoiOffset = { 0 };
+        IppiSizeL      roiSize = { 0 };
+
+        get_tile_by_index(jobnr, scale->tileResizeInfo.split, scale->tileResizeInfo.tileSize, scale->tileResizeInfo.lastTileSize, &roiOffset, &roiSize);
+        threadStatus = ippiResizeGetSrcOffset_L(scale->pSpec, roiOffset, &srcRoiOffset);
+        if (threadStatus < 0)
+            return threadStatus;
+        /* Compute pointers to ROIs */
+        pSrcRoi = get_image_pointer_8u(scale->pSrc[0], scale->srcStep[0], srcRoiOffset.x, srcRoiOffset.y, scale->ippDataType);
+        pDstRoi = get_image_pointer_8u(scale->pDst[0], scale->dstStep[0], roiOffset.x, roiOffset.y, scale->ippDataType);
+        threadStatus = scale->ippiResize_C1R_L_func(pSrcRoi, (IppSizeL)scale->srcStep[0], pDstRoi, (IppSizeL)scale->dstStep[0], roiOffset, roiSize, scale->pSpec, scale->pBufferY[jobnr]);
+    } else {
+        /* chroma jobs are renumbered from zero; keep their status instead of returning an uninitialized value */
+        threadStatus = ippResize_UV420_LT_func(ctx, NULL, jobnr-scale->tileResizeInfo.numTiles, nb_jobs);
+    }
+    return threadStatus;
+}
+
+static int ippResize_UV420_border_LT_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    /* Chroma job of the border-aware resize: jobs below tileResizeInfoUV.numTiles resize one U tile, the following ones one V tile. */
+    IPPScaleContext *scale = ctx->priv;
+    IppStatus threadStatus;
+    const Ipp8u *pSrcRoiUV = 0;
+    Ipp8u       *pDstRoiUV = 0;
+    IppiSizeL     roiSizeUV = { 0 };
+
+    IppiPointL    roiOffsetUV, srcRoiOffsetUV;
+    if (jobnr < scale->tileResizeInfoUV.numTiles) { /* U: plane 1 */
+        get_tile_by_index(jobnr, scale->tileResizeInfoUV.split, scale->tileResizeInfoUV.tileSize, scale->tileResizeInfoUV.lastTileSize, &roiOffsetUV, &roiSizeUV);
+
+        threadStatus = ippiResizeGetSrcOffset_L(scale->pSpecUV, roiOffsetUV, &srcRoiOffsetUV);
+        if (threadStatus < 0) return threadStatus;
+        /* Compute pointers to the source and destination tiles; the resize uses ippBorderRepl with no border value */
+        pSrcRoiUV = get_image_pointer_8u(scale->pSrc[1], scale->srcStep[1], srcRoiOffsetUV.x, srcRoiOffsetUV.y, scale->ippDataType);
+        pDstRoiUV = get_image_pointer_8u(scale->pDst[1], scale->dstStep[1], roiOffsetUV.x, roiOffsetUV.y, scale->ippDataType);
+        threadStatus = scale->ippiResize_border_C1R_L_func(pSrcRoiUV, (IppSizeL)scale->srcStep[1], pDstRoiUV, (IppSizeL)scale->dstStep[1], roiOffsetUV, roiSizeUV, ippBorderRepl, NULL, scale->pSpecUV, scale->pBufferU[jobnr]);
+    } else { /* V: plane 2 */
+        /* Tile index and work buffer index are both rebased by numTiles */
+        get_tile_by_index(jobnr - scale->tileResizeInfoUV.numTiles, scale->tileResizeInfoUV.split, scale->tileResizeInfoUV.tileSize, scale->tileResizeInfoUV.lastTileSize, &roiOffsetUV, &roiSizeUV);
+
+        threadStatus = ippiResizeGetSrcOffset_L(scale->pSpecUV, roiOffsetUV, &srcRoiOffsetUV);
+        if (threadStatus < 0)
+            return threadStatus;
+
+        pSrcRoiUV = get_image_pointer_8u(scale->pSrc[2], scale->srcStep[2], srcRoiOffsetUV.x, srcRoiOffsetUV.y, scale->ippDataType);
+        pDstRoiUV = get_image_pointer_8u(scale->pDst[2], scale->dstStep[2], roiOffsetUV.x, roiOffsetUV.y, scale->ippDataType);
+        threadStatus = scale->ippiResize_border_C1R_L_func(pSrcRoiUV, (IppSizeL)scale->srcStep[2], pDstRoiUV, (IppSizeL)scale->dstStep[2], roiOffsetUV, roiSizeUV, ippBorderRepl, NULL, scale->pSpecUV, scale->pBufferV[jobnr - scale->tileResizeInfoUV.numTiles]);
+    }
+    return threadStatus;
+}
+
+/* Border-aware avfilter_action_func job: jobs below tileResizeInfo.numTiles resize one luma tile, the others go to the chroma job. */
+static int ippResize_Y420_border_LT_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    IPPScaleContext *scale = ctx->priv;
+    IppStatus threadStatus;
+    if (jobnr < scale->tileResizeInfo.numTiles) {
+        /* Pointers to the source and destination tiles */
+        const Ipp8u *pSrcRoi = 0;
+        Ipp8u       *pDstRoi = 0;
+        /* Source and destination tile ROI parameters */
+        IppiPointL     roiOffset = { 0 }, srcRoiOffset = { 0 };
+        IppiSizeL      roiSize = { 0 };
+        get_tile_by_index(jobnr, scale->tileResizeInfo.split, scale->tileResizeInfo.tileSize, scale->tileResizeInfo.lastTileSize, &roiOffset, &roiSize);
+
+        threadStatus = ippiResizeGetSrcOffset_L(scale->pSpec, roiOffset, &srcRoiOffset);
+        if (threadStatus < 0)
+            return threadStatus;
+
+        /* Compute pointers to ROIs */
+        pSrcRoi = get_image_pointer_8u(scale->pSrc[0], scale->srcStep[0], srcRoiOffset.x, srcRoiOffset.y, scale->ippDataType);
+        pDstRoi = get_image_pointer_8u(scale->pDst[0], scale->dstStep[0], roiOffset.x, roiOffset.y, scale->ippDataType);
+        threadStatus = scale->ippiResize_border_C1R_L_func(pSrcRoi, (IppSizeL)scale->srcStep[0], pDstRoi, (IppSizeL)scale->dstStep[0], roiOffset, roiSize, ippBorderRepl, NULL, scale->pSpec, scale->pBufferY[jobnr]);
+    } else {
+        /* chroma jobs are renumbered from zero; keep their status instead of returning an uninitialized value */
+        threadStatus = ippResize_UV420_border_LT_func(ctx, NULL, jobnr - scale->tileResizeInfo.numTiles, nb_jobs);
+    }
+    return threadStatus;
+}
+
+// Single-threaded version: resizes the Y, U and V planes one after another, each as one whole plane.
+static IppStatus ippResize_YUV420_L(IPPScaleContext *scale, const Ipp8u* pSrc[], int* srcStep, Ipp8u* pDst[], int* dstStep)
+{
+    IppiPointL dstRoiOffset = { 0, 0 };
+    IppStatus ippSts, ippStsU, ippStsV;
+    ippSts = scale->ippiResize_C1R_L_func(pSrc[0], (IppSizeL)srcStep[0], pDst[0], (IppSizeL)dstStep[0], dstRoiOffset, scale->dstSize, scale->pSpec, scale->pBufferY[0]);
+    /* both chroma planes reuse the work buffer pBufferY[0] that the luma call used */
+    ippStsU = scale->ippiResize_C1R_L_func(pSrc[1], (IppSizeL)srcStep[1], pDst[1], (IppSizeL)dstStep[1], dstRoiOffset, scale->dstSizeUV, scale->pSpecUV, scale->pBufferY[0]);
+    ippStsV = scale->ippiResize_C1R_L_func(pSrc[2], (IppSizeL)srcStep[2], pDst[2], (IppSizeL)dstStep[2], dstRoiOffset, scale->dstSizeUV, scale->pSpecUV, scale->pBufferY[0]);
+    /* NOTE(review): the statuses are OR-ed, so the result is non-zero if any call's was, but it need not be a valid IppStatus code */
+    return ippSts | ippStsU | ippStsV;
+}
+
+/* Single-threaded border variant: resizes the whole Y, U and V planes with ippBorderRepl; the chroma calls reuse pBufferY[0]. */
+static IppStatus ippResize_YUV420_border_L(IPPScaleContext *scale, const Ipp8u* pSrc[], int* srcStep, Ipp8u* pDst[], int* dstStep)
+{
+    IppiPointL dstRoiOffset = { 0, 0 };
+    IppStatus ippSts, ippStsU, ippStsV;
+    ippSts = scale->ippiResize_border_C1R_L_func(pSrc[0], srcStep[0], pDst[0], dstStep[0], dstRoiOffset, scale->dstSize, ippBorderRepl, NULL, scale->pSpec, scale->pBufferY[0]);
+    ippStsU = scale->ippiResize_border_C1R_L_func(pSrc[1], srcStep[1], pDst[1], dstStep[1], dstRoiOffset, scale->dstSizeUV, ippBorderRepl, NULL, scale->pSpecUV, scale->pBufferY[0]);
+    ippStsV = scale->ippiResize_border_C1R_L_func(pSrc[2], srcStep[2], pDst[2], dstStep[2], dstRoiOffset, scale->dstSizeUV, ippBorderRepl, NULL, scale->pSpecUV, scale->pBufferY[0]);
+    /* NOTE(review): the statuses are OR-ed, so the result is non-zero if any call's was, but it need not be a valid IppStatus code */
+    return ippSts | ippStsU | ippStsV;
+}
+
+// init ipp
+// Get sizes for internal and initialization buffers
+static IppStatus ippInitResize(AVFilterLink *inlink, AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    IPPScaleContext *scale = ctx->priv;
+    IppSizeL iSpecSize=0, iInitSize=0, iBufferSize=0;
+    IppSizeL iSpecSizeUV=0, iInitSizeUV=0,iBufferSizeUV=0;
+    IppStatus ippSts, ippStsUV;
+    IppSizeL minItemNumber;
+    IppiSizeL minTileSize;
+
+    //if scale->ipp_threading==0 just don't use threading independently on other params
+    if (scale->ipp_threading == 1) {
+        if (ctx->nb_threads > 1) {
+            scale->ipp_threads = ff_filter_get_nb_threads(ctx);
+        } else {
+            if (ctx->nb_threads <= 0)
+                scale->ipp_threads = av_cpu_count();
+            if (ctx->nb_threads == 1)
+                scale->ipp_threading = 0;
+        }
+    }
+
+    scale->srcSize.width = inlink->w;
+    scale->srcSize.height = inlink->h;
+    scale->srcSizeUV.width = inlink->w >> 1;
+    scale->srcSizeUV.height = inlink->h >> 1;
+
+    scale->dstSize.width = outlink->w;
+    scale->dstSize.height = outlink->h;
+    scale->dstSizeUV.width = outlink->w >> 1;
+    scale->dstSizeUV.height = outlink->h >> 1;
+
+    scale->ippDataType = ipp8u;
+    if (inlink->format == AV_PIX_FMT_YUV420P10LE) {
+        scale->ippDataType = ipp16u;
+    }
+
+    scale->ippiResize_C1R_L_func = NULL;
+    scale->ippiResize_border_C1R_L_func = NULL;
+
+    ippSts = ippiResizeGetSize_L(scale->srcSize, scale->dstSize, scale->ippDataType, scale->interpolation, scale->ipp_antialiasing,
+                 &iSpecSize, &iInitSize);
+    if (ippSts) {
+        av_log(ctx, AV_LOG_ERROR, "ippiResizeGetSize Y error: %d\n", ippSts);
+        return ippSts;
+    }
+    if (!scale->pSpec)
+        scale->pSpec = (IppiResizeSpec *)ippsMalloc_8u(iSpecSize);
+    if (!scale->pSpec) {
+        av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize Y spec \n");
+        return AVERROR(ENOMEM);
+    }
+
+    ippStsUV = ippiResizeGetSize_L(scale->srcSizeUV, scale->dstSizeUV, scale->ippDataType, scale->interpolation, scale->ipp_antialiasing,
+                   &iSpecSizeUV, &iInitSizeUV);
+    if (ippStsUV) {
+        av_log(ctx, AV_LOG_ERROR, "ippiResizeGetSize UV error: %d\n", ippStsUV);
+        return ippSts;
+    }
+    if (!scale->pSpecUV)
+        scale->pSpecUV = (IppiResizeSpec *)ippsMalloc_8u(iSpecSizeUV);
+    if (!scale->pSpecUV) {
+        av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize UV spec \n");
+        return AVERROR(ENOMEM);
+    }
+
+    // allocate initialization buffer, use the max Y capacity
+    iInitSize = iInitSize > iInitSizeUV ? iInitSize : iInitSizeUV;
+    if (iInitSize &&
+        ((scale->interpolation == ippCubic) || (scale->interpolation == ippLanczos) ||
+         ((scale->ipp_antialiasing == 1) && (scale->interpolation == ippLinear)))) {
+        if (!scale->pInitBuffer){
+            if (scale->ippDataType == ipp8u)
+                scale->pInitBuffer = ippsMalloc_8u(iInitSize);
+            else
+                scale->pInitBuffer = (unsigned char*)ippsMalloc_16u(iInitSize);
+        }
+        if (!scale->pInitBuffer) {
+            av_log(ctx, AV_LOG_ERROR,
+                "Cannot allocate memory for resize init buffer");
+            return AVERROR(ENOMEM);
+        }
+    }
+    // init ipp resizer
+    if (scale->interpolation == ippSuper) {
+        ippSts = ippiResizeSuperInit_L(scale->srcSize, scale->dstSize, scale->ippDataType, scale->pSpec);
+        ippStsUV = ippiResizeSuperInit_L(scale->srcSizeUV, scale->dstSizeUV, scale->ippDataType,  scale->pSpecUV);
+        scale->ippiResize_Y420_LT_ptr = ippResize_Y420_LT_func;
+        scale->ippiResize_UV420_LT_ptr = ippResize_UV420_LT_func;
+        scale->ippResize_YUV420_func = ippResize_YUV420_L;
+        minItemNumber =  RESIZE_SUPER_MIN_PIX_NUMBER;
+        minTileSize = RESIZE_SUPER_MIN_TILE_SIZE;
+        if (scale->ippDataType == ipp8u)
+            scale->ippiResize_C1R_L_func = ippiResizeSuper_8u_C1R_L;
+        else
+            scale->ippiResize_C1R_L_func = (ippiResize_C1R_L_ptr)ippiResizeSuper_16u_C1R_L;
+    } else if (scale->interpolation == ippLinear) {
+        if (scale->ipp_antialiasing == 1) {
+            ippSts = ippiResizeAntialiasingLinearInit_L(scale->srcSize, scale->dstSize, scale->ippDataType,  scale->pSpec,
+                scale->pInitBuffer);
+            ippStsUV = ippiResizeAntialiasingLinearInit_L(scale->srcSizeUV, scale->dstSizeUV, scale->ippDataType,  scale->pSpecUV,
+                scale->pInitBuffer);
+            if (scale->ippDataType == ipp8u)
+                scale->ippiResize_border_C1R_L_func = ippiResizeAntialiasing_8u_C1R_L;
+            if (scale->ippDataType == ipp16u)
+                scale->ippiResize_border_C1R_L_func = (ippiResize_border_C1R_L_ptr)ippiResizeAntialiasing_16u_C1R_L;
+        } else {
+            ippSts = ippiResizeLinearInit_L(scale->srcSize, scale->dstSize, scale->ippDataType,  scale->pSpec);
+            ippStsUV = ippiResizeLinearInit_L(scale->srcSizeUV, scale->dstSizeUV, scale->ippDataType,  scale->pSpecUV);
+            if (scale->ippDataType == ipp8u)
+                scale->ippiResize_border_C1R_L_func = ippiResizeLinear_8u_C1R_L;
+            if (scale->ippDataType == ipp16u)
+                scale->ippiResize_border_C1R_L_func = (ippiResize_border_C1R_L_ptr)ippiResizeLinear_16u_C1R_L;
+        }
+        scale->ippiResize_Y420_LT_ptr = ippResize_Y420_border_LT_func;
+        scale->ippiResize_UV420_LT_ptr = ippResize_UV420_border_LT_func;
+        scale->ippResize_YUV420_func = ippResize_YUV420_border_L;
+        minItemNumber = RESIZE_LINEAR_MIN_PIX_NUMBER;
+        minTileSize = RESIZE_LINEAR_MIN_TILE_SIZE;
+    } else if (scale->interpolation == ippLanczos) {
+        // here the simplest Lanczos filtering with numLobes =2 is used.
+        if (scale->ipp_antialiasing == 1) {
+            ippSts = ippiResizeAntialiasingLanczosInit_L(scale->srcSize, scale->dstSize, scale->ippDataType, 2, scale->pSpec,
+                scale->pInitBuffer);
+            ippStsUV = ippiResizeAntialiasingLanczosInit_L(scale->srcSizeUV, scale->dstSizeUV, scale->ippDataType, 2, scale->pSpecUV,
+                scale->pInitBuffer);
+            if (scale->ippDataType == ipp8u)
+                scale->ippiResize_border_C1R_L_func = ippiResizeAntialiasing_8u_C1R_L;
+            else
+                scale->ippiResize_border_C1R_L_func = (ippiResize_border_C1R_L_ptr)ippiResizeAntialiasing_16u_C1R_L;
+        } else {
+            ippSts = ippiResizeLanczosInit_L(scale->srcSize, scale->dstSize, scale->ippDataType, 2, scale->pSpec,
+                scale->pInitBuffer);
+            ippStsUV = ippiResizeLanczosInit_L(scale->srcSizeUV, scale->dstSizeUV, scale->ippDataType, 2, scale->pSpecUV,
+                scale->pInitBuffer);
+            if (scale->ippDataType == ipp8u)
+                scale->ippiResize_border_C1R_L_func = ippiResizeLanczos_8u_C1R_L;
+            else
+                scale->ippiResize_border_C1R_L_func = (ippiResize_border_C1R_L_ptr)ippiResizeLanczos_16u_C1R_L;
+        }
+        scale->ippiResize_Y420_LT_ptr = ippResize_Y420_border_LT_func;
+        scale->ippiResize_UV420_LT_ptr = ippResize_UV420_border_LT_func;
+        scale->ippResize_YUV420_func = ippResize_YUV420_border_L;
+        minItemNumber = RESIZE_LANCZOS_MIN_PIX_NUMBER;
+        minTileSize = RESIZE_LANCZOS_MIN_TILE_SIZE;
+    } else if (scale->interpolation == ippCubic) {
+        // cubic  IPPI_INTER_CUBIC2P_CATMULLROM is used  (B=0, C=1/2)
+        if (scale->ipp_antialiasing == 1) {
+            ippSts = ippiResizeAntialiasingCubicInit_L(scale->srcSize, scale->dstSize, scale->ippDataType, 0, 0.5, scale->pSpec,
+                scale->pInitBuffer);
+            ippStsUV = ippiResizeAntialiasingCubicInit_L(scale->srcSizeUV, scale->dstSizeUV, scale->ippDataType, 0, 0.5, scale->pSpecUV,
+                scale->pInitBuffer);
+            if (scale->ippDataType == ipp8u)
+                scale->ippiResize_border_C1R_L_func = ippiResizeAntialiasing_8u_C1R_L;
+            else
+                scale->ippiResize_border_C1R_L_func = (ippiResize_border_C1R_L_ptr)ippiResizeAntialiasing_16u_C1R_L;
+        } else {
+            ippSts = ippiResizeCubicInit_L(scale->srcSize, scale->dstSize, scale->ippDataType, 0, 0.5, scale->pSpec,
+                scale->pInitBuffer);
+            ippStsUV = ippiResizeCubicInit_L(scale->srcSizeUV, scale->dstSizeUV, scale->ippDataType, 0, 0.5, scale->pSpecUV,
+                scale->pInitBuffer);
+            if (scale->ippDataType == ipp8u)
+                scale->ippiResize_border_C1R_L_func = ippiResizeCubic_8u_C1R_L;
+            else
+                scale->ippiResize_border_C1R_L_func = (ippiResize_border_C1R_L_ptr)ippiResizeCubic_16u_C1R_L;
+        }
+        scale->ippiResize_Y420_LT_ptr = ippResize_Y420_border_LT_func;
+        scale->ippiResize_UV420_LT_ptr = ippResize_UV420_border_LT_func;
+        scale->ippResize_YUV420_func = ippResize_YUV420_border_L;
+        minItemNumber = RESIZE_CUBIC_MIN_PIX_NUMBER;
+        minTileSize = RESIZE_CUBIC_MIN_TILE_SIZE;
+    } else if (scale->interpolation == ippNearest) {
+        ippSts = ippiResizeNearestInit_L(scale->srcSize, scale->dstSize, scale->ippDataType, scale->pSpec);
+        ippStsUV = ippiResizeNearestInit_L(scale->srcSizeUV, scale->dstSizeUV, scale->ippDataType, scale->pSpecUV);
+        scale->ippResize_YUV420_func = ippResize_YUV420_L;
+        scale->ippiResize_Y420_LT_ptr = ippResize_Y420_LT_func;
+        scale->ippiResize_UV420_LT_ptr = ippResize_UV420_LT_func;
+        minItemNumber = RESIZE_NEAREST_MIN_PIX_NUMBER;
+        minTileSize = RESIZE_NEAREST_MIN_TILE_SIZE;
+        if (scale->ippDataType == ipp8u)
+            scale->ippiResize_C1R_L_func = ippiResizeNearest_8u_C1R_L;
+        else
+            scale->ippiResize_C1R_L_func = (ippiResize_C1R_L_ptr)ippiResizeNearest_16u_C1R_L;
+    }
+    if (ippSts) {
+        av_log(ctx, AV_LOG_ERROR, "ippiResizeInit Y error: %d\n", ippSts);
+        return ippSts;
+    }
+    if (ippStsUV) {
+        av_log(ctx, AV_LOG_ERROR, "ippiResizeInit UV error: %d\n", ippStsUV);
+        return ippSts;
+    }
+
+    if (scale->ipp_threading) {
+        /*Split to tiles !!*/
+        IppiSizeL tileSize, tileSizeUV;
+        int nthrY, nthrUV;
+        // 2/3 of the threads work on Y, 1/3 = 1/6 + 1/6 of threads work on U and V
+        nthrUV = (scale->ipp_threads > 6) ? scale->ipp_threads / 6 : 1;
+        nthrY =  (scale->ipp_threads > 3) ? scale->ipp_threads - 2 * nthrUV : 1;
+
+        get_tilesize_simple(scale->dstSize, minItemNumber, minTileSize, &tileSize, nthrY);
+        split_to_tiles(scale->dstSize, tileSize, &scale->tileResizeInfo.split, &scale->tileResizeInfo.tileSize, &scale->tileResizeInfo.lastTileSize);
+
+        //need to do it for UV planes (not just divide Y tile Size by 2) to deal with uneven tile sizes and numTiles difference
+        get_tilesize_simple(scale->dstSizeUV, minItemNumber, minTileSize, &tileSizeUV, nthrUV);
+        split_to_tiles(scale->dstSizeUV, tileSizeUV, &scale->tileResizeInfoUV.split, &scale->tileResizeInfoUV.tileSize, &scale->tileResizeInfoUV.lastTileSize);
+
+        scale->tileResizeInfo.numTiles = scale->tileResizeInfo.split.x * scale->tileResizeInfo.split.y;
+        scale->tileResizeInfoUV.numTiles = scale->tileResizeInfoUV.split.x * scale->tileResizeInfoUV.split.y;
+        av_log(NULL, AV_LOG_INFO, "Intel IPP uses  %lld tiles (threads) for Y and %lld tiles for UV scale \n", scale->tileResizeInfo.numTiles,
+                scale->tileResizeInfoUV.numTiles * 2);
+
+        ippSts = ippResizeGetTileBufferSize(scale->pSpec, &scale->tileResizeInfo);
+        ippStsUV = ippResizeGetTileBufferSize(scale->pSpecUV, &scale->tileResizeInfoUV);
+    } else {
+        ippSts = ippiResizeGetBufferSize_L(scale->pSpec, scale->dstSize, 1, &iBufferSize);
+        ippStsUV = ippiResizeGetBufferSize_L(scale->pSpecUV, scale->dstSizeUV, 1, &iBufferSizeUV);
+        scale->tileResizeInfo.lastTileBufferSize = iBufferSize;
+        scale->tileResizeInfoUV.lastTileBufferSize = iBufferSizeUV;
+    }
+    if (!scale->pBufferY) {
+        int err = alloc_internal_ipp_buffers(&scale->pBufferY, &scale->tileResizeInfo, scale->ippDataType);
+            if (err) {
+                av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize buffer Y");
+                return AVERROR(ENOMEM);
+            }
+    }
+    if (!scale->pBufferU) {
+        int err = alloc_internal_ipp_buffers(&scale->pBufferU, &scale->tileResizeInfoUV, scale->ippDataType);
+        if (err) {
+            av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize buffer U");
+            return AVERROR(ENOMEM);
+        }
+    }
+    if (!scale->pBufferV) {
+        int err = alloc_internal_ipp_buffers(&scale->pBufferV, &scale->tileResizeInfoUV, scale->ippDataType);
+        if (err) {
+            av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize buffer V");
+            return AVERROR(ENOMEM);
+        }
+    }
+    return ippSts;
+}
+
+/**
+ * Output-link configuration callback.
+ *
+ * Evaluates the width/height expressions, applies the aspect-ratio and
+ * divisibility adjustments, stores the resulting size on the output link,
+ * downgrades option combinations the resizer does not support, derives the
+ * output sample aspect ratio and finally (re)initializes the IPP resize state
+ * through ippInitResize().
+ *
+ * @param outlink output link being configured
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int ippscale_config_props(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    IPPScaleContext *s = ctx->priv;
+    int w, h;
+    int ret;
+
+    if ((ret = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink,
+                                        &w, &h)) < 0)
+        return ret;
+
+    ret = ff_scale_adjust_dimensions(inlink, &w, &h, s->force_original_aspect_ratio,
+                                     s->force_divisible_by);
+    if (ret < 0)
+        return ret;
+
+    // These products are reused below for the sample aspect ratio, so they
+    // have to fit in an int; bail out instead of overflowing.
+    if (((int64_t)h * inlink->w) > INT_MAX || ((int64_t)w * inlink->h) > INT_MAX) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Rescaled value for width or height is too big.\n");
+        return AVERROR(EINVAL);
+    }
+
+    outlink->w = w;
+    outlink->h = h;
+
+    av_log(ctx, AV_LOG_INFO, "Intel IPP based scaling  w:%d h:%d -> w:%d h:%d, interpolation %d \n", inlink->w, inlink->h,
+           outlink->w, outlink->h, s->interpolation);
+
+    if ((s->interpolation == ippSuper || s->interpolation == ippNearest) && s->ipp_antialiasing) {
+        s->ipp_antialiasing = 0;
+        av_log(ctx, AV_LOG_WARNING,
+               "Supersampling and Nearest neighbor interpolations don't support antialiasing, "
+               "antialiasing is disabled.\n");
+    }
+
+    // Supersampling is only usable when neither dimension grows.
+    if (s->interpolation == ippSuper &&
+        (outlink->w > inlink->w || outlink->h > inlink->h)) {
+        s->interpolation = ippCubic;
+        av_log(ctx, AV_LOG_WARNING,
+               "Supersampling not supported for upscaling, using cubic "
+               "instead.\n");
+    }
+
+    if (inlink->sample_aspect_ratio.num)
+        outlink->sample_aspect_ratio =
+            av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h},
+                     inlink->sample_aspect_ratio);
+    else
+        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+    // ippInitResize() reports either an AVERROR code or a raw IPP status;
+    // negative values are failures in both conventions.
+    ret = ippInitResize(inlink, outlink);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Intel IPP resize initialization failed: %d\n", ret);
+        return ret == AVERROR(ENOMEM) ? ret : AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/**
+ * Sanity-check the parsed width/height expressions.
+ *
+ * Rejects the case where neither expression has been parsed, and rejects an
+ * expression that refers to the very output dimension it defines. A width
+ * that depends on the output height combined with a height that depends on
+ * the output width only produces a warning.
+ *
+ * @param ctx filter context whose private data holds the parsed expressions
+ * @return 0 if the expressions are usable, AVERROR(EINVAL) otherwise
+ */
+static int check_exprs(AVFilterContext *ctx)
+{
+    IPPScaleContext *scale = ctx->priv;
+    unsigned w_refs[VARS_NB] = {0};
+    unsigned h_refs[VARS_NB] = {0};
+    int w_needs_out_h, h_needs_out_w;
+
+    if (!(scale->w_pexpr || scale->h_pexpr))
+        return AVERROR(EINVAL);
+
+    // Count how often each variable is referenced by each expression.
+    if (scale->w_pexpr)
+        av_expr_count_vars(scale->w_pexpr, w_refs, VARS_NB);
+    if (scale->h_pexpr)
+        av_expr_count_vars(scale->h_pexpr, h_refs, VARS_NB);
+
+    if (w_refs[VAR_OUT_W] || w_refs[VAR_OW]) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Width expression cannot be self-referencing: '%s'.\n",
+               scale->w_expr);
+        return AVERROR(EINVAL);
+    }
+
+    if (h_refs[VAR_OUT_H] || h_refs[VAR_OH]) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Height expression cannot be self-referencing: '%s'.\n",
+               scale->h_expr);
+        return AVERROR(EINVAL);
+    }
+
+    w_needs_out_h = w_refs[VAR_OUT_H] || w_refs[VAR_OH];
+    h_needs_out_w = h_refs[VAR_OUT_W] || h_refs[VAR_OW];
+    if (w_needs_out_h && h_needs_out_w) {
+        av_log(ctx, AV_LOG_WARNING,
+               "Circular references detected for width '%s' and height '%s' - "
+               "possibly invalid.\n",
+               scale->w_expr, scale->h_expr);
+    }
+
+    return 0;
+}
+
+/**
+ * Parse a new size expression and install it, rolling back on any failure.
+ *
+ * @param ctx       filter context
+ * @param str_expr  current expression string; when non-NULL it is duplicated
+ *                  as a backup and option @p var is set to @p args
+ * @param pexpr_ptr location of the parsed expression to replace
+ * @param var       option name used for av_opt_set() and in error messages
+ * @param args      text of the new expression
+ * @return 0 on success, a negative error code otherwise; on failure the
+ *         previous parsed expression and (if backed up) option string are restored
+ */
+static int scale_parse_expr(AVFilterContext *ctx, char *str_expr,
+                            AVExpr **pexpr_ptr, const char *var,
+                            const char *args)
+{
+    IPPScaleContext *scale = ctx->priv;
+    int ret, is_inited = 0;
+    char *old_str_expr = NULL;
+    AVExpr *old_pexpr = NULL;
+
+    // Keep a copy of the old string so it can be put back in the revert path.
+    if (str_expr) {
+        old_str_expr = av_strdup(str_expr);
+        if (!old_str_expr)
+            return AVERROR(ENOMEM);
+        av_opt_set(scale, var, args, 0);
+    }
+
+    // An already parsed expression means the filter was set up before;
+    // detach it so it survives a failed parse of the new one.
+    if (*pexpr_ptr) {
+        old_pexpr = *pexpr_ptr;
+        *pexpr_ptr = NULL;
+        is_inited = 1;
+    }
+
+    ret =
+        av_expr_parse(pexpr_ptr, args, var_names, NULL, NULL, NULL, NULL, 0, ctx);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Cannot parse expression for %s: '%s'\n", var,
+               args);
+        goto revert;
+    }
+
+    ret = check_exprs(ctx);
+    if (ret < 0)
+        goto revert;
+
+    // Only reconfigure the output when an earlier expression was being replaced.
+    if (is_inited && (ret = ippscale_config_props(ctx->outputs[0])) < 0)
+        goto revert;
+
+    // Success: the backups are no longer needed.
+    av_expr_free(old_pexpr);
+    old_pexpr = NULL;
+    av_freep(&old_str_expr);
+
+    return 0;
+
+revert:
+    // Drop whatever was parsed and restore the previous string/expression.
+    av_expr_free(*pexpr_ptr);
+    *pexpr_ptr = NULL;
+    if (old_str_expr) {
+        av_opt_set(scale, var, old_str_expr, 0);
+        av_free(old_str_expr);
+    }
+    if (old_pexpr)
+        *pexpr_ptr = old_pexpr;
+
+    return ret;
+}
+
+// CPU feature masks passed to ippSetCpuFeatures() by ippInitPreferredCpu().
+// Each mask is built by OR-ing ippCPUID_* bits onto a lower tier; the set of
+// distinct tiers differs between the 64-bit and the 32-bit build.
+#if defined (_M_AMD64) || defined (__x86_64__)
+
+//64-bit architecture
+#define IPP_SSE2 ( ippCPUID_MMX | ippCPUID_SSE | ippCPUID_SSE2 )
+#define IPP_SSE3 ( IPP_SSE2 | ippCPUID_SSE3 )
+#define IPP_SSSE3 ( IPP_SSE3 | ippCPUID_SSSE3 )
+// SSE4 shares the SSSE3 mask in this build
+#define IPP_SSE4 IPP_SSSE3
+#define IPP_SSE42 ( IPP_SSSE3 | ippCPUID_SSE41 | ippCPUID_SSE42 )
+#define IPP_AVX ( IPP_SSE42 | ippCPUID_AVX | ippAVX_ENABLEDBYOS | ippCPUID_F16C )
+#define IPP_AVX2 ( IPP_AVX | ippCPUID_MOVBE | ippCPUID_AVX2 | ippCPUID_PREFETCHW )
+#define IPP_AVX512 ( IPP_AVX2 | ippCPUID_AVX512F | ippCPUID_AVX512CD | ippCPUID_AVX512VL | ippCPUID_AVX512BW | ippCPUID_AVX512DQ | ippAVX512_ENABLEDBYOS )
+
+#else
+
+//32-bit architecture
+// SSE3, SSSE3 and SSE4 collapse onto the SSE2 mask here, and AVX512 onto AVX2
+#define IPP_SSE2 ( ippCPUID_MMX | ippCPUID_SSE | ippCPUID_SSE2 )
+#define IPP_SSE3 IPP_SSE2
+#define IPP_SSE3_MOVBE ( IPP_SSE2 | ippCPUID_SSE3 | ippCPUID_SSSE3 | ippCPUID_MOVBE )
+#define IPP_SSSE3 IPP_SSE2
+#define IPP_SSE4 IPP_SSE2
+#define IPP_SSE42 ( IPP_SSE2 | ippCPUID_SSE3 | ippCPUID_SSSE3 | ippCPUID_SSE41 | ippCPUID_SSE42 )
+#define IPP_AVX ( IPP_SSE42 | ippCPUID_AVX | ippAVX_ENABLEDBYOS |  ippCPUID_F16C )
+#define IPP_AVX2 ( IPP_AVX | ippCPUID_AVX2 | ippCPUID_MOVBE |  ippCPUID_PREFETCHW )
+#define IPP_AVX512 IPP_AVX2
+
+#endif
+
+
+/**
+ * Select the IPP code path that matches the CPU flags reported by libavutil.
+ *
+ * The highest matching tier (AVX512 down to SSE3) is requested through
+ * ippSetCpuFeatures(); any status other than ippStsNoErr from that call is
+ * reported as ippStsErr. When no tier matches, ippInit() is called instead
+ * and its status is returned unchanged.
+ *
+ * @return ippStsNoErr / ippStsErr for an explicit tier, otherwise the
+ *         result of ippInit()
+ */
+static IppStatus ippInitPreferredCpu(void)
+{
+    const int flags = av_get_cpu_flags();
+    IppStatus result;
+
+    if (EXTERNAL_AVX512(flags))
+        result = ippSetCpuFeatures(IPP_AVX512);
+    else if (EXTERNAL_AVX2(flags))
+        result = ippSetCpuFeatures(IPP_AVX2);
+    else if (EXTERNAL_AVX(flags))
+        result = ippSetCpuFeatures(IPP_AVX);
+    else if (EXTERNAL_SSE42(flags))
+        result = ippSetCpuFeatures(IPP_SSE42);
+    else if (EXTERNAL_SSE4(flags))
+        result = ippSetCpuFeatures(IPP_SSE4);
+    else if (EXTERNAL_SSSE3(flags))
+        result = ippSetCpuFeatures(IPP_SSSE3);
+    else if (EXTERNAL_SSE3(flags))
+        result = ippSetCpuFeatures(IPP_SSE3);
+    else
+        return ippInit();
+
+    // Anything but a clean success is folded into the generic error status.
+    return result == ippStsNoErr ? ippStsNoErr : ippStsErr;
+}
+
+/**
+ * Filter init callback: normalize the size options, parse the width/height
+ * expressions, reset the IPP state pointers and select the IPP CPU code path.
+ *
+ * @param ctx  filter context
+ * @param opts remaining user options; ownership is taken over by the filter
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static av_cold int ippscale_init(AVFilterContext *ctx, AVDictionary **opts)
+{
+    IPPScaleContext *scale = ctx->priv;
+    int ret;
+
+    // A width given without a height is handled as a size string instead.
+    if (scale->w_expr && !scale->h_expr)
+        FFSWAP(char *, scale->w_expr, scale->size_str);
+
+    if (scale->size_str) {
+        char buf[32];
+        if ((ret = av_parse_video_size(&scale->w, &scale->h, scale->size_str)) < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid size '%s'\n", scale->size_str);
+            return ret;
+        }
+        snprintf(buf, sizeof(buf), "%d", scale->w);
+        av_opt_set(scale, "w", buf, 0);
+        snprintf(buf, sizeof(buf), "%d", scale->h);
+        av_opt_set(scale, "h", buf, 0);
+    }
+    // Missing dimensions default to the input dimensions.
+    if (!scale->w_expr)
+        av_opt_set(scale, "w", "iw", 0);
+    if (!scale->h_expr)
+        av_opt_set(scale, "h", "ih", 0);
+
+    ret = scale_parse_expr(ctx, NULL, &scale->w_pexpr, "width", scale->w_expr);
+    if (ret < 0)
+        return ret;
+
+    ret = scale_parse_expr(ctx, NULL, &scale->h_pexpr, "height", scale->h_expr);
+    if (ret < 0)
+        return ret;
+
+    scale->flags = 0;
+    scale->opts = *opts;
+    *opts = NULL;
+
+    scale->pSpec = NULL;
+    scale->pSpecUV = NULL;
+    scale->pInitBuffer = NULL;
+    scale->pBufferY = NULL;
+    scale->pBufferU = NULL;
+    scale->pBufferV = NULL;
+    scale->tileResizeInfo.numTiles = 1;
+    scale->tileResizeInfoUV.numTiles = 1;
+
+    // IPP status codes are not AVERROR codes: translate instead of returning
+    // the raw value to libavfilter.
+    ret = (int) ippInitPreferredCpu();
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Intel IPP CPU initialization failed: %d\n", ret);
+        return AVERROR_EXTERNAL;
+    }
+    if (ret > 0)
+        av_log(ctx, AV_LOG_WARNING, "Intel IPP CPU initialization returned status %d\n", ret);
+
+    return 0;
+}
+
+static av_cold void ippscale_uninit(AVFilterContext *ctx)
+{
+    IPPScaleContext *scale = ctx->priv;
+
+    if (scale->pBufferY) {
+        for (int i = 0; i < scale->tileResizeInfo.numTiles; i++) {
+            if (scale->pBufferY[i])
+                ippsFree(scale->pBufferY[i]);
+        }
+        av_free(scale->pBufferY);
+    }
+
+   if (scale->pBufferU) {
+        for (int i = 0; i < scale->tileResizeInfoUV.numTiles; i++) {
+            if (scale->pBufferU[i])
+                ippsFree(scale->pBufferU[i]);
+        }
+    av_free(scale->pBufferU);
+    }
+   if (scale->pBufferV) {
+        for (int i = 0; i < scale->tileResizeInfoUV.numTiles; i++) {
+            if (scale->pBufferV[i])
+                ippsFree(scale->pBufferV[i]);
+        }
+    av_free(scale->pBufferV);
+    }
+
+    if (scale->pInitBuffer)
+        ippsFree(scale->pInitBuffer);
+    if (scale->pSpec)
+        ippsFree(scale->pSpec);
+    if (scale->pSpecUV)
+        ippsFree(scale->pSpecUV);
+
+    av_expr_free(scale->w_pexpr);
+    av_expr_free(scale->h_pexpr);
+    scale->w_pexpr = scale->h_pexpr = NULL;
+
+    av_dict_free(&scale->opts);
+}
+
+/**
+ * Advertise the pixel formats listed in supported_formats on all links.
+ *
+ * @param ctx filter context
+ * @return result of ff_set_common_formats(), or AVERROR(ENOMEM) if the
+ *         format list could not be created
+ */
+static int ippscale_query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats = ff_make_format_list(supported_formats);
+
+    return formats ? ff_set_common_formats(ctx, formats) : AVERROR(ENOMEM);
+}
+
+/**
+ * Scale one input frame into a newly allocated output frame.
+ *
+ * The input frame is always consumed: it is freed on success and on every
+ * error path.
+ *
+ * @param link      input link the frame arrived on
+ * @param in        frame to scale
+ * @param frame_out set to the scaled frame on success, to NULL on failure
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int scale_frame(AVFilterLink *link, AVFrame *in, AVFrame **frame_out)
+{
+    AVFilterContext *ctx = link->dst;
+    IPPScaleContext *scale = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+
+    *frame_out = NULL;
+
+    // Mirror the incoming frame's format, size and sample aspect ratio on the input link.
+    link->dst->inputs[0]->format = in->format;
+    link->dst->inputs[0]->w = in->width;
+    link->dst->inputs[0]->h = in->height;
+
+    link->dst->inputs[0]->sample_aspect_ratio.den = in->sample_aspect_ratio.den;
+    link->dst->inputs[0]->sample_aspect_ratio.num = in->sample_aspect_ratio.num;
+
+    if (in->interlaced_frame) {
+        // tbd do deinterlaced path
+        av_log(ctx, AV_LOG_ERROR, "Interlaced input format is not supported. \n");
+        // the frame is owned by this function, so release it before bailing out
+        av_frame_free(&in);
+        return AVERROR(ENOSYS);
+    }
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    *frame_out = out;
+
+    av_frame_copy_props(out, in);
+    out->width = outlink->w;
+    out->height = outlink->h;
+
+    // the whole frame processing
+    scale->pSrc[0] = in->data[0]; scale->pSrc[1] = in->data[1]; scale->pSrc[2] = in->data[2];
+    scale->pDst[0] = out->data[0]; scale->pDst[1] = out->data[1]; scale->pDst[2] = out->data[2];
+    scale->srcStep[0] = in->linesize[0]; scale->srcStep[1] = in->linesize[1];  scale->srcStep[2] = in->linesize[2];
+    scale->dstStep[0] = out->linesize[0]; scale->dstStep[1] = out->linesize[1];  scale->dstStep[2] = out->linesize[2];
+
+    // NOTE(review): neither call's result is captured, so a failure inside the
+    // resize routines is not reported to the caller - confirm whether the
+    // callbacks expose a status that should be propagated.
+    if (scale->ipp_threading) {
+        // one job per luma tile plus one per tile of each of the two chroma planes
+        ctx->internal->execute(ctx, scale->ippiResize_Y420_LT_ptr, &scale, NULL, scale->tileResizeInfo.numTiles+2*scale->tileResizeInfoUV.numTiles);
+    } else {
+        scale->ippResize_YUV420_func(scale, (const Ipp8u**)in->data, in->linesize, out->data, out->linesize);
+    }
+
+    av_frame_free(&in);
+    return 0;
+}
+
+/**
+ * Input pad filter_frame callback: scale the frame and pass the result on.
+ *
+ * @param link input link the frame arrived on
+ * @param in   frame to process
+ * @return result of ff_filter_frame() when scale_frame() produced a frame,
+ *         otherwise the status returned by scale_frame()
+ */
+static int ippscale_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+    AVFrame *scaled;
+    const int status = scale_frame(link, in, &scaled);
+
+    if (!scaled)
+        return status;
+
+    return ff_filter_frame(link->dst->outputs[0], scaled);
+}
+
+// OFFSET: byte offset of an option's backing field inside IPPScaleContext.
+// FLAGS:  flags for ordinary video filtering options.
+// TFLAGS: FLAGS plus AV_OPT_FLAG_RUNTIME_PARAM.
+#define OFFSET(x) offsetof(IPPScaleContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
+#define TFLAGS                                                                 \
+  AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM |                      \
+      AV_OPT_FLAG_RUNTIME_PARAM
+
+// User-visible options of the filter.
+static const AVOption ippscale_options[] = {
+    // output size: "w"/"width" and "h"/"height" are aliases onto the same fields
+    {"w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, .flags = TFLAGS},
+    {"width", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, .flags = TFLAGS},
+    {"h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING,.flags = TFLAGS},
+    {"height", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING,.flags = TFLAGS},
+    {"size", "set video size", OFFSET(size_str),AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS},
+    {"s", "set video size", OFFSET(size_str),AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS},
+    {"force_original_aspect_ratio", "Change w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio),AV_OPT_TYPE_INT, {.i64 = 0}, 0,2, FLAGS, "force_oar"},
+    {"disable",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" },
+    {"decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" },
+    {"increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" },
+    {"force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS },
+
+    // IPP-specific switches
+    {"ipp_threading", "Enables IPP scalinfg filter threading usage (ON by default)", OFFSET(ipp_threading),AV_OPT_TYPE_BOOL, {.i64 = 1},0,1, FLAGS},
+    {"ipp_antialiasing", "Enables internal IPP anti-aliasing (OFF by default)", OFFSET(ipp_antialiasing),AV_OPT_TYPE_BOOL, {.i64 = 0},0,1, FLAGS},
+
+    // "interpolation" and "flags" write the same field and share the named constants below
+    {"interpolation","Interpolation algorithm used for resizing", OFFSET(interpolation), AV_OPT_TYPE_INT,{.i64 = IPPI_INTER_LINEAR},0, INT_MAX,  FLAGS,  "interpolation"},
+    //same as interpolation above, for compatibility with the swscale  filter
+    {"flags", "Interpolation algorithm used for resizing", OFFSET(interpolation), AV_OPT_TYPE_INT,{.i64 = IPPI_INTER_LINEAR},0, INT_MAX,  FLAGS,  "interpolation"},
+    {"nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_NN}, 0, 0, FLAGS, "interpolation"},
+    {"linear", "linear", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_LINEAR}, 0, 0, FLAGS, "interpolation"},
+    {"bilinear", "linear by X and Y, same as linear", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_LINEAR}, 0, 0, FLAGS, "interpolation"},
+    {"cubic", "cubic (B=0, C=1/2)", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_CUBIC2P_CATMULLROM}, 0, 0, FLAGS, "interpolation"},
+    {"super", "supersampling", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_SUPER}, 0, 0, FLAGS, "interpolation"},
+    {"lanczos", "Lanczos", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_LANCZOS}, 0, 0, FLAGS, "interpolation"},
+    {NULL}};
+
+// AVClass of the filter's private context; ties ippscale_options to it.
+static const AVClass ippscale_class = {
+    .class_name = "ippscale",
+    .item_name = av_default_item_name,
+    .option = ippscale_options,
+    .version = LIBAVUTIL_VERSION_INT,
+};
+
+// Single video input pad; incoming frames are handled by ippscale_filter_frame().
+static const AVFilterPad ippscale_inputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = ippscale_filter_frame,
+    },
+    {NULL}
+};
+
+// Single video output pad; its properties are set up by ippscale_config_props().
+static const AVFilterPad ippscale_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+        .config_props = ippscale_config_props,
+    },
+    {NULL}
+};
+
+// Filter definition for "scale_ipp": wires the callbacks, pads and option
+// class defined in this file together.
+AVFilter ff_vf_scale_ipp = {
+    .name = "scale_ipp",
+    .description = NULL_IF_CONFIG_SMALL(
+        "Intel Performance Primitives library based video scaling"),
+    .init_dict = ippscale_init,
+    .uninit = ippscale_uninit,
+    .query_formats = ippscale_query_formats,
+    .priv_size = sizeof(IPPScaleContext),
+    .priv_class = &ippscale_class,
+
+    .inputs = ippscale_inputs,
+    .outputs = ippscale_outputs,
+    .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+};
-- 
2.31.1.windows.1

