[FFmpeg-devel] [PATCH] VP56DSP patches (PR #21014)

mkver via ffmpeg-devel Tue, 25 Nov 2025 04:18:45 -0800

PR #21014 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21014
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21014.patch



>From a68c465722f642d3b378107a002a04fe53a91da8 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 25 Nov 2025 00:19:58 +0100
Subject: [PATCH 01/10] avcodec/arm/vp6dsp: Remove VP6 edge filter functions

Forgotten in 160ebe0a8d780f6db7c18e824d8ec6f437da33a2.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/arm/Makefile          |   2 -
 libavcodec/arm/vp6dsp_init_arm.c |  39 ----------
 libavcodec/arm/vp6dsp_neon.S     | 121 -------------------------------
 libavcodec/vp56dsp.c             |   4 +-
 libavcodec/vp56dsp.h             |   1 -
 5 files changed, 1 insertion(+), 166 deletions(-)
 delete mode 100644 libavcodec/arm/vp6dsp_init_arm.c
 delete mode 100644 libavcodec/arm/vp6dsp_neon.S

diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 811b364195..e32a0bf49f 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -42,7 +42,6 @@ OBJS-$(CONFIG_RV40_DECODER)            += arm/rv40dsp_init_arm.o
 OBJS-$(CONFIG_SBC_ENCODER)             += arm/sbcdsp_init_arm.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += arm/mlpdsp_init_arm.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += arm/vorbisdsp_init_arm.o
-OBJS-$(CONFIG_VP6_DECODER)             += arm/vp6dsp_init_arm.o
 OBJS-$(CONFIG_VP9_DECODER)             += arm/vp9dsp_init_10bpp_arm.o   \
                                           arm/vp9dsp_init_12bpp_arm.o   \
                                           arm/vp9dsp_init_arm.o
@@ -139,7 +138,6 @@ NEON-OBJS-$(CONFIG_RV40_DECODER)       += arm/rv34dsp_neon.o            \
                                           arm/rv40dsp_neon.o
 NEON-OBJS-$(CONFIG_SBC_ENCODER)        += arm/sbcdsp_neon.o
 NEON-OBJS-$(CONFIG_VORBIS_DECODER)     += arm/vorbisdsp_neon.o
-NEON-OBJS-$(CONFIG_VP6_DECODER)        += arm/vp6dsp_neon.o
 NEON-OBJS-$(CONFIG_VP9_DECODER)        += arm/vp9itxfm_16bpp_neon.o     \
                                           arm/vp9itxfm_neon.o           \
                                           arm/vp9lpf_16bpp_neon.o       \
diff --git a/libavcodec/arm/vp6dsp_init_arm.c b/libavcodec/arm/vp6dsp_init_arm.c
deleted file mode 100644
index a59d61278c..0000000000
--- a/libavcodec/arm/vp6dsp_init_arm.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2010 Mans Rullgard <[email protected]>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "libavutil/attributes.h"
-#include "libavutil/arm/cpu.h"
-
-#include "libavcodec/vp56dsp.h"
-
-void ff_vp6_edge_filter_hor_neon(uint8_t *yuv, ptrdiff_t stride, int t);
-void ff_vp6_edge_filter_ver_neon(uint8_t *yuv, ptrdiff_t stride, int t);
-
-av_cold void ff_vp6dsp_init_arm(VP56DSPContext *s)
-{
-    int cpu_flags = av_get_cpu_flags();
-
-    if (have_neon(cpu_flags)) {
-        s->edge_filter_hor = ff_vp6_edge_filter_hor_neon;
-        s->edge_filter_ver = ff_vp6_edge_filter_ver_neon;
-    }
-}
diff --git a/libavcodec/arm/vp6dsp_neon.S b/libavcodec/arm/vp6dsp_neon.S
deleted file mode 100644
index 03dd28d1cb..0000000000
--- a/libavcodec/arm/vp6dsp_neon.S
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2010 Mans Rullgard <[email protected]>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/arm/asm.S"
-
-.macro  vp6_edge_filter
-        vdup.16         q3,  r2                 @ t
-        vmov.i16        q13, #1
-        vsubl.u8        q0,  d20, d18           @ p[   0] - p[-s]
-        vsubl.u8        q1,  d16, d22           @ p[-2*s] - p[ s]
-        vsubl.u8        q14, d21, d19
-        vsubl.u8        q15, d17, d23
-        vadd.i16        q2,  q0,  q0            @ 2*(p[0]-p[-s])
-        vadd.i16        d29, d28, d28
-        vadd.i16        q0,  q0,  q1            @    p[0]-p[-s]  + p[-2*s]-p[s]
-        vadd.i16        d28, d28, d30
-        vadd.i16        q0,  q0,  q2            @ 3*(p[0]-p[-s]) + p[-2*s]-p[s]
-        vadd.i16        d28, d28, d29
-        vrshr.s16       q0,  q0,  #3            @ v
-        vrshr.s16       d28, d28, #3
-        vsub.i16        q8,  q3,  q13           @ t-1
-        vabs.s16        q1,  q0                 @ V
-        vshr.s16        q2,  q0,  #15           @ s
-        vabs.s16        d30, d28
-        vshr.s16        d29, d28, #15
-        vsub.i16        q12, q1,  q3            @ V-t
-        vsub.i16        d31, d30, d6
-        vsub.i16        q12, q12, q13           @ V-t-1
-        vsub.i16        d31, d31, d26
-        vcge.u16        q12, q12, q8            @ V-t-1 >= t-1
-        vcge.u16        d31, d31, d16
-        vadd.i16        q13, q3,  q3            @ 2*t
-        vadd.i16        d16, d6,  d6
-        vsub.i16        q13, q13, q1            @ 2*t - V
-        vsub.i16        d16, d16, d30
-        vadd.i16        q13, q13, q2            @ += s
-        vadd.i16        d16, d16, d29
-        veor            q13, q13, q2            @ ^= s
-        veor            d16, d16, d29
-        vbif            q0,  q13, q12
-        vbif            d28, d16, d31
-        vmovl.u8        q1,  d20
-        vmovl.u8        q15, d21
-        vaddw.u8        q2,  q0,  d18
-        vaddw.u8        q3,  q14, d19
-        vsub.i16        q1,  q1,  q0
-        vsub.i16        d30, d30, d28
-        vqmovun.s16     d18, q2
-        vqmovun.s16     d19, q3
-        vqmovun.s16     d20, q1
-        vqmovun.s16     d21, q15
-.endm
-
-function ff_vp6_edge_filter_ver_neon, export=1
-        sub             r0,  r0,  r1,  lsl #1
-        vld1.8          {q8},     [r0], r1      @ p[-2*s]
-        vld1.8          {q9},     [r0], r1      @ p[-s]
-        vld1.8          {q10},    [r0], r1      @ p[0]
-        vld1.8          {q11},    [r0]          @ p[s]
-        vp6_edge_filter
-        sub             r0,  r0,  r1,  lsl #1
-        sub             r1,  r1,  #8
-        vst1.8          {d18},    [r0]!
-        vst1.32         {d19[0]}, [r0], r1
-        vst1.8          {d20},    [r0]!
-        vst1.32         {d21[0]}, [r0]
-        bx              lr
-endfunc
-
-function ff_vp6_edge_filter_hor_neon, export=1
-        sub             r3,  r0,  #1
-        sub             r0,  r0,  #2
-        vld1.32         {d16[0]}, [r0], r1
-        vld1.32         {d18[0]}, [r0], r1
-        vld1.32         {d20[0]}, [r0], r1
-        vld1.32         {d22[0]}, [r0], r1
-        vld1.32         {d16[1]}, [r0], r1
-        vld1.32         {d18[1]}, [r0], r1
-        vld1.32         {d20[1]}, [r0], r1
-        vld1.32         {d22[1]}, [r0], r1
-        vld1.32         {d17[0]}, [r0], r1
-        vld1.32         {d19[0]}, [r0], r1
-        vld1.32         {d21[0]}, [r0], r1
-        vld1.32         {d23[0]}, [r0], r1
-        vtrn.8          q8,  q9
-        vtrn.8          q10, q11
-        vtrn.16         q8,  q10
-        vtrn.16         q9,  q11
-        vp6_edge_filter
-        vtrn.8          q9,  q10
-        vst1.16         {d18[0]}, [r3], r1
-        vst1.16         {d20[0]}, [r3], r1
-        vst1.16         {d18[1]}, [r3], r1
-        vst1.16         {d20[1]}, [r3], r1
-        vst1.16         {d18[2]}, [r3], r1
-        vst1.16         {d20[2]}, [r3], r1
-        vst1.16         {d18[3]}, [r3], r1
-        vst1.16         {d20[3]}, [r3], r1
-        vst1.16         {d19[0]}, [r3], r1
-        vst1.16         {d21[0]}, [r3], r1
-        vst1.16         {d19[1]}, [r3], r1
-        vst1.16         {d21[1]}, [r3], r1
-        bx              lr
-endfunc
diff --git a/libavcodec/vp56dsp.c b/libavcodec/vp56dsp.c
index a668712384..1ff67b1c87 100644
--- a/libavcodec/vp56dsp.c
+++ b/libavcodec/vp56dsp.c
@@ -77,9 +77,7 @@ av_cold void ff_vp6dsp_init(VP56DSPContext *s)
 {
     s->vp6_filter_diag4 = ff_vp6_filter_diag4_c;
 
-#if ARCH_ARM
-    ff_vp6dsp_init_arm(s);
-#elif ARCH_X86
+#if ARCH_X86
     ff_vp6dsp_init_x86(s);
 #endif
 }
diff --git a/libavcodec/vp56dsp.h b/libavcodec/vp56dsp.h
index e35e232ea3..f2cbb41a1e 100644
--- a/libavcodec/vp56dsp.h
+++ b/libavcodec/vp56dsp.h
@@ -38,7 +38,6 @@ void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
 void ff_vp5dsp_init(VP56DSPContext *s);
 void ff_vp6dsp_init(VP56DSPContext *s);
 
-void ff_vp6dsp_init_arm(VP56DSPContext *s);
 void ff_vp6dsp_init_x86(VP56DSPContext *s);
 
 #endif /* AVCODEC_VP56DSP_H */
-- 
2.49.1


>From 9c7542c2575563398cb1d8a2b2356a8d7c495baf Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 25 Nov 2025 00:28:32 +0100
Subject: [PATCH 02/10] avcodec/vp56: Fix indentation

Forgotten in 160ebe0a8d780f6db7c18e824d8ec6f437da33a2.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/vp56.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index dc3ae70c66..0ddf7c985c 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -325,9 +325,9 @@ static void vp56_deblock_filter(VP56Context *s, uint8_t *yuv,
                                 ptrdiff_t stride, int dx, int dy)
 {
     if (s->avctx->codec->id == AV_CODEC_ID_VP5) {
-    int t = ff_vp56_filter_threshold[s->quantizer];
-    if (dx)  s->vp56dsp.edge_filter_hor(yuv +         10-dx , stride, t);
-    if (dy)  s->vp56dsp.edge_filter_ver(yuv + stride*(10-dy), stride, t);
+        int t = ff_vp56_filter_threshold[s->quantizer];
+        if (dx)  s->vp56dsp.edge_filter_hor(yuv +         10-dx , stride, t);
+        if (dy)  s->vp56dsp.edge_filter_ver(yuv + stride*(10-dy), stride, t);
     } else {
         int * bounding_values = s->bounding_values_array + 127;
         if (dx)
-- 
2.49.1


>From fc33633b4459e8cac10b76e95b685254e2dd6eab Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 25 Nov 2025 00:44:47 +0100
Subject: [PATCH 03/10] avcodec/vp56dsp: Separate VP5DSP and VP6DSP

They don't have anything in common since
160ebe0a8d780f6db7c18e824d8ec6f437da33a2.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 configure                          |  5 ++---
 libavcodec/Makefile                |  4 ++--
 libavcodec/vp5.c                   |  2 +-
 libavcodec/vp56.c                  |  4 ++--
 libavcodec/vp56.h                  |  5 ++++-
 libavcodec/vp56dsp.h               | 15 +++++++--------
 libavcodec/{vp56dsp.c => vp5dsp.c} | 17 +----------------
 libavcodec/vp6.c                   |  4 ++--
 libavcodec/vp6dsp.c                | 13 +++++++++++--
 libavcodec/x86/vp6dsp_init.c       |  2 +-
 10 files changed, 33 insertions(+), 38 deletions(-)
 rename libavcodec/{vp56dsp.c => vp5dsp.c} (87%)

diff --git a/configure b/configure
index 99734e9d03..c135ce3a75 100755
--- a/configure
+++ b/configure
@@ -2701,7 +2701,6 @@ CONFIG_EXTRA="
     vc1dsp
     videodsp
     vp3dsp
-    vp56dsp
     vp8dsp
     vulkan_encode
     vvc_sei
@@ -3197,8 +3196,8 @@ vc1image_decoder_select="vc1_decoder"
 vorbis_encoder_select="audio_frame_queue"
 vp3_decoder_select="hpeldsp vp3dsp videodsp"
 vp4_decoder_select="vp3_decoder"
-vp5_decoder_select="h264chroma hpeldsp videodsp vp3dsp vp56dsp"
-vp6_decoder_select="h264chroma hpeldsp huffman videodsp vp3dsp vp56dsp"
+vp5_decoder_select="h264chroma hpeldsp videodsp vp3dsp"
+vp6_decoder_select="h264chroma hpeldsp huffman videodsp vp3dsp"
 vp6a_decoder_select="vp6_decoder"
 vp6f_decoder_select="vp6_decoder"
 vp7_decoder_select="h264pred videodsp vp8dsp"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 0cd2408865..3ed3188a9a 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -182,7 +182,6 @@ OBJS-$(CONFIG_AV1_AMF_DECODER)         += amfdec.o
 OBJS-$(CONFIG_VC1DSP)                  += vc1dsp.o
 OBJS-$(CONFIG_VIDEODSP)                += videodsp.o
 OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
-OBJS-$(CONFIG_VP56DSP)                 += vp56dsp.o
 OBJS-$(CONFIG_VP8DSP)                  += vp8dsp.o
 OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o
 OBJS-$(CONFIG_WMA_FREQS)               += wma_freqs.o
@@ -803,7 +802,8 @@ OBJS-$(CONFIG_VORBIS_DECODER)          += vorbisdec.o vorbisdsp.o vorbis.o \
 OBJS-$(CONFIG_VORBIS_ENCODER)          += vorbisenc.o vorbis.o \
                                           vorbis_data.o
 OBJS-$(CONFIG_VP3_DECODER)             += vp3.o jpegquanttables.o
-OBJS-$(CONFIG_VP5_DECODER)             += vp5.o vp56.o vp56data.o vpx_rac.o
+OBJS-$(CONFIG_VP5_DECODER)             += vp5.o vp56.o vp56data.o \
+                                          vp5dsp.o vpx_rac.o
 OBJS-$(CONFIG_VP6_DECODER)             += vp6.o vp56.o vp56data.o \
                                           vp6dsp.o vpx_rac.o
 OBJS-$(CONFIG_VP7_DECODER)             += vp8.o vp8data.o vpx_rac.o
diff --git a/libavcodec/vp5.c b/libavcodec/vp5.c
index 77b479471b..98b8cf41f2 100644
--- a/libavcodec/vp5.c
+++ b/libavcodec/vp5.c
@@ -285,7 +285,7 @@ static av_cold int vp5_decode_init(AVCodecContext *avctx)
 
     if ((ret = ff_vp56_init_context(avctx, s, 1, 0)) < 0)
         return ret;
-    ff_vp5dsp_init(&s->vp56dsp);
+    ff_vp5dsp_init(&s->vp5dsp);
     s->vp56_coord_div = vp5_coord_div;
     s->parse_vector_adjustment = vp5_parse_vector_adjustment;
     s->parse_coeff = vp5_parse_coeff;
diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index 0ddf7c985c..0d13d7a276 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -326,8 +326,8 @@ static void vp56_deblock_filter(VP56Context *s, uint8_t *yuv,
 {
     if (s->avctx->codec->id == AV_CODEC_ID_VP5) {
         int t = ff_vp56_filter_threshold[s->quantizer];
-        if (dx)  s->vp56dsp.edge_filter_hor(yuv +         10-dx , stride, t);
-        if (dy)  s->vp56dsp.edge_filter_ver(yuv + stride*(10-dy), stride, t);
+        if (dx)  s->vp5dsp.edge_filter_hor(yuv +         10-dx , stride, t);
+        if (dy)  s->vp5dsp.edge_filter_ver(yuv + stride*(10-dy), stride, t);
     } else {
         int * bounding_values = s->bounding_values_array + 127;
         if (dx)
diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index af46e2f188..6610fc2892 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -118,7 +118,10 @@ struct vp56_context {
     HpelDSPContext hdsp;
     VideoDSPContext vdsp;
     VP3DSPContext vp3dsp;
-    VP56DSPContext vp56dsp;
+    union {
+        VP5DSPContext vp5dsp;
+        VP6DSPContext vp6dsp;
+    };
     uint8_t idct_scantable[64];
     AVFrame *frames[4];
     uint8_t *edge_emu_buffer_alloc;
diff --git a/libavcodec/vp56dsp.h b/libavcodec/vp56dsp.h
index f2cbb41a1e..692fd0c8ac 100644
--- a/libavcodec/vp56dsp.h
+++ b/libavcodec/vp56dsp.h
@@ -24,20 +24,19 @@
 #include <stddef.h>
 #include <stdint.h>
 
-typedef struct VP56DSPContext {
+typedef struct VP5DSPContext {
     void (*edge_filter_hor)(uint8_t *yuv, ptrdiff_t stride, int t);
     void (*edge_filter_ver)(uint8_t *yuv, ptrdiff_t stride, int t);
+} VP5DSPContext;
 
+typedef struct VP6DSPContext {
     void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
                              const int16_t *h_weights,const int16_t *v_weights);
-} VP56DSPContext;
+} VP6DSPContext;
 
-void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
-                           const int16_t *h_weights, const int16_t *v_weights);
+void ff_vp5dsp_init(VP5DSPContext *s);
 
-void ff_vp5dsp_init(VP56DSPContext *s);
-void ff_vp6dsp_init(VP56DSPContext *s);
-
-void ff_vp6dsp_init_x86(VP56DSPContext *s);
+void ff_vp6dsp_init(VP6DSPContext *s);
+void ff_vp6dsp_init_x86(VP6DSPContext *s);
 
 #endif /* AVCODEC_VP56DSP_H */
diff --git a/libavcodec/vp56dsp.c b/libavcodec/vp5dsp.c
similarity index 87%
rename from libavcodec/vp56dsp.c
rename to libavcodec/vp5dsp.c
index 1ff67b1c87..a06c2cfd5f 100644
--- a/libavcodec/vp56dsp.c
+++ b/libavcodec/vp5dsp.c
@@ -21,8 +21,6 @@
 
 #include <stdint.h>
 
-#include "config.h"
-#include "config_components.h"
 #include "libavutil/attributes.h"
 #include "vp56dsp.h"
 #include "libavutil/common.h"
@@ -43,7 +41,6 @@ static void pfx ## _edge_filter_ ## suf(uint8_t *yuv, ptrdiff_t stride, \
     }                                                                   \
 }
 
-#if CONFIG_VP5_DECODER
 /* Gives very similar result than the vp6 version except in a few cases */
 static int vp5_adjust(int v, int t)
 {
@@ -65,20 +62,8 @@ static int vp5_adjust(int v, int t)
 VP56_EDGE_FILTER(vp5, hor, 1, stride)
 VP56_EDGE_FILTER(vp5, ver, stride, 1)
 
-av_cold void ff_vp5dsp_init(VP56DSPContext *s)
+av_cold void ff_vp5dsp_init(VP5DSPContext *s)
 {
     s->edge_filter_hor = vp5_edge_filter_hor;
     s->edge_filter_ver = vp5_edge_filter_ver;
 }
-#endif /* CONFIG_VP5_DECODER */
-
-#if CONFIG_VP6_DECODER
-av_cold void ff_vp6dsp_init(VP56DSPContext *s)
-{
-    s->vp6_filter_diag4 = ff_vp6_filter_diag4_c;
-
-#if ARCH_X86
-    ff_vp6dsp_init_x86(s);
-#endif
-}
-#endif /* CONFIG_VP6_DECODER */
diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index 48ff9da818..3f4bd42d07 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -641,7 +641,7 @@ static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src,
             vp6_filter_hv4(dst, src+offset1, stride, stride,
                            vp6_block_copy_filter[select][y8]);
         } else {
-            s->vp56dsp.vp6_filter_diag4(dst, src+offset1+((mv.x^mv.y)>>31), stride,
+            s->vp6dsp.vp6_filter_diag4(dst, src+offset1+((mv.x^mv.y)>>31), stride,
                              vp6_block_copy_filter[select][x8],
                              vp6_block_copy_filter[select][y8]);
         }
@@ -661,7 +661,7 @@ static av_cold int vp6_decode_init_context(AVCodecContext *avctx,
     if (ret < 0)
         return ret;
 
-    ff_vp6dsp_init(&s->vp56dsp);
+    ff_vp6dsp_init(&s->vp6dsp);
 
     s->deblock_filtering = 0;
     s->vp56_coord_div = vp6_coord_div;
diff --git a/libavcodec/vp6dsp.c b/libavcodec/vp6dsp.c
index f7f6856330..76c4983960 100644
--- a/libavcodec/vp6dsp.c
+++ b/libavcodec/vp6dsp.c
@@ -27,8 +27,8 @@
 #include "vp56dsp.h"
 
 
-void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
-                           const int16_t *h_weights, const int16_t *v_weights)
+static void vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                               const int16_t *h_weights, const int16_t *v_weights)
 {
     int x, y;
     int tmp[8*11];
@@ -59,3 +59,12 @@ void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
         t += 8;
     }
 }
+
+av_cold void ff_vp6dsp_init(VP6DSPContext *s)
+{
+    s->vp6_filter_diag4 = vp6_filter_diag4_c;
+
+#if ARCH_X86
+    ff_vp6dsp_init_x86(s);
+#endif
+}
diff --git a/libavcodec/x86/vp6dsp_init.c b/libavcodec/x86/vp6dsp_init.c
index 83d45ec36c..07e3becaec 100644
--- a/libavcodec/x86/vp6dsp_init.c
+++ b/libavcodec/x86/vp6dsp_init.c
@@ -28,7 +28,7 @@
 void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
                               const int16_t *h_weights,const int16_t *v_weights);
 
-av_cold void ff_vp6dsp_init_x86(VP56DSPContext *c)
+av_cold void ff_vp6dsp_init_x86(VP6DSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
 
-- 
2.49.1


>From b2f692cf2f4dc05ed01b236a5b0df4511437c978 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 25 Nov 2025 01:00:58 +0100
Subject: [PATCH 04/10] avcodec/vp6dsp: Constify source in vp6_filter_diag4

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/vp56dsp.h         | 2 +-
 libavcodec/vp6dsp.c          | 2 +-
 libavcodec/x86/vp6dsp_init.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vp56dsp.h b/libavcodec/vp56dsp.h
index 692fd0c8ac..3981de4015 100644
--- a/libavcodec/vp56dsp.h
+++ b/libavcodec/vp56dsp.h
@@ -30,7 +30,7 @@ typedef struct VP5DSPContext {
 } VP5DSPContext;
 
 typedef struct VP6DSPContext {
-    void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+    void (*vp6_filter_diag4)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
                              const int16_t *h_weights,const int16_t *v_weights);
 } VP6DSPContext;
 
diff --git a/libavcodec/vp6dsp.c b/libavcodec/vp6dsp.c
index 76c4983960..bdaa054307 100644
--- a/libavcodec/vp6dsp.c
+++ b/libavcodec/vp6dsp.c
@@ -27,7 +27,7 @@
 #include "vp56dsp.h"
 
 
-static void vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+static void vp6_filter_diag4_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
                                const int16_t *h_weights, const int16_t *v_weights)
 {
     int x, y;
diff --git a/libavcodec/x86/vp6dsp_init.c b/libavcodec/x86/vp6dsp_init.c
index 07e3becaec..db9a95767e 100644
--- a/libavcodec/x86/vp6dsp_init.c
+++ b/libavcodec/x86/vp6dsp_init.c
@@ -25,7 +25,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/vp56dsp.h"
 
-void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+void ff_vp6_filter_diag4_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
                               const int16_t *h_weights,const int16_t *v_weights);
 
 av_cold void ff_vp6dsp_init_x86(VP6DSPContext *c)
-- 
2.49.1


>From 7cf5a977d4079224f78533ff97f045fbb3c2c11c Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 25 Nov 2025 10:53:41 +0100
Subject: [PATCH 05/10] tests/checkasm: Test VP6DSP

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 tests/checkasm/Makefile   |  1 +
 tests/checkasm/checkasm.c |  3 ++
 tests/checkasm/checkasm.h |  1 +
 tests/checkasm/vp6dsp.c   | 93 +++++++++++++++++++++++++++++++++++++++
 tests/fate/checkasm.mak   |  1 +
 5 files changed, 99 insertions(+)
 create mode 100644 tests/checkasm/vp6dsp.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 6636bc7774..3762c0d83b 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -50,6 +50,7 @@ AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER)   += utvideodsp.o
 AVCODECOBJS-$(CONFIG_V210_DECODER)      += v210dec.o
 AVCODECOBJS-$(CONFIG_V210_ENCODER)      += v210enc.o
 AVCODECOBJS-$(CONFIG_VORBIS_DECODER)    += vorbisdsp.o
+AVCODECOBJS-$(CONFIG_VP6_DECODER)       += vp6dsp.o
 AVCODECOBJS-$(CONFIG_VP9_DECODER)       += vp9dsp.o
 AVCODECOBJS-$(CONFIG_VVC_DECODER)       += vvc_alf.o vvc_mc.o vvc_sao.o
 
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 20d8f19757..8c64684fa3 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -254,6 +254,9 @@ static const struct {
     #if CONFIG_VP3DSP
         { "vp3dsp", checkasm_check_vp3dsp },
     #endif
+    #if CONFIG_VP6_DECODER
+        { "vp6dsp", checkasm_check_vp6dsp },
+    #endif
     #if CONFIG_VP8DSP
         { "vp8dsp", checkasm_check_vp8dsp },
     #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 45cd23cac4..bd33aba263 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -154,6 +154,7 @@ void checkasm_check_vf_hflip(void);
 void checkasm_check_vf_threshold(void);
 void checkasm_check_vf_sobel(void);
 void checkasm_check_vp3dsp(void);
+void checkasm_check_vp6dsp(void);
 void checkasm_check_vp8dsp(void);
 void checkasm_check_vp9dsp(void);
 void checkasm_check_videodsp(void);
diff --git a/tests/checkasm/vp6dsp.c b/tests/checkasm/vp6dsp.c
new file mode 100644
index 0000000000..a5f1c9c2fc
--- /dev/null
+++ b/tests/checkasm/vp6dsp.c
@@ -0,0 +1,93 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "checkasm.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/macros.h"
+#include "libavutil/mem_internal.h"
+#include "libavcodec/vp6data.h"
+#include "libavcodec/vp56dsp.h"
+
+#define randomize_buffer(buf)                                   \
+    do {                                                        \
+        for (size_t k = 0; k < (sizeof(buf) & ~3); k += 4)      \
+            AV_WN32A(buf + k, rnd());                           \
+        for (size_t k = sizeof(buf) & ~3; k < sizeof(buf); ++k) \
+            buf[k] = rnd();                                     \
+    } while (0)
+
+
+void checkasm_check_vp6dsp(void)
+{
+    enum {
+        BLOCK_SIZE_1D  = 8,
+        SRC_ROWS_ABOVE = 1,
+        SRC_ROWS_BELOW = 2,
+        SRC_COLS_LEFT  = 1,
+        SRC_COLS_RIGHT = 2,
+        SRC_ROWS       = SRC_ROWS_ABOVE + BLOCK_SIZE_1D + SRC_ROWS_BELOW,
+        SRC_ROW_SIZE   = SRC_COLS_LEFT  + BLOCK_SIZE_1D + SRC_COLS_RIGHT,
+        MAX_STRIDE     = 64,    ///< arbitrary
+        SRC_BUF_SIZE   = (SRC_ROWS - 1) * MAX_STRIDE + SRC_ROW_SIZE + 7 /* to vary misalignment */,
+        DST_BUF_SIZE   = (BLOCK_SIZE_1D - 1) * MAX_STRIDE + BLOCK_SIZE_1D,
+    };
+    VP6DSPContext vp6dsp;
+
+    ff_vp6dsp_init(&vp6dsp);
+
+    declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+                       const int16_t *h_weights, const int16_t *v_weights);
+
+    if (check_func(vp6dsp.vp6_filter_diag4, "filter_diag4")) {
+        DECLARE_ALIGNED(8, uint8_t, dstbuf_ref)[DST_BUF_SIZE];
+        DECLARE_ALIGNED(8, uint8_t, dstbuf_new)[DST_BUF_SIZE];
+        DECLARE_ALIGNED(8, uint8_t, srcbuf)[SRC_BUF_SIZE];
+
+        randomize_buffer(dstbuf_ref);
+        randomize_buffer(srcbuf);
+        memcpy(dstbuf_new, dstbuf_ref, sizeof(dstbuf_new));
+
+        ptrdiff_t  stride = (rnd() % (MAX_STRIDE / 16) + 1) * 16;
+        const uint8_t *src = srcbuf + SRC_COLS_LEFT + rnd() % 8U;
+        uint8_t *dst_new = dstbuf_new, *dst_ref = dstbuf_ref;
+
+        if (rnd() & 1) {
+            dst_new += (BLOCK_SIZE_1D - 1) * stride;
+            dst_ref += (BLOCK_SIZE_1D - 1) * stride;
+            src     += (SRC_ROWS - 1) * stride;
+            stride  *= -1;
+        }
+        src += SRC_ROWS_ABOVE * stride;
+
+        unsigned select = rnd() % FF_ARRAY_ELEMS(vp6_block_copy_filter);
+        unsigned x8 = 1 + rnd() % (FF_ARRAY_ELEMS(vp6_block_copy_filter[0]) - 1);
+        unsigned y8 = 1 + rnd() % (FF_ARRAY_ELEMS(vp6_block_copy_filter[0]) - 1);
+        const int16_t *h_weights = vp6_block_copy_filter[select][x8];
+        const int16_t *v_weights = vp6_block_copy_filter[select][y8];
+
+        call_ref(dst_ref, src, stride, h_weights, v_weights);
+        call_new(dst_new, src, stride, h_weights, v_weights);
+        if (memcmp(dstbuf_new, dstbuf_ref, sizeof(dstbuf_new)))
+            fail();
+        bench_new(dst_new, src, stride, h_weights, v_weights);
+    }
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 2be880c8db..f182efde46 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -76,6 +76,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp                                 \
                 fate-checkasm-videodsp                                  \
                 fate-checkasm-vorbisdsp                                 \
                 fate-checkasm-vp3dsp                                    \
+                fate-checkasm-vp6dsp                                    \
                 fate-checkasm-vp8dsp                                    \
                 fate-checkasm-vp9dsp                                    \
                 fate-checkasm-vvc_alf                                   \
-- 
2.49.1


>From 5b0d9eed695ba95421277845b664670b1c7ab9fd Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 25 Nov 2025 10:57:39 +0100
Subject: [PATCH 06/10] avcodec/x86/vp6dsp: Fix outdated comment

Forgotten in 6cb3ee80b3b58d692a722fb38ee05f170ae8b0d2.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vp6dsp.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index 0106541734..61336f6465 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -1,5 +1,5 @@
 ;******************************************************************************
-;* MMX/SSE2-optimized functions for the VP6 decoder
+;* SSE2-optimized functions for the VP6 decoder
 ;* Copyright (C) 2009  Sebastien Lucas <[email protected]>
 ;* Copyright (C) 2009  Zuxy Meng <[email protected]>
 ;*
-- 
2.49.1


>From 6dd59e3cdd55edce907d884230d1f172813b97b7 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 25 Nov 2025 11:15:15 +0100
Subject: [PATCH 07/10] avcodec/x86/vp6dsp: Don't align the stack manually

For most systems (particularly all x64), the stack is already
guaranteed to be sufficiently aligned. So just use x86inc's
stack feature which does the right thing.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vp6dsp.asm | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index 61336f6465..a9340ed05b 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -62,11 +62,7 @@ SECTION .text
 ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
 ;                                const int16_t h_weight[4], const int16_t v_weights[4])
 INIT_XMM sse2
-cglobal vp6_filter_diag4, 5, 7, 8
-    mov          r5, rsp         ; backup stack pointer
-    and         rsp, ~(mmsize-1) ; align stack
-    sub         rsp, 8*11
-
+cglobal vp6_filter_diag4, 5, 6, 8, -8*11
     sub          r1, r2
 
     pxor         m7, m7
@@ -74,25 +70,24 @@ cglobal vp6_filter_diag4, 5, 7, 8
     SPLAT4REGS
 
     mov          r3, rsp
-    mov          r6, 11
+    mov         r5d, 11
 .nextrow:
     DIAG4        r1, -1, 0, 1, 2, r3
     add          r3, 8
     add          r1, r2
-    dec          r6
+    dec         r5d
     jnz .nextrow
 
     movq         m3, [r4]
     SPLAT4REGS
 
     lea          r3, [rsp+8]
-    mov          r6, 8
+    mov         r1d, 8
 .nextcol:
     DIAG4        r3, -8, 0, 8, 16, r0
     add          r3, 8
     add          r0, r2
-    dec          r6
+    dec         r1d
     jnz .nextcol
 
-    mov         rsp, r5          ; restore stack pointer
     RET
-- 
2.49.1


>From 0e440771dacd71e918694456c02a3a3344c97264 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 25 Nov 2025 11:22:45 +0100
Subject: [PATCH 08/10] avcodec/x86/vp6dsp: Simplify splatting

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vp6dsp.asm | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index a9340ed05b..83b26d03cd 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -49,14 +49,11 @@ SECTION .text
 %endmacro
 
 %macro SPLAT4REGS 0
-    pshuflw      m4, m3, 0x0
-    pshuflw      m5, m3, 0x55
-    pshuflw      m6, m3, 0xAA
-    pshuflw      m3, m3, 0xFF
-    punpcklqdq   m4, m4
-    punpcklqdq   m5, m5
-    punpcklqdq   m6, m6
-    punpcklqdq   m3, m3
+    punpcklwd    m3, m3          ; low words w0..w3 doubled: w0 w0 w1 w1 w2 w2 w3 w3
+    pshufd       m4, m3, 0x0     ; m4 = w0 in every word
+    pshufd       m5, m3, 0x55    ; m5 = w1 in every word
+    pshufd       m6, m3, 0xAA    ; m6 = w2 in every word
+    pshufd       m3, m3, 0xFF    ; m3 = w3 in every word
 %endmacro
 
 ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
-- 
2.49.1


>From ca983cb934190a6bb6ac3a9ec056750b06514f37 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 25 Nov 2025 11:27:16 +0100
Subject: [PATCH 09/10] avcodec/x86/vp6dsp: Avoid saturated addition

Only the two middle coefficients are so huge that overflow can happen.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vp6dsp.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index 83b26d03cd..b9b562f84f 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -38,11 +38,11 @@ SECTION .text
     movq          m2, [%1+%5]
     punpcklbw     m1, m7
     punpcklbw     m2, m7
+    paddw         m0, [pw_64]    ; Add 64
     pmullw        m1, m6         ; src[x+8 ] * biweight [2]
     pmullw        m2, m3         ; src[x+16] * biweight [3]
     paddw         m1, m2
     paddsw        m0, m1
-    paddsw        m0, [pw_64]    ; Add 64
     psraw         m0, 7
     packuswb      m0, m0
     movq        [%6], m0
-- 
2.49.1


>From 5b1ffeb1bd7d5ecdd74d1ded43d79e0924133116 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 25 Nov 2025 11:46:40 +0100
Subject: [PATCH 10/10] avcodec/x86/vp6dsp: Avoid packing+unpacking

Instead of packing the intermediate values to bytes and unpacking them
again, store them as words clipped to the 0..255 range.

Old benchmarks:
filter_diag4_c:                                        353.4 ( 1.00x)
filter_diag4_sse2:                                      57.5 ( 6.15x)

New benchmarks:
filter_diag4_c:                                        350.6 ( 1.00x)
filter_diag4_sse2:                                      55.1 ( 6.36x)

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vp6dsp.asm | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index b9b562f84f..1f7443db69 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -26,26 +26,41 @@ cextern pw_64
 
 SECTION .text
 
-%macro DIAG4 6
+%macro DIAG4 7
+%if %7                           ; %7 != 0: inputs are 16-bit words, loaded as-is
+    mova          m0, [%1+%2]
+    mova          m1, [%1+%3]
+%else
     movq          m0, [%1+%2]
     movq          m1, [%1+%3]
     punpcklbw     m0, m7
     punpcklbw     m1, m7
+%endif
     pmullw        m0, m4         ; src[x-8 ] * biweight [0]
     pmullw        m1, m5         ; src[x   ] * biweight [1]
     paddw         m0, m1
+%if %7
+    mova          m1, [%1+%4]
+    mova          m2, [%1+%5]
+%else
     movq          m1, [%1+%4]
     movq          m2, [%1+%5]
     punpcklbw     m1, m7
     punpcklbw     m2, m7
+%endif
     paddw         m0, [pw_64]    ; Add 64
     pmullw        m1, m6         ; src[x+8 ] * biweight [2]
     pmullw        m2, m3         ; src[x+16] * biweight [3]
     paddw         m1, m2
     paddsw        m0, m1
     psraw         m0, 7
+%if %7                           ; %7 != 0: pack to unsigned bytes, store 8 of them
     packuswb      m0, m0
     movq        [%6], m0
+%else
+    pmaxsw        m0, m7         ; clamp negatives to 0 (m7 expected zero); psraw 7 already caps at 255
+    mova        [%6], m0
+%endif
 %endmacro
 
 %macro SPLAT4REGS 0
@@ -59,7 +74,7 @@ SECTION .text
 ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
 ;                                const int16_t h_weight[4], const int16_t v_weights[4])
 INIT_XMM sse2
-cglobal vp6_filter_diag4, 5, 6, 8, -8*11
+cglobal vp6_filter_diag4, 5, 6, 8, -16*11
     sub          r1, r2
 
     pxor         m7, m7
@@ -69,8 +84,8 @@ cglobal vp6_filter_diag4, 5, 6, 8, -8*11
     mov          r3, rsp
     mov         r5d, 11
 .nextrow:
-    DIAG4        r1, -1, 0, 1, 2, r3
-    add          r3, 8
+    DIAG4        r1, -1, 0, 1, 2, r3, 0
+    add          r3, 16
     add          r1, r2
     dec         r5d
     jnz .nextrow
@@ -78,11 +93,11 @@ cglobal vp6_filter_diag4, 5, 6, 8, -8*11
     movq         m3, [r4]
     SPLAT4REGS
 
-    lea          r3, [rsp+8]
+    lea          r3, [rsp+16]
     mov         r1d, 8
 .nextcol:
-    DIAG4        r3, -8, 0, 8, 16, r0
-    add          r3, 8
+    DIAG4        r3, -16, 0, 16, 32, r0, 1
+    add          r3, 16
     add          r0, r2
     dec         r1d
     jnz .nextcol
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PATCH] VP56DSP patches (PR #21014)

Reply via email to