This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 9b84b8682f2629a23ccebd68af9f82c701a308e4
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Wed Mar 4 22:16:10 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Mon Mar 9 10:17:26 2026 +0100

    avutil/riscv: Add rvv optimizations for pixelutils
    
    Adapted from the corresponding me_cmp code.
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavutil/pixelutils.c                             |  4 ++
 libavutil/riscv/Makefile                           |  1 +
 .../rv34dsp_init.c => libavutil/riscv/pixelutils.h | 25 +++++---
 libavutil/riscv/pixelutils_rvv.S                   | 71 ++++++++++++++++++++++
 4 files changed, 92 insertions(+), 9 deletions(-)

diff --git a/libavutil/pixelutils.c b/libavutil/pixelutils.c
index 95cf34282b..6658730724 100644
--- a/libavutil/pixelutils.c
+++ b/libavutil/pixelutils.c
@@ -30,6 +30,8 @@
 
 #if ARCH_AARCH64 && HAVE_NEON
 #include "aarch64/pixelutils.h"
+#elif ARCH_RISCV
+#include "riscv/pixelutils.h"
 #elif ARCH_X86 && HAVE_X86ASM
 #include "x86/pixelutils.h"
 #endif
@@ -92,6 +94,8 @@ av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligne
 
 #if ARCH_AARCH64 && HAVE_NEON
     ff_pixelutils_sad_init_aarch64(sad, aligned);
+#elif ARCH_RISCV
+    ff_pixelutils_init_riscv(sad, aligned);
 #elif ARCH_X86 && HAVE_X86ASM
     ff_pixelutils_sad_init_x86(sad, aligned);
 #endif
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
index 5db4c432d9..82a534824a 100644
--- a/libavutil/riscv/Makefile
+++ b/libavutil/riscv/Makefile
@@ -6,3 +6,4 @@ OBJS +=     riscv/float_dsp_init.o \
 RVV-OBJS += riscv/float_dsp_rvv.o \
             riscv/fixed_dsp_rvv.o \
             riscv/lls_rvv.o
+RVV-OBJS-$(CONFIG_PIXELUTILS) += riscv/pixelutils_rvv.o
diff --git a/libavcodec/riscv/rv34dsp_init.c b/libavutil/riscv/pixelutils.h
similarity index 61%
copy from libavcodec/riscv/rv34dsp_init.c
copy to libavutil/riscv/pixelutils.h
index a8437f0705..a693ec8e47 100644
--- a/libavcodec/riscv/rv34dsp_init.c
+++ b/libavutil/riscv/pixelutils.h
@@ -1,6 +1,4 @@
 /*
- * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
- *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -18,24 +16,33 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVUTIL_RISCV_PIXELUTILS_H
+#define AVUTIL_RISCV_PIXELUTILS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
 #include "config.h"
 
+#include "cpu.h"
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
-#include "libavutil/riscv/cpu.h"
-#include "libavcodec/rv34dsp.h"
+#include "libavutil/pixelutils.h"
 
-void ff_rv34_inv_transform_dc_rvv(int16_t *block);
-void ff_rv34_idct_dc_add_rvv(uint8_t *dst, ptrdiff_t stride, int dc);
+int ff_pixelutils_sad16_rvv(const uint8_t *src1, ptrdiff_t stride1,
+                            const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad8_rvv (const uint8_t *src1, ptrdiff_t stride1,
+                            const uint8_t *src2, ptrdiff_t stride2);
 
-av_cold void ff_rv34dsp_init_riscv(RV34DSPContext *c)
+static inline av_cold void ff_pixelutils_init_riscv(av_pixelutils_sad_fn *sad, int aligned)
 {
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
     if (flags & AV_CPU_FLAG_RVV_I32 && ff_rv_vlen_least(128)) {
-        c->rv34_inv_transform_dc = ff_rv34_inv_transform_dc_rvv;
-        c->rv34_idct_dc_add = ff_rv34_idct_dc_add_rvv;
+        sad[3] = ff_pixelutils_sad16_rvv;
+        sad[2] = ff_pixelutils_sad8_rvv;
     }
 #endif
 }
+#endif
diff --git a/libavutil/riscv/pixelutils_rvv.S b/libavutil/riscv/pixelutils_rvv.S
new file mode 100644
index 0000000000..a869b3dc4f
--- /dev/null
+++ b/libavutil/riscv/pixelutils_rvv.S
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+.macro pix_abs_ret
+        vsetivli        zero, 1, e32, m1, ta, ma
+        vmv.x.s         a0, v0
+        ret
+.endm
+
+func ff_pixelutils_sad16_rvv, zve32x
+        lpad    0
+        li              a4, 16
+        vsetivli        zero, 1, e32, m1, ta, ma
+        vmv.s.x         v0, zero
+1:
+        vsetivli        zero, 16, e8, m1, tu, ma
+        vle8.v          v4, (a0)
+        vle8.v          v12, (a2)
+        addi            a4, a4, -1
+        vwsubu.vv       v16, v4, v12
+        add             a0, a0, a1
+        vwsubu.vv       v20, v12, v4
+        vsetvli         zero, zero, e16, m2, tu, ma
+        vmax.vv         v16, v16, v20
+        add             a2, a2, a3
+        vwredsum.vs     v0, v16, v0
+        bnez            a4, 1b
+
+        pix_abs_ret
+endfunc
+
+func ff_pixelutils_sad8_rvv, zve32x
+        lpad    0
+        li              a4, 8
+        vsetivli        zero, 1, e32, m1, ta, ma
+        vmv.s.x         v0, zero
+1:
+        vsetivli        zero, 8, e8, mf2, tu, ma
+        vle8.v          v4, (a0)
+        vle8.v          v12, (a2)
+        addi            a4, a4, -1
+        vwsubu.vv       v16, v4, v12
+        add             a0, a0, a1
+        vwsubu.vv       v20, v12, v4
+        vsetvli         zero, zero, e16, m1, tu, ma
+        vmax.vv         v16, v16, v20
+        add             a2, a2, a3
+        vwredsum.vs     v0, v16, v0
+        bnez            a4, 1b
+
+        pix_abs_ret
+endfunc

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to