This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 9b84b8682f2629a23ccebd68af9f82c701a308e4 Author: Andreas Rheinhardt <[email protected]> AuthorDate: Wed Mar 4 22:16:10 2026 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Mon Mar 9 10:17:26 2026 +0100 avutil/riscv: Add rvv optimizations for pixelutils Adapted from the corresponding me_cmp code. Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavutil/pixelutils.c | 4 ++ libavutil/riscv/Makefile | 1 + .../rv34dsp_init.c => libavutil/riscv/pixelutils.h | 25 +++++--- libavutil/riscv/pixelutils_rvv.S | 71 ++++++++++++++++++++++ 4 files changed, 92 insertions(+), 9 deletions(-) diff --git a/libavutil/pixelutils.c b/libavutil/pixelutils.c index 95cf34282b..6658730724 100644 --- a/libavutil/pixelutils.c +++ b/libavutil/pixelutils.c @@ -30,6 +30,8 @@ #if ARCH_AARCH64 && HAVE_NEON #include "aarch64/pixelutils.h" +#elif ARCH_RISCV +#include "riscv/pixelutils.h" #elif ARCH_X86 && HAVE_X86ASM #include "x86/pixelutils.h" #endif @@ -92,6 +94,8 @@ av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligne #if ARCH_AARCH64 && HAVE_NEON ff_pixelutils_sad_init_aarch64(sad, aligned); +#elif ARCH_RISCV + ff_pixelutils_init_riscv(sad, aligned); #elif ARCH_X86 && HAVE_X86ASM ff_pixelutils_sad_init_x86(sad, aligned); #endif diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile index 5db4c432d9..82a534824a 100644 --- a/libavutil/riscv/Makefile +++ b/libavutil/riscv/Makefile @@ -6,3 +6,4 @@ OBJS += riscv/float_dsp_init.o \ RVV-OBJS += riscv/float_dsp_rvv.o \ riscv/fixed_dsp_rvv.o \ riscv/lls_rvv.o +RVV-OBJS-$(CONFIG_PIXELUTILS) += riscv/pixelutils_rvv.o diff --git a/libavcodec/riscv/rv34dsp_init.c b/libavutil/riscv/pixelutils.h similarity index 61% copy from libavcodec/riscv/rv34dsp_init.c copy to libavutil/riscv/pixelutils.h index a8437f0705..a693ec8e47 100644 --- a/libavcodec/riscv/rv34dsp_init.c +++ b/libavutil/riscv/pixelutils.h @@ -1,6 +1,4 @@ /* - * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS). - * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -18,24 +16,33 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#ifndef AVUTIL_RISCV_PIXELUTILS_H +#define AVUTIL_RISCV_PIXELUTILS_H + +#include <stddef.h> +#include <stdint.h> + #include "config.h" +#include "cpu.h" #include "libavutil/attributes.h" #include "libavutil/cpu.h" -#include "libavutil/riscv/cpu.h" -#include "libavcodec/rv34dsp.h" +#include "libavutil/pixelutils.h" -void ff_rv34_inv_transform_dc_rvv(int16_t *block); -void ff_rv34_idct_dc_add_rvv(uint8_t *dst, ptrdiff_t stride, int dc); +int ff_pixelutils_sad16_rvv(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad8_rvv (const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); -av_cold void ff_rv34dsp_init_riscv(RV34DSPContext *c) +static inline av_cold void ff_pixelutils_init_riscv(av_pixelutils_sad_fn *sad, int aligned) { #if HAVE_RVV int flags = av_get_cpu_flags(); if (flags & AV_CPU_FLAG_RVV_I32 && ff_rv_vlen_least(128)) { - c->rv34_inv_transform_dc = ff_rv34_inv_transform_dc_rvv; - c->rv34_idct_dc_add = ff_rv34_idct_dc_add_rvv; + sad[3] = ff_pixelutils_sad16_rvv; + sad[2] = ff_pixelutils_sad8_rvv; } #endif } +#endif diff --git a/libavutil/riscv/pixelutils_rvv.S b/libavutil/riscv/pixelutils_rvv.S new file mode 100644 index 0000000000..a869b3dc4f --- /dev/null +++ b/libavutil/riscv/pixelutils_rvv.S @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS). + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm.S" + +.macro pix_abs_ret + vsetivli zero, 1, e32, m1, ta, ma + vmv.x.s a0, v0 + ret +.endm + +func ff_pixelutils_sad16_rvv, zve32x + lpad 0 + li a4, 16 + vsetivli zero, 1, e32, m1, ta, ma + vmv.s.x v0, zero +1: + vsetivli zero, 16, e8, m1, tu, ma + vle8.v v4, (a0) + vle8.v v12, (a2) + addi a4, a4, -1 + vwsubu.vv v16, v4, v12 + add a0, a0, a1 + vwsubu.vv v20, v12, v4 + vsetvli zero, zero, e16, m2, tu, ma + vmax.vv v16, v16, v20 + add a2, a2, a3 + vwredsum.vs v0, v16, v0 + bnez a4, 1b + + pix_abs_ret +endfunc + +func ff_pixelutils_sad8_rvv, zve32x + lpad 0 + li a4, 8 + vsetivli zero, 1, e32, m1, ta, ma + vmv.s.x v0, zero +1: + vsetivli zero, 8, e8, mf2, tu, ma + vle8.v v4, (a0) + vle8.v v12, (a2) + addi a4, a4, -1 + vwsubu.vv v16, v4, v12 + add a0, a0, a1 + vwsubu.vv v20, v12, v4 + vsetvli zero, zero, e16, m1, tu, ma + vmax.vv v16, v16, v20 + add a2, a2, a3 + vwredsum.vs v0, v16, v0 + bnez a4, 1b + + pix_abs_ret +endfunc _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
