> vssseg2e8 > vlsseg4e8 > vwadd.wv > I can't find where VXRM is initialised for that.
Updated them and added csrwi. <[email protected]> 于2024年7月15日周一 00:30写道: > From: sunyuechi <[email protected]> > > C908 X60 > vp8_loop_filter_simple_h_c : 6.2 5.7 > vp8_loop_filter_simple_h_rvv_i32 : 3.0 2.5 > vp8_loop_filter_simple_v_c : 6.5 6.2 > vp8_loop_filter_simple_v_rvv_i32 : 2.0 1.5 > --- > libavcodec/riscv/vp8dsp_init.c | 18 +++++++- > libavcodec/riscv/vp8dsp_rvv.S | 77 ++++++++++++++++++++++++++++++++++ > 2 files changed, 94 insertions(+), 1 deletion(-) > > diff --git a/libavcodec/riscv/vp8dsp_init.c > b/libavcodec/riscv/vp8dsp_init.c > index dcb6307d5b..8c5b2c8b04 100644 > --- a/libavcodec/riscv/vp8dsp_init.c > +++ b/libavcodec/riscv/vp8dsp_init.c > @@ -49,6 +49,9 @@ VP8_BILIN(16, rvv256); > VP8_BILIN(8, rvv256); > VP8_BILIN(4, rvv256); > > +VP8_LF(rvv128); > +VP8_LF(rvv256); > + > av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c) > { > #if HAVE_RV > @@ -147,9 +150,15 @@ av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c) > av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c) > { > #if HAVE_RVV > + int vlenb = ff_get_rv_vlenb(); > + > +#define init_loop_filter(vlen) \ > + c->vp8_v_loop_filter_simple = > ff_vp8_v_loop_filter16_simple_rvv##vlen; \ > + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter16_simple_rvv##vlen; > + > int flags = av_get_cpu_flags(); > > - if (flags & AV_CPU_FLAG_RVV_I32 && ff_rv_vlen_least(128)) { > + if (flags & AV_CPU_FLAG_RVV_I32 && vlenb >= 16) { > #if __riscv_xlen >= 64 > if (flags & AV_CPU_FLAG_RVV_I64) > c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_rvv; > @@ -159,6 +168,13 @@ av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c) > c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_rvv; > if (flags & AV_CPU_FLAG_RVV_I64) > c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_rvv; > + > + if (vlenb >= 32) { > + init_loop_filter(256); > + } else { > + init_loop_filter(128); > + } > } > +#undef init_loop_filter > #endif > } > diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S > index 0cbf1672f7..3cec4dd135 100644 > --- 
a/libavcodec/riscv/vp8dsp_rvv.S > +++ b/libavcodec/riscv/vp8dsp_rvv.S > @@ -275,6 +275,83 @@ func ff_vp78_idct_dc_add4uv_rvv, zve64x > ret > endfunc > > +.macro filter_fmin len, vlen, a, f1, p0f2, q0f1, p0, q0 > + vsetvlstatic16 \len, \vlen > + vsext.vf2 \q0f1, \a > + vmin.vx \p0f2, \q0f1, a6 > + vmin.vx \q0f1, \q0f1, t6 > + vadd.vi \p0f2, \p0f2, 3 > + vadd.vi \q0f1, \q0f1, 4 > + vsra.vi \p0f2, \p0f2, 3 > + vsra.vi \f1, \q0f1, 3 > + vadd.vv \p0f2, \p0f2, \p0 > + vsub.vv \q0f1, \q0, \f1 > + vmax.vx \p0f2, \p0f2, zero > + vmax.vx \q0f1, \q0f1, zero > +.endm > + > +.macro filter len, vlen, type, normal, inner, dst, stride, fE, fI, thresh > +.ifc \type,v > + sub t3, \dst, \stride // -1 > + sub t2, t3, \stride // -2 > + add t4, \dst, \stride // 1 > + vle8.v v3, (t2) // p1 > + vle8.v v4, (t3) // p0 > + vle8.v v5, (\dst) // q0 > + vle8.v v6, (t4) // q1 > +.else > + addi t2, \dst, -2 > + addi t3, \dst, -1 > + vlsseg4e8.v v3, (t2), \stride > +.endif > + vwsubu.vv v10, v3, v6 // p1-q1 > + vwsubu.vv v12, v5, v4 // q0-p0 > + > + vnclip.wi v16, v10, 0 // clip_int8(p1 - q1) > + vsetvlstatic16 \len, \vlen > + // vp8_simple_limit(dst + i, stride, flim) > + li a6, 2 > + vneg.v v22, v10 > + vneg.v v24, v12 > + vmax.vv v22, v22, v10 > + vmax.vv v24, v24, v12 > + vsrl.vi v22, v22, 1 > + vmacc.vx v22, a6, v24 > + vmsleu.vx v0, v22, \fE > + > + li a7, 3 > + li a6, 124 > + li t6, 123 > + vmul.vx v22, v12, a7 // 3 * (q0 - p0) > + vzext.vf2 v24, v4 // p0 > + vzext.vf2 v20, v5 // q0 > + vsetvlstatic8 \len, \vlen > + vwadd.wv v10, v22, v16 > + vnclip.wi v28, v10, 0 > + filter_fmin \len, \vlen, v28, v12, v26, v10, v24, v20 > + vsetvlstatic8 \len, \vlen > + vnclipu.wi v30, v26, 0 > + vnclipu.wi v31, v10, 0 > +.ifc \type,v > + vse8.v v30, (t3), v0.t > + vse8.v v31, (\dst), v0.t > +.else > + vssseg2e8.v v30, (t3), \stride, v0.t > +.endif > + > +.endm > + > +.irp type,v,h > +.irp vlen,256,128 > +func ff_vp8_\type\()_loop_filter16_simple_rvv\vlen, zve32x > + csrwi vxrm, 0 > + vsetvlstatic8 16, 
\vlen > + filter 16, \vlen, \type, 0, 0, a0, a1, a2, a3, a4 > + ret > +endfunc > +.endr > +.endr > + > .macro bilin_load_h dst mn > addi t5, a2, 1 > vle8.v \dst, (a2) > -- > 2.45.2 > > _______________________________________________ > ffmpeg-devel mailing list > [email protected] > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > [email protected] with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list [email protected] https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email [email protected] with subject "unsubscribe".
