c908:
dcmul_add_c: 88.0
dcmul_add_rvv_f64: 46.2
vlseg2e64 is not used because it is much slower than vlse64.
vsseg2e64 is not used because it is slightly slower than vsse64.
From 80b6694bc29ed1c37852dc079a6d91a24dd6f18e Mon Sep 17 00:00:00 2001
From: sunyuechi <[email protected]>
Date: Tue, 19 Dec 2023 09:11:28 +0800
Subject: [PATCH] libavfilter/af_afir: R-V V dcmul_add
c908:
dcmul_add_c: 88.0
dcmul_add_rvv_f64: 46.2
---
libavfilter/riscv/af_afir_init.c | 3 +++
libavfilter/riscv/af_afir_rvv.S | 41 ++++++++++++++++++++++++++++++++
2 files changed, 44 insertions(+)
diff --git a/libavfilter/riscv/af_afir_init.c b/libavfilter/riscv/af_afir_init.c
index 52aa18c126..f9a76f108b 100644
--- a/libavfilter/riscv/af_afir_init.c
+++ b/libavfilter/riscv/af_afir_init.c
@@ -27,6 +27,8 @@
void ff_fcmul_add_rvv(float *sum, const float *t, const float *c,
ptrdiff_t len);
+void ff_dcmul_add_rvv(double *sum, const double *t, const double *c,
+ ptrdiff_t len);
av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s)
{
@@ -36,6 +38,7 @@ av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s)
if (flags & AV_CPU_FLAG_RVV_F64) {
if (flags & AV_CPU_FLAG_RVB_ADDR) {
s->fcmul_add = ff_fcmul_add_rvv;
+ s->dcmul_add = ff_dcmul_add_rvv;
}
}
#endif
diff --git a/libavfilter/riscv/af_afir_rvv.S b/libavfilter/riscv/af_afir_rvv.S
index 04ec2e50d8..d1fa6e22e5 100644
--- a/libavfilter/riscv/af_afir_rvv.S
+++ b/libavfilter/riscv/af_afir_rvv.S
@@ -53,3 +53,44 @@ func ff_fcmul_add_rvv, zve64f
ret
endfunc
+
+func ff_dcmul_add_rvv, zve64d
+        li          t1, 16                  // stride in bytes: one complex double (re, im)
+        li          t2, 8                   // offset in bytes from a real part to its imaginary part
+1:
+        vsetvli     t0, a3, e64, m4, ta, ma // t0 = number of complex elements done this pass
+        vlse64.v    v0, (a1), t1            // v0  = real parts read through a1 (t)
+        add         a1, a1, t2
+        vlse64.v    v4, (a2), t1            // v4  = real parts read through a2 (c)
+        add         a2, a2, t2
+        vlse64.v    v12, (a0), t1           // v12 = real parts read through a0 (sum)
+        add         a0, a0, t2
+        vfmacc.vv   v12, v0, v4             // sum.re += t.re * c.re
+        sub         a3, a3, t0              // remaining element count
+        vlse64.v    v8, (a2), t1            // v8  = c.im
+        sub         a2, a2, t2
+        sh3add      a2, t0, a2              // two sh3add steps advance a pointer by 16 * t0 bytes
+        vlse64.v    v16, (a0), t1           // v16 = sum.im
+        sub         a0, a0, t2
+        vfmacc.vv   v16, v0, v8             // sum.im += t.re * c.im
+        sh3add      a2, t0, a2
+        vlse64.v    v0, (a1), t1            // v0  = t.im (t.re is no longer needed)
+        sub         a1, a1, t2
+        sh3add      a1, t0, a1
+        vfnmsac.vv  v12, v0, v8             // sum.re -= t.im * c.im
+        sh3add      a1, t0, a1
+        vfmacc.vv   v16, v0, v4             // sum.im += t.im * c.re
+        vsse64.v    v12, (a0), t1           // store updated real parts
+        add         a0, a0, t2
+        vsse64.v    v16, (a0), t1           // store updated imaginary parts
+        sub         a0, a0, t2
+        sh3add      a0, t0, a0
+        sh3add      a0, t0, a0
+        bgtz        a3, 1b
+        fld         fa0, 0(a1)              // tail: one scalar double just past the vector-processed data
+        fld         fa1, 0(a2)
+        fld         fa2, 0(a0)
+        fmadd.d     fa2, fa0, fa1, fa2      // fa2 = fa0 * fa1 + fa2
+        fsd         fa2, 0(a0)
+        ret
+endfunc
--
2.43.0
_______________________________________________
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".