ops_backend: add support for SWS_OP_FILTER_H

Niklas Haas via ffmpeg-cvslog Sat, 28 Mar 2026 11:28:44 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit 1a8c3d522e006f46283ae857ff769ecc11c4b55c
Author:     Niklas Haas <[email protected]>
AuthorDate: Tue Mar 10 18:59:48 2026 +0100
Commit:     Niklas Haas <[email protected]>
CommitDate: Sat Mar 28 18:50:14 2026 +0100

    swscale/ops_backend: add support for SWS_OP_FILTER_H
    
    Naive scalar loop to serve mainly as a reference for the asm backends.
    
    Sponsored-by: Sovereign Tech Fund
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_chain.h       |  5 ++++
 libswscale/ops_tmpl_common.c | 58 ++++++++++++++++++++++++++++++++++++++++++++
 libswscale/ops_tmpl_float.c  |  7 ++++++
 libswscale/ops_tmpl_int.c    |  9 +++++++
 4 files changed, 79 insertions(+)

diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h
index 648625b7fe..ffdcc577dd 100644
--- a/libswscale/ops_chain.h
+++ b/libswscale/ops_chain.h
@@ -150,6 +150,11 @@ static inline void ff_op_priv_free(SwsOpPriv *priv)
     av_freep(&priv->ptr);
 }
 
+static inline void ff_op_priv_unref(SwsOpPriv *priv)
+{
+    av_refstruct_unref(&priv->ptr);
+}
+
 struct SwsOpTable {
     unsigned cpu_flags;   /* required CPU flags for this table */
     int block_size;       /* fixed block size of this table */
diff --git a/libswscale/ops_tmpl_common.c b/libswscale/ops_tmpl_common.c
index c24aa5eb39..5a0a399f32 100644
--- a/libswscale/ops_tmpl_common.c
+++ b/libswscale/ops_tmpl_common.c
@@ -242,6 +242,59 @@ DECL_READ(filter_v, const int elems)
     CONTINUE(f32block_t, xs, ys, zs, ws);
 }
 
+DECL_SETUP(setup_filter_h, params, out)
+{
+    SwsFilterWeights *filter = params->op->rw.kernel;
+    out->priv.ptr = av_refstruct_ref(filter->weights);
+    out->priv.i32[2] = filter->filter_size;
+    out->free = ff_op_priv_unref;
+    return 0;
+}
+
+/* Fully general horizontal planar filter case */
+DECL_READ(filter_h, const int elems)
+{
+    const SwsOpExec *exec = iter->exec;
+    const int *restrict weights = impl->priv.ptr;
+    const int filter_size = impl->priv.i32[2];
+    const float scale = 1.0f / SWS_FILTER_SCALE;
+    const int xpos = iter->x;
+    weights += filter_size * iter->x;
+
+    f32block_t xs, ys, zs, ws;
+    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
+        const int offset = exec->in_offset_x[xpos + i];
+        pixel_t *start0 = bump_ptr(in0, offset);
+        pixel_t *start1 = bump_ptr(in1, offset);
+        pixel_t *start2 = bump_ptr(in2, offset);
+        pixel_t *start3 = bump_ptr(in3, offset);
+
+        inter_t sx = 0, sy = 0, sz = 0, sw = 0;
+        for (int j = 0; j < filter_size; j++) {
+            const int weight = weights[j];
+            sx += weight * start0[j];
+            if (elems > 1)
+                sy += weight * start1[j];
+            if (elems > 2)
+                sz += weight * start2[j];
+            if (elems > 3)
+                sw += weight * start3[j];
+        }
+
+        xs[i] = (float) sx * scale;
+        if (elems > 1)
+            ys[i] = (float) sy * scale;
+        if (elems > 2)
+            zs[i] = (float) sz * scale;
+        if (elems > 3)
+            ws[i] = (float) sw * scale;
+
+        weights += filter_size;
+    }
+
+    CONTINUE(f32block_t, xs, ys, zs, ws);
+}
+
 #define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX)                                  \
 DECL_IMPL(FUNC##ELEMS##SUFFIX)                                                 \
 {                                                                              \
@@ -260,6 +313,11 @@ WRAP_FILTER(filter, V, 2, _v)
 WRAP_FILTER(filter, V, 3, _v)
 WRAP_FILTER(filter, V, 4, _v)
 
+WRAP_FILTER(filter, H, 1, _h)
+WRAP_FILTER(filter, H, 2, _h)
+WRAP_FILTER(filter, H, 3, _h)
+WRAP_FILTER(filter, H, 4, _h)
+
 static void fn(process)(const SwsOpExec *exec, const void *priv,
                         const int bx_start, const int y_start,
                         int bx_end, int y_end)
diff --git a/libswscale/ops_tmpl_float.c b/libswscale/ops_tmpl_float.c
index cab51bb429..0d00714ff4 100644
--- a/libswscale/ops_tmpl_float.c
+++ b/libswscale/ops_tmpl_float.c
@@ -30,6 +30,7 @@
 #  define PIXEL_TYPE SWS_PIXEL_F32
 #  define PIXEL_MAX  FLT_MAX
 #  define pixel_t    float
+#  define inter_t    float
 #  define block_t    f32block_t
 #  define px         f32
 #else
@@ -260,6 +261,11 @@ static const SwsOpTable fn(op_table_float) = {
         &fn(op_filter3_v),
         &fn(op_filter4_v),
 
+        &fn(op_filter1_h),
+        &fn(op_filter2_h),
+        &fn(op_filter3_h),
+        &fn(op_filter4_h),
+
         NULL
     },
 };
@@ -267,6 +273,7 @@ static const SwsOpTable fn(op_table_float) = {
 #undef PIXEL_TYPE
 #undef PIXEL_MAX
 #undef pixel_t
+#undef inter_t
 #undef block_t
 #undef px
 
diff --git a/libswscale/ops_tmpl_int.c b/libswscale/ops_tmpl_int.c
index 87d09702d2..960d0d9527 100644
--- a/libswscale/ops_tmpl_int.c
+++ b/libswscale/ops_tmpl_int.c
@@ -32,6 +32,7 @@
 #  define PIXEL_MAX  0xFFFFFFFFu
 #  define SWAP_BYTES av_bswap32
 #  define pixel_t    uint32_t
+#  define inter_t    int64_t
 #  define block_t    u32block_t
 #  define px         u32
 #elif BIT_DEPTH == 16
@@ -39,12 +40,14 @@
 #  define PIXEL_MAX  0xFFFFu
 #  define SWAP_BYTES av_bswap16
 #  define pixel_t    uint16_t
+#  define inter_t    int64_t
 #  define block_t    u16block_t
 #  define px         u16
 #elif BIT_DEPTH == 8
 #  define PIXEL_TYPE SWS_PIXEL_U8
 #  define PIXEL_MAX  0xFFu
 #  define pixel_t    uint8_t
+#  define inter_t    int32_t
 #  define block_t    u8block_t
 #  define px         u8
 #else
@@ -496,6 +499,11 @@ static const SwsOpTable fn(op_table_int) = {
         &fn(op_filter3_v),
         &fn(op_filter4_v),
 
+        &fn(op_filter1_h),
+        &fn(op_filter2_h),
+        &fn(op_filter3_h),
+        &fn(op_filter4_h),
+
 #if BIT_DEPTH == 8
         &fn(op_read_bits1),
         &fn(op_read_nibbles1),
@@ -592,6 +600,7 @@ static const SwsOpTable fn(op_table_int) = {
 #undef PIXEL_MAX
 #undef SWAP_BYTES
 #undef pixel_t
+#undef inter_t
 #undef block_t
 #undef px
 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 18/31: swscale/ops_backend: add support for SWS_OP_FILTER_H

Reply via email to