ops: add filter kernel to SwsReadWriteOp

Niklas Haas via ffmpeg-cvslog Sat, 28 Mar 2026 11:26:48 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit bf0991029202542ca304bc992dfc61f0317c0c1b
Author:     Niklas Haas <[email protected]>
AuthorDate: Mon Mar 9 15:39:27 2026 +0100
Commit:     Niklas Haas <[email protected]>
CommitDate: Sat Mar 28 18:50:13 2026 +0100

    swscale/ops: add filter kernel to SwsReadWriteOp
    
    This allows reads to directly embed filter kernels. This is because, in
    practice, a filter needs to be combined with a read anyways. To accomplish
    this, we define filter ops as their semantic high-level operation types, and
    then have the optimizer fuse them with the corresponding read/write ops
    (where possible).
    
    Ultimately, something like this will be needed anyways for subsampled formats,
    and doing it here is just incredibly clean and beneficial compared to each
    of the several alternative designs I explored.
    
    Sponsored-by: Sovereign Tech Fund
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops.c           | 18 ++++++++++++++++++
 libswscale/ops.h           | 22 ++++++++++++++++------
 libswscale/ops_chain.c     |  8 ++++++--
 libswscale/ops_memcpy.c    |  4 ++--
 libswscale/ops_optimizer.c |  8 +++++---
 libswscale/vulkan/ops.c    |  4 ++--
 libswscale/x86/ops.c       |  2 +-
 7 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/libswscale/ops.c b/libswscale/ops.c
index 2ff33dc3b7..88bd289158 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -333,6 +333,11 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
                 op->comps.min[i]   = prev.min[i];
                 op->comps.max[i]   = prev.max[i];
             }
+
+            if (op->rw.filter) {
+                const SwsComps prev = op->comps;
+                apply_filter_weights(&op->comps, &prev, op->rw.kernel);
+            }
             break;
         case SWS_OP_SWAP_BYTES:
             for (int i = 0; i < 4; i++) {
@@ -540,6 +545,9 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
 static void op_uninit(SwsOp *op)
 {
     switch (op->op) {
+    case SWS_OP_READ:
+        av_refstruct_unref(&op->rw.kernel);
+        break;
     case SWS_OP_DITHER:
         av_refstruct_unref(&op->dither.matrix);
         break;
@@ -598,6 +606,10 @@ SwsOpList *ff_sws_op_list_duplicate(const SwsOpList *ops)
     for (int i = 0; i < copy->num_ops; i++) {
         const SwsOp *op = &copy->ops[i];
         switch (op->op) {
+        case SWS_OP_READ:
+            if (op->rw.kernel)
+                av_refstruct_ref(op->rw.kernel);
+            break;
         case SWS_OP_DITHER:
             av_refstruct_ref(op->dither.matrix);
             break;
@@ -811,6 +823,12 @@ void ff_sws_op_desc(AVBPrint *bp, const SwsOp *op, const bool unused[4])
         av_bprintf(bp, "%-20s: %d elem(s) %s >> %d", name,
                    op->rw.elems,  op->rw.packed ? "packed" : "planar",
                    op->rw.frac);
+        if (!op->rw.filter)
+            break;
+        const SwsFilterWeights *kernel = op->rw.kernel;
+        av_bprintf(bp, " + %d tap %s filter (%c)",
+                   kernel->filter_size, kernel->name,
+                   op->rw.filter == SWS_OP_FILTER_H ? 'H' : 'V');
         break;
     case SWS_OP_LSHIFT:
         av_bprintf(bp, "%-20s: << %u", name, op->c.u);
diff --git a/libswscale/ops.h b/libswscale/ops.h
index 651f1e8ecd..827ec04094 100644
--- a/libswscale/ops.h
+++ b/libswscale/ops.h
@@ -105,17 +105,27 @@ typedef struct SwsComps {
 } SwsComps;
 
 typedef struct SwsReadWriteOp {
+    /**
+     * Examples:
+     *   rgba      = 4x u8 packed
+     *   yuv444p   = 3x u8
+     *   rgb565    = 1x u16   <- use SWS_OP_UNPACK to unpack
+     *   monow     = 1x u8 (frac 3)
+     *   rgb4      = 1x u8 (frac 1)
+     */
     uint8_t elems; /* number of elements (of type `op.type`) to read/write */
     uint8_t frac;  /* fractional pixel step factor (log2) */
     bool packed;   /* read multiple elements from a single plane */
 
-    /** Examples:
-     *    rgba      = 4x u8 packed
-     *    yuv444p   = 3x u8
-     *    rgb565    = 1x u16   <- use SWS_OP_UNPACK to unpack
-     *    monow     = 1x u8 (frac 3)
-     *    rgb4      = 1x u8 (frac 1)
+    /**
+     * Filter kernel to apply to each plane while sampling. Currently, only
+     * one shared filter kernel is supported for all planes. (Optional)
+     *
+     * Note: As with SWS_OP_FILTER_*, if a filter kernel is in use, the read
+     * operation will always output floating point values.
      */
+    SwsOpType filter;         /* some value of SWS_OP_FILTER_* */
+    SwsFilterWeights *kernel; /* (refstruct) */
 } SwsReadWriteOp;
 
 typedef struct SwsPackOp {
diff --git a/libswscale/ops_chain.c b/libswscale/ops_chain.c
index 598e51a7db..dedc5f3026 100644
--- a/libswscale/ops_chain.c
+++ b/libswscale/ops_chain.c
@@ -72,8 +72,8 @@ int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func,
  * `op->linear.mask`, but may not contain any columns explicitly ignored by
  * `op->comps.unused`.
  *
- * For SWS_OP_READ, SWS_OP_WRITE, SWS_OP_SWAP_BYTES and SWS_OP_SWIZZLE, the
- * exact type is not checked, just the size.
+ * For unfiltered SWS_OP_READ/SWS_OP_WRITE, SWS_OP_SWAP_BYTES and
+ * SWS_OP_SWIZZLE, the exact type is not checked, just the size.
  *
  * Components set in `next.unused` are ignored when matching. If `flexible`
  * is true, the op body is ignored - only the operation, pixel type, and
@@ -88,6 +88,9 @@ static int op_match(const SwsOp *op, const SwsOpEntry *entry, const SwsComps nex
     switch (op->op) {
     case SWS_OP_READ:
     case SWS_OP_WRITE:
+        if (op->rw.filter && op->type != entry->type)
+            return 0;
+        /* fall through */;
     case SWS_OP_SWAP_BYTES:
     case SWS_OP_SWIZZLE:
         /* Only the size matters for these operations */
@@ -129,6 +132,7 @@ static int op_match(const SwsOp *op, const SwsOpEntry *entry, const SwsComps nex
     case SWS_OP_WRITE:
         if (op->rw.elems   != entry->rw.elems ||
             op->rw.frac    != entry->rw.frac  ||
+            op->rw.filter  != entry->rw.filter ||
             (op->rw.elems > 1 && op->rw.packed != entry->rw.packed))
             return 0;
         return score;
diff --git a/libswscale/ops_memcpy.c b/libswscale/ops_memcpy.c
index fd8b79ec34..a84067da6d 100644
--- a/libswscale/ops_memcpy.c
+++ b/libswscale/ops_memcpy.c
@@ -64,7 +64,7 @@ static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
         const SwsOp *op = &ops->ops[n];
         switch (op->op) {
         case SWS_OP_READ:
-            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac)
+            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac || op->rw.filter)
                 return AVERROR(ENOTSUP);
             for (int i = 0; i < op->rw.elems; i++)
                 p.index[i] = i;
@@ -107,7 +107,7 @@ static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
             break;
 
         case SWS_OP_WRITE:
-            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac)
+            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac || op->rw.filter)
                 return AVERROR(ENOTSUP);
             p.num_planes = op->rw.elems;
             break;
diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c
index 3f8f5b1319..ab1f4e6b01 100644
--- a/libswscale/ops_optimizer.c
+++ b/libswscale/ops_optimizer.c
@@ -331,7 +331,7 @@ retry:
                     op->rw.elems = nb_planes;
                     RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
                         .op = SWS_OP_SWIZZLE,
-                        .type = op->type,
+                        .type = op->rw.filter ? SWS_PIXEL_F32 : op->type,
                         .swizzle = swiz,
                     }));
                     goto retry;
@@ -695,7 +695,8 @@ int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
         return AVERROR(EINVAL);
 
     const SwsOp *read = ff_sws_op_list_input(ops);
-    if (!read || read->rw.frac || (!read->rw.packed && read->rw.elems > 1))
+    if (!read || read->rw.frac || read->rw.filter ||
+        (!read->rw.packed && read->rw.elems > 1))
         return AVERROR(ENOTSUP);
 
     const int read_size = ff_sws_pixel_type_size(read->type);
@@ -745,7 +746,8 @@ int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
         }
 
         case SWS_OP_WRITE: {
-            if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
+            if (op->rw.frac || op->rw.filter ||
+                (!op->rw.packed && op->rw.elems > 1))
                 return AVERROR(ENOTSUP);
 
             /* Initialize to no-op */
diff --git a/libswscale/vulkan/ops.c b/libswscale/vulkan/ops.c
index 7a6c53a5a5..fdb057b1cf 100644
--- a/libswscale/vulkan/ops.c
+++ b/libswscale/vulkan/ops.c
@@ -241,7 +241,7 @@ static int add_ops_glsl(VulkanPriv *p, FFVulkanOpsCtx *s,
 
         switch (op->op) {
         case SWS_OP_READ: {
-            if (op->rw.frac) {
+            if (op->rw.frac || op->rw.filter) {
                 return AVERROR(ENOTSUP);
             } else if (op->rw.packed) {
                 GLSLF(1, %s = %s(imageLoad(src_img[0], pos)).%c%c%c%c;         
,
@@ -257,7 +257,7 @@ static int add_ops_glsl(VulkanPriv *p, FFVulkanOpsCtx *s,
             break;
         }
         case SWS_OP_WRITE: {
-            if (op->rw.frac) {
+            if (op->rw.frac || op->rw.filter) {
                 return AVERROR(ENOTSUP);
             } else if (op->rw.packed) {
                 GLSLF(1, imageStore(dst_img[0], pos, %s(%s).%c%c%c%c);         
,
diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c
index f4d35ec37b..ce4e635cb3 100644
--- a/libswscale/x86/ops.c
+++ b/libswscale/x86/ops.c
@@ -594,7 +594,7 @@ static bool op_is_type_invariant(const SwsOp *op)
     switch (op->op) {
     case SWS_OP_READ:
     case SWS_OP_WRITE:
-        return !(op->rw.elems > 1 && op->rw.packed) && !op->rw.frac;
+        return !(op->rw.elems > 1 && op->rw.packed) && !op->rw.frac && !op->rw.filter;
     case SWS_OP_SWIZZLE:
     case SWS_OP_CLEAR:
         return true;

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 04/31: swscale/ops: add filter kernel to SwsReadWriteOp

Reply via email to