ops: add helper function to split filter subpasses

Niklas Haas via ffmpeg-cvslog Sat, 28 Mar 2026 12:45:07 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit cba54e9e3b2810243cc281244305848abd99f7dd
Author:     Niklas Haas <[email protected]>
AuthorDate: Sun Mar 8 21:03:28 2026 +0100
Commit:     Niklas Haas <[email protected]>
CommitDate: Sat Mar 28 18:50:13 2026 +0100

    swscale/ops: add helper function to split filter subpasses
    
    An operation list containing multiple filter passes, or containing nontrivial
    operations before a filter pass, needs to be split up into multiple execution
    steps with temporary buffers in between, at least for CPU backends.
    
    This helper function introduces the necessary subpass splitting logic.
    
    Sponsored-by: Sovereign Tech Fund
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_internal.h  |  10 +++
 libswscale/ops_optimizer.c | 160 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 170 insertions(+)

diff --git a/libswscale/ops_internal.h b/libswscale/ops_internal.h
index 3db850c290..91509ce67d 100644
--- a/libswscale/ops_internal.h
+++ b/libswscale/ops_internal.h
@@ -117,4 +117,14 @@ int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out
 int ff_sws_solve_shuffle(const SwsOpList *ops, uint8_t shuffle[], int size,
                          uint8_t clear_val, int *read_bytes, int *write_bytes);
 
+/**
+ * Eliminate SWS_OP_FILTER_* operations by merging them with prior SWS_OP_READ
+ * operations. This may require splitting the op list into multiple subpasses,
+ * along filter boundaries. After this function, `ops` will no longer contain
+ * bare filtering operations. The remainder, if any, is output to `out_rest`.
+ *
+ * Only the first filter boundary is split off per call: `ops` is truncated in
+ * place so that it ends with a write to an intermediate planar buffer, and the
+ * remainder starts with a matching read from that buffer. The remainder keeps
+ * every operation from that filter onward, so it may need splitting again.
+ *
+ * @param ops      Operation list to split; modified in place.
+ * @param out_rest Receives the newly allocated remainder, or NULL if `ops`
+ *                 contains no bare filter operation.
+ *
+ * Returns 0 or a negative error code.
+ */
+int ff_sws_op_list_subpass(SwsOpList *ops, SwsOpList **out_rest);
+
 #endif /* SWSCALE_OPS_INTERNAL_H */
diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c
index ab1f4e6b01..5e114cd512 100644
--- a/libswscale/ops_optimizer.c
+++ b/libswscale/ops_optimizer.c
@@ -782,3 +782,163 @@ int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
 
     return AVERROR(EINVAL);
 }
+
+/**
+ * Pick a planar pixel format matching the size of `type` and providing at
+ * least `nb_planes` planes, to describe a temporary intermediate buffer.
+ *
+ * How the chosen format would normally be interpreted is irrelevant; one is
+ * only needed because ff_sws_graph_add_pass() takes an AVPixelFormat for the
+ * output buffer. A request for 2 planes yields the corresponding 3-plane
+ * format, as there is no 2-plane planar format to choose from.
+ */
+static enum AVPixelFormat get_planar_fmt(SwsPixelType type, int nb_planes)
+{
+    /* Rows: pixel type size of 1, 2 and 4; columns: 1 to 4 planes */
+    static const enum AVPixelFormat planar_fmts[3][4] = {
+        {
+            AV_PIX_FMT_GRAY8,
+            AV_PIX_FMT_YUV444P, // FIXME: no 2-plane planar fmt
+            AV_PIX_FMT_YUV444P,
+            AV_PIX_FMT_YUVA444P,
+        }, {
+            AV_PIX_FMT_GRAY16,
+            AV_PIX_FMT_YUV444P16, // FIXME: no 2-plane planar fmt
+            AV_PIX_FMT_YUV444P16,
+            AV_PIX_FMT_YUVA444P16,
+        }, {
+            AV_PIX_FMT_GRAYF32,
+            AV_PIX_FMT_GBRPF32, // FIXME: no 2-plane planar fmt
+            AV_PIX_FMT_GBRPF32,
+            AV_PIX_FMT_GBRAPF32,
+        },
+    };
+
+    int row;
+    switch (ff_sws_pixel_type_size(type)) {
+    case 1:  row =  0; break;
+    case 2:  row =  1; break;
+    case 4:  row =  2; break;
+    default: row = -1; break;
+    }
+
+    if (row >= 0 && nb_planes >= 1 && nb_planes <= 4)
+        return planar_fmts[row][nb_planes - 1];
+
+    av_unreachable("Invalid pixel type or number of planes?");
+    return AV_PIX_FMT_NONE;
+}
+
+/**
+ * Store into fmt->width and fmt->height the dimensions of the source of
+ * `ops`, except that a filtered input read replaces the filtered dimension
+ * with its kernel's dst_size: the height for SWS_OP_FILTER_V, the width for
+ * SWS_OP_FILTER_H. No other field of `fmt` is written.
+ */
+static void get_input_size(const SwsOpList *ops, SwsFormat *fmt)
+{
+    /* Start out from the unfiltered source dimensions */
+    fmt->width  = ops->src.width;
+    fmt->height = ops->src.height;
+
+    const SwsOp *const input = ff_sws_op_list_input(ops);
+    if (!input)
+        return;
+
+    if (input->rw.filter == SWS_OP_FILTER_V)
+        fmt->height = input->rw.kernel->dst_size;
+    else if (input->rw.filter == SWS_OP_FILTER_H)
+        fmt->width = input->rw.kernel->dst_size;
+}
+
+/**
+ * Split `ops1` at its first bare SWS_OP_FILTER_H / SWS_OP_FILTER_V operation.
+ *
+ * Everything before the filter stays in `ops1`, which is then terminated by a
+ * write to an intermediate planar buffer. The filter and everything after it
+ * go to a newly allocated list, returned via `out_rest`, which starts with
+ * the matching read from that buffer. Components marked unused at the filter
+ * are dropped so that the remaining ones occupy consecutive planes; swizzles
+ * are added on either side where that requires reordering. Both lists are
+ * passed through ff_sws_op_list_optimize() before returning.
+ *
+ * @param ops1     List to split; modified in place. On failure it may already
+ *                 have been truncated and retargeted at the intermediate
+ *                 format, so its contents should not be relied upon.
+ * @param out_rest Receives the remainder. Set to NULL if `ops1` contains no
+ *                 bare filter operation, and also on failure.
+ *
+ * @return 0 on success, or a negative error code.
+ */
+int ff_sws_op_list_subpass(SwsOpList *ops1, SwsOpList **out_rest)
+{
+    const SwsOp *op;
+    int ret, idx;
+
+    /* Give the output a defined value on every path, including failures */
+    *out_rest = NULL;
+
+    /* Locate the first bare filter operation, if any */
+    for (idx = 0; idx < ops1->num_ops; idx++) {
+        op = &ops1->ops[idx];
+        if (op->op == SWS_OP_FILTER_H || op->op == SWS_OP_FILTER_V)
+            break;
+    }
+
+    if (idx == ops1->num_ops)
+        return 0; /* no filter found, nothing to split */
+
+    /* The filter must be preceded by at least one operation */
+    av_assert0(idx > 0);
+    const SwsOp *prev = &ops1->ops[idx - 1];
+
+    SwsOpList *ops2 = ff_sws_op_list_duplicate(ops1);
+    if (!ops2)
+        return AVERROR(ENOMEM);
+
+    /**
+     * Not all components may be needed; but we need the ones that *are*
+     * used to be contiguous for the write/read operations. So, first
+     * compress them into a linearly ascending list of components
+     */
+    int nb_planes = 0;
+    SwsSwizzleOp swiz_wr = SWS_SWIZZLE(0, 1, 2, 3);
+    SwsSwizzleOp swiz_rd = SWS_SWIZZLE(0, 1, 2, 3);
+    for (int i = 0; i < 4; i++) {
+        if (!op->comps.unused[i]) {
+            const int o = nb_planes++;
+            swiz_wr.in[o] = i; /* plane o is written from component i */
+            swiz_rd.in[i] = o; /* component i is read back from plane o */
+        }
+    }
+
+    /* Determine metadata for the intermediate format */
+    const SwsPixelType type = op->type;
+    ops2->order_src = SWS_SWIZZLE(0, 1, 2, 3);
+    ops2->comps_src = prev->comps;
+    ops2->src.format = get_planar_fmt(type, nb_planes);
+    ops2->src.desc = av_pix_fmt_desc_get(ops2->src.format);
+    get_input_size(ops1, &ops2->src);
+
+    /* The first half now outputs exactly what the second half takes as input */
+    ops1->order_dst = SWS_SWIZZLE(0, 1, 2, 3);
+    ops1->dst = ops2->src;
+
+    /* Keep [0, idx) in ops1 and [idx, num_ops) in ops2 */
+    ff_sws_op_list_remove_at(ops1, idx, ops1->num_ops - idx);
+    ff_sws_op_list_remove_at(ops2, 0, idx);
+    op = NULL; /* the above command may invalidate op */
+
+    /* Terminate ops1 with an (optional) compacting swizzle and a write */
+    if (swiz_wr.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) {
+        ret = ff_sws_op_list_append(ops1, &(SwsOp) {
+            .op      = SWS_OP_SWIZZLE,
+            .type    = type,
+            .swizzle = swiz_wr,
+        });
+        if (ret < 0)
+            goto fail;
+    }
+
+    ret = ff_sws_op_list_append(ops1, &(SwsOp) {
+        .op       = SWS_OP_WRITE,
+        .type     = type,
+        .rw.elems = nb_planes,
+    });
+    if (ret < 0)
+        goto fail;
+
+    /* Start ops2 with the matching read and an (optional) expanding swizzle */
+    ret = ff_sws_op_list_insert_at(ops2, 0, &(SwsOp) {
+        .op        = SWS_OP_READ,
+        .type      = type,
+        .rw.elems  = nb_planes,
+    });
+    if (ret < 0)
+        goto fail;
+
+    if (swiz_rd.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) {
+        ret = ff_sws_op_list_insert_at(ops2, 1, &(SwsOp) {
+            .op      = SWS_OP_SWIZZLE,
+            .type    = type,
+            .swizzle = swiz_rd,
+        });
+        if (ret < 0)
+            goto fail;
+    }
+
+    ret = ff_sws_op_list_optimize(ops1);
+    if (ret < 0)
+        goto fail;
+
+    ret = ff_sws_op_list_optimize(ops2);
+    if (ret < 0)
+        goto fail;
+
+    *out_rest = ops2;
+    return 0;
+
+fail:
+    /* ops1 is not restored here; only the half-built remainder is released */
+    ff_sws_op_list_free(&ops2);
+    return ret;
+}

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 05/31: swscale/ops: add helper function to split filter subpasses

Reply via email to