This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 015abfab38692e46d2733f8d4668ab447df31838
Author:     Niklas Haas <[email protected]>
AuthorDate: Mon Mar 9 16:18:52 2026 +0100
Commit:     Niklas Haas <[email protected]>
CommitDate: Sat Mar 28 18:50:14 2026 +0100

    swscale/ops_dispatch: precompute relative y bump map
    
    This is more useful for tight loops inside CPU backends, which can implement
    this by having a shared path for incrementing to the next line (as normal),
    and then a separate path for adding an extra position-dependent, stride
    multiplied line offset after each completed line.
    
    As a free upside, this encoding does not require any separate/special
    handling for the exec tail.
    
    Sponsored-by: Sovereign Tech Fund
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_dispatch.c     | 23 +++++++++++++++++++++--
 libswscale/ops_dispatch.h     | 15 +++++++++++++--
 libswscale/x86/ops_common.asm |  1 +
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c
index 48f907e6bb..e05a471b4e 100644
--- a/libswscale/ops_dispatch.c
+++ b/libswscale/ops_dispatch.c
@@ -112,6 +112,7 @@ static void op_pass_free(void *ptr)
 
     ff_sws_compiled_op_unref(&p->comp);
     av_refstruct_unref(&p->offsets_y);
+    av_free(p->exec_base.in_bump_y);
     av_free(p);
 }
 
@@ -370,8 +371,26 @@ static int compile(SwsGraph *graph, const SwsOpList *ops, SwsPass *input,
         p->idx_out[i] = i < p->planes_out ? ops->order_dst.in[i] : -1;
     }
 
-    if (read->rw.filter == SWS_OP_FILTER_V)
-        p->offsets_y = av_refstruct_ref(read->rw.kernel->offsets);
+    if (read->rw.filter == SWS_OP_FILTER_V) {
+        const SwsFilterWeights *filter = read->rw.kernel;
+        p->offsets_y = av_refstruct_ref(filter->offsets);
+
+        /* Compute relative pointer bumps for each output line */
+        int32_t *bump = av_malloc_array(filter->dst_size, sizeof(*bump));
+        if (!bump) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        int line = filter->offsets[0];
+        for (int y = 0; y < filter->dst_size - 1; y++) {
+            int next = filter->offsets[y + 1];
+            bump[y] = next - line - 1;
+            line = next;
+        }
+        bump[filter->dst_size - 1] = 0;
+        p->exec_base.in_bump_y = bump;
+    }
 
     return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
                                  input, p->comp.slice_align, op_pass_run,
diff --git a/libswscale/ops_dispatch.h b/libswscale/ops_dispatch.h
index ca99457e28..bca4b886ef 100644
--- a/libswscale/ops_dispatch.h
+++ b/libswscale/ops_dispatch.h
@@ -46,7 +46,7 @@ typedef struct SwsOpExec {
     ptrdiff_t out_bump[4];
 
     /* Extra metadata, may or may not be useful */
-    int32_t width, height;      /* Overall image dimensions */
+    int32_t width, height;      /* Overall output image dimensions */
     int32_t slice_y, slice_h;   /* Start and height of current slice */
     int32_t block_size_in;      /* Size of a block of pixels in bytes */
     int32_t block_size_out;
@@ -54,11 +54,22 @@ typedef struct SwsOpExec {
     /* Subsampling factors for each plane */
     uint8_t in_sub_y[4], out_sub_y[4];
     uint8_t in_sub_x[4], out_sub_x[4];
+
+    /**
+     * Line bump; determines how many additional lines to advance (after
+     * incrementing normally to the next line), for each filtered output line.
+     *
+     * Indexed by the line's true y coordinate. If NULL, then the bumps are
+     * effectively all zero. Note that these bumps still need to be
+     * multiplied by the corresponding line stride.
+     */
+    int32_t *in_bump_y;
 } SwsOpExec;
 
 static_assert(sizeof(SwsOpExec) == 24 * sizeof(void *) +
                                    6  * sizeof(int32_t) +
-                                   16 * sizeof(uint8_t),
+                                   16 * sizeof(uint8_t) +
+                                   1  * sizeof(void *),
               "SwsOpExec layout mismatch");
 
 /**
diff --git a/libswscale/x86/ops_common.asm b/libswscale/x86/ops_common.asm
index e19bd84a8e..c7cc460447 100644
--- a/libswscale/x86/ops_common.asm
+++ b/libswscale/x86/ops_common.asm
@@ -141,6 +141,7 @@ struc SwsOpExec
     .out_sub_y4 resb 4
     .in_sub_x4 resb 4
     .out_sub_x4 resb 4
+    .in_bump_y resq 1
 endstruc
 
 struc SwsOpImpl

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to