This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 015abfab38692e46d2733f8d4668ab447df31838 Author: Niklas Haas <[email protected]> AuthorDate: Mon Mar 9 16:18:52 2026 +0100 Commit: Niklas Haas <[email protected]> CommitDate: Sat Mar 28 18:50:14 2026 +0100 swscale/ops_dispatch: precompute relative y bump map This is more useful for tight loops inside CPU backends, which can implement this by having a shared path for incrementing to the next line (as normal), and then a separate path for adding an extra position-dependent, stride multiplied line offset after each completed line. As a free upside, this encoding does not require any separate/special handling for the exec tail. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops_dispatch.c | 23 +++++++++++++++++++++-- libswscale/ops_dispatch.h | 15 +++++++++++++-- libswscale/x86/ops_common.asm | 1 + 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c index 48f907e6bb..e05a471b4e 100644 --- a/libswscale/ops_dispatch.c +++ b/libswscale/ops_dispatch.c @@ -112,6 +112,7 @@ static void op_pass_free(void *ptr) ff_sws_compiled_op_unref(&p->comp); av_refstruct_unref(&p->offsets_y); + av_free(p->exec_base.in_bump_y); av_free(p); } @@ -370,8 +371,26 @@ static int compile(SwsGraph *graph, const SwsOpList *ops, SwsPass *input, p->idx_out[i] = i < p->planes_out ? ops->order_dst.in[i] : -1; } - if (read->rw.filter == SWS_OP_FILTER_V) - p->offsets_y = av_refstruct_ref(read->rw.kernel->offsets); + if (read->rw.filter == SWS_OP_FILTER_V) { + const SwsFilterWeights *filter = read->rw.kernel; + p->offsets_y = av_refstruct_ref(filter->offsets); + + /* Compute relative pointer bumps for each output line */ + int32_t *bump = av_malloc_array(filter->dst_size, sizeof(*bump)); + if (!bump) { + ret = AVERROR(ENOMEM); + goto fail; + } + + int line = filter->offsets[0]; + for (int y = 0; y < filter->dst_size - 1; y++) { + int next = filter->offsets[y + 1]; + bump[y] = next - line - 1; + line = next; + } + bump[filter->dst_size - 1] = 0; + p->exec_base.in_bump_y = bump; + } return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height, input, p->comp.slice_align, op_pass_run, diff --git a/libswscale/ops_dispatch.h b/libswscale/ops_dispatch.h index ca99457e28..bca4b886ef 100644 --- a/libswscale/ops_dispatch.h +++ b/libswscale/ops_dispatch.h @@ -46,7 +46,7 @@ typedef struct SwsOpExec { ptrdiff_t out_bump[4]; /* Extra metadata, may or may not be useful */ - int32_t width, height; /* Overall image dimensions */ + int32_t width, height; /* Overall output image dimensions */ int32_t slice_y, slice_h; /* Start and height of current slice */ int32_t block_size_in; /* Size of a block of pixels in bytes */ int32_t block_size_out; @@ -54,11 +54,22 @@ typedef struct SwsOpExec { /* Subsampling factors for each plane */ uint8_t in_sub_y[4], out_sub_y[4]; uint8_t in_sub_x[4], out_sub_x[4]; + + /** + * Line bump; determines how many additional lines to advance (after + * incrementing normally to the next line), for each filtered output line. + * + * Indexed by the line's true y coordinate. If NULL, then the bumps are + * effectively all zero. Note that these bumps still need to be + * multiplied by the corresponding line stride. + */ + int32_t *in_bump_y; } SwsOpExec; static_assert(sizeof(SwsOpExec) == 24 * sizeof(void *) + 6 * sizeof(int32_t) + - 16 * sizeof(uint8_t), + 16 * sizeof(uint8_t) + + 1 * sizeof(void *), "SwsOpExec layout mismatch"); /** diff --git a/libswscale/x86/ops_common.asm b/libswscale/x86/ops_common.asm index e19bd84a8e..c7cc460447 100644 --- a/libswscale/x86/ops_common.asm +++ b/libswscale/x86/ops_common.asm @@ -141,6 +141,7 @@ struc SwsOpExec .out_sub_y4 resb 4 .in_sub_x4 resb 4 .out_sub_x4 resb 4 + .in_bump_y resq 1 endstruc struc SwsOpImpl _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
