This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit e3daeff9650e3fa8c72526358a730a1fa69f410a Author: Niklas Haas <[email protected]> AuthorDate: Thu Mar 26 23:53:19 2026 +0100 Commit: Niklas Haas <[email protected]> CommitDate: Sat Mar 28 18:50:14 2026 +0100 swscale/ops_dispatch: compute input x offset map for SwsOpExec This is cheap to precompute and can be used as-is for gather-style horizontal filter implementations. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops_dispatch.c | 49 +++++++++++++++++++++++++++++++++++++++---- libswscale/ops_dispatch.h | 18 ++++++++++++++-- libswscale/x86/ops_common.asm | 1 + 3 files changed, 62 insertions(+), 6 deletions(-) diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c index 774a7e5641..70f6f7d24d 100644 --- a/libswscale/ops_dispatch.c +++ b/libswscale/ops_dispatch.c @@ -44,6 +44,7 @@ typedef struct SwsOpPass { int idx_in[4]; int idx_out[4]; int *offsets_y; + int filter_size; bool memcpy_first; bool memcpy_last; bool memcpy_out; @@ -117,6 +118,7 @@ static void op_pass_free(void *ptr) ff_sws_compiled_op_unref(&p->comp); av_refstruct_unref(&p->offsets_y); av_free(p->exec_base.in_bump_y); + av_free(p->exec_base.in_offset_x); av_free(p->tail_buf); av_free(p); } @@ -202,11 +204,18 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, const int safe_width = (num_blocks - 1) * block_size; const int tail_size = pass->width - safe_width; - p->tail_off_in = safe_width * p->pixel_bits_in >> 3; p->tail_off_out = safe_width * p->pixel_bits_out >> 3; - p->tail_size_in = (tail_size * p->pixel_bits_in + 7) >> 3; p->tail_size_out = (tail_size * p->pixel_bits_out + 7) >> 3; + if (exec->in_offset_x) { + p->tail_off_in = exec->in_offset_x[safe_width]; + p->tail_size_in = exec->in_offset_x[pass->width - 1] - p->tail_off_in; + p->tail_size_in += (p->filter_size * p->pixel_bits_in + 7) >> 3; + } else { + p->tail_off_in = safe_width * p->pixel_bits_in >> 3; + p->tail_size_in = (tail_size * p->pixel_bits_in + 7) >> 3; + } + for (int i = 0; memcpy_in && i < p->planes_in; i++) { size_t block_size = (comp->block_size * p->pixel_bits_in + 7) >> 3; block_size += comp->over_read; @@ -225,6 +234,14 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, alloc_size += tail->out_stride[i] * out->height; } + if (memcpy_in && exec->in_offset_x) { + /* `in_offset_x` is indexed relative to the line start, not the start + * of the section being processed; so we need to over-allocate this + * array to the full width of the image, even though we will only + * partially fill in the offsets relevant to the tail region */ + alloc_size += aligned_w * sizeof(*exec->in_offset_x); + } + uint8_t *tail_buf = av_fast_realloc(p->tail_buf, &p->tail_buf_size, alloc_size); if (!tail_buf) return AVERROR(ENOMEM); @@ -240,6 +257,12 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, tail_buf += tail->out_stride[i] * out->height; } + if (memcpy_in && exec->in_offset_x) { + tail->in_offset_x = (int32_t *) tail_buf; + for (int i = safe_width; i < aligned_w; i++) + tail->in_offset_x[i] = exec->in_offset_x[i] - p->tail_off_in; + } + return 0; } @@ -308,7 +331,9 @@ static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y, tail.slice_h = h; for (int i = 0; i < p->planes_in; i++) { - exec.in[i] += p->tail_off_in; + /* Input offsets are relative to the base pointer */ + if (!exec.in_offset_x || memcpy_in) + exec.in[i] += p->tail_off_in; tail.in[i] += y * tail.in_stride[i]; } for (int i = 0; i < p->planes_out; i++) { @@ -397,8 +422,8 @@ static int compile(SwsGraph *graph, const SwsOpList *ops, SwsPass *input, p->idx_out[i] = i < p->planes_out ? ops->order_dst.in[i] : -1; } + const SwsFilterWeights *filter = read->rw.kernel; if (read->rw.filter == SWS_OP_FILTER_V) { - const SwsFilterWeights *filter = read->rw.kernel; p->offsets_y = av_refstruct_ref(filter->offsets); /* Compute relative pointer bumps for each output line */ @@ -416,6 +441,22 @@ static int compile(SwsGraph *graph, const SwsOpList *ops, SwsPass *input, } bump[filter->dst_size - 1] = 0; p->exec_base.in_bump_y = bump; + } else if (read->rw.filter == SWS_OP_FILTER_H) { + /* Compute pixel offset map for each output line */ + const int pixels = FFALIGN(filter->dst_size, p->comp.block_size); + int32_t *offset = av_malloc_array(pixels, sizeof(*offset)); + if (!offset) { + ret = AVERROR(ENOMEM); + goto fail; + } + + for (int x = 0; x < filter->dst_size; x++) + offset[x] = filter->offsets[x] * p->pixel_bits_in >> 3; + for (int x = filter->dst_size; x < pixels; x++) + offset[x] = offset[filter->dst_size - 1]; + p->exec_base.in_offset_x = offset; + p->exec_base.block_size_in = 0; /* ptr does not advance */ + p->filter_size = filter->filter_size; } return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height, diff --git a/libswscale/ops_dispatch.h b/libswscale/ops_dispatch.h index bca4b886ef..a35e9be907 100644 --- a/libswscale/ops_dispatch.h +++ b/libswscale/ops_dispatch.h @@ -41,7 +41,13 @@ typedef struct SwsOpExec { ptrdiff_t in_stride[4]; ptrdiff_t out_stride[4]; - /* Pointer bump, difference between stride and processed line size */ + /** + * Pointer bump, difference between stride and processed line size. + * + * Assumes that each read kernel increments pointers by the processed + * block size, except when using horizontal filtering, in which case + * this is always equal to the input stride. + */ ptrdiff_t in_bump[4]; ptrdiff_t out_bump[4]; @@ -64,12 +70,20 @@ typedef struct SwsOpExec { * multiplied by the corresponding line stride. */ int32_t *in_bump_y; + + /** + * Pixel offset map; for horizontal scaling, in bytes. Indexed by the x + * coordinate of the output pixel. This is always aligned up to a multiple + * of the block size, so implementations may safely over-read up to the + * next block boundary. + */ + int32_t *in_offset_x; } SwsOpExec; static_assert(sizeof(SwsOpExec) == 24 * sizeof(void *) + 6 * sizeof(int32_t) + 16 * sizeof(uint8_t) + - 1 * sizeof(void *), + 2 * sizeof(void *), "SwsOpExec layout mismatch"); /** diff --git a/libswscale/x86/ops_common.asm b/libswscale/x86/ops_common.asm index c7cc460447..8e4ea6e977 100644 --- a/libswscale/x86/ops_common.asm +++ b/libswscale/x86/ops_common.asm @@ -142,6 +142,7 @@ struc SwsOpExec .in_sub_x4 resb 4 .out_sub_x4 resb 4 .in_bump_y resq 1 + .in_offset_x resq 1 endstruc struc SwsOpImpl _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
