This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit ef114cedef3185a1f90797793c2b0634bdce7c42 Author: Niklas Haas <[email protected]> AuthorDate: Tue Mar 17 22:14:24 2026 +0100 Commit: Niklas Haas <[email protected]> CommitDate: Wed Mar 18 09:09:44 2026 +0000 swscale/ops_chain: refactor setup() signature This is basically a cosmetic commit that groups all of the parameters to setup() into a single struct, as well as the return type. This gives the immediate benefit of freeing up 8 bytes per op table entry, though the main motivation will come in the following commits. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops_backend.h | 13 ++++++------ libswscale/ops_chain.c | 51 ++++++++++++++++++++++++--------------------- libswscale/ops_chain.h | 31 ++++++++++++++++++--------- libswscale/ops_tmpl_float.c | 21 +++++++++++-------- libswscale/x86/ops.c | 48 +++++++++++++++++++++++------------------- 5 files changed, 94 insertions(+), 70 deletions(-) diff --git a/libswscale/ops_backend.h b/libswscale/ops_backend.h index b1616f6b02..6ece263178 100644 --- a/libswscale/ops_backend.h +++ b/libswscale/ops_backend.h @@ -114,14 +114,15 @@ typedef struct SwsOpIter { (iter, &impl[1], __VA_ARGS__) /* Helper macros for common op setup code */ -#define DECL_SETUP(NAME) \ - static int fn(NAME)(const SwsOp *op, SwsOpPriv *out) +#define DECL_SETUP(NAME, PARAMS, OUT) \ + static int fn(NAME)(const SwsImplParams *PARAMS, SwsImplResult *OUT) -#define SETUP_MEMDUP(c) ff_setup_memdup(&(c), sizeof(c), out) -static inline int ff_setup_memdup(const void *c, size_t size, SwsOpPriv *out) +#define SETUP_MEMDUP(c, out) ff_setup_memdup(&(c), sizeof(c), out) +static inline int ff_setup_memdup(const void *c, size_t size, SwsImplResult *out) { - out->ptr = av_memdup(c, size); - return out->ptr ? 0 : AVERROR(ENOMEM); + out->priv.ptr = av_memdup(c, size); + out->free = ff_op_priv_free; + return out->priv.ptr ? 
0 : AVERROR(ENOMEM); } /* Helper macro for declaring op table entries */ diff --git a/libswscale/ops_chain.c b/libswscale/ops_chain.c index c58cb9c8ea..5a003828b7 100644 --- a/libswscale/ops_chain.c +++ b/libswscale/ops_chain.c @@ -203,7 +203,6 @@ int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, const SwsOpEntry *best = NULL; const SwsOp *op = &ops->ops[0]; int ret, best_score = 0, best_cpu_flags; - SwsOpPriv priv = {0}; for (int n = 0; n < num_tables; n++) { const SwsOpTable *table = tables[n]; @@ -225,17 +224,20 @@ int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, if (!best) return AVERROR(ENOTSUP); + SwsImplResult res = {0}; if (best->setup) { - ret = best->setup(op, &priv); + const SwsImplParams params = { .op = op }; + ret = best->setup(&params, &res); if (ret < 0) return ret; } chain->cpu_flags |= best_cpu_flags; - ret = ff_sws_op_chain_append(chain, best->func, best->free, &priv); + ret = ff_sws_op_chain_append(chain, res.func ? res.func : best->func, + res.free, &res.priv); if (ret < 0) { - if (best->free) - best->free(&priv); + if (res.free) + res.free(&res.priv); return ret; } @@ -246,44 +248,45 @@ int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, #define q2pixel(type, q) ((q).den ? 
(type) (q).num / (q).den : 0) -int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out) +int ff_sws_setup_u8(const SwsImplParams *params, SwsImplResult *out) { - out->u8[0] = op->c.u; + out->priv.u8[0] = params->op->c.u; return 0; } -int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out) +int ff_sws_setup_u(const SwsImplParams *params, SwsImplResult *out) { + const SwsOp *op = params->op; switch (op->type) { - case SWS_PIXEL_U8: out->u8[0] = op->c.u; return 0; - case SWS_PIXEL_U16: out->u16[0] = op->c.u; return 0; - case SWS_PIXEL_U32: out->u32[0] = op->c.u; return 0; - case SWS_PIXEL_F32: out->f32[0] = op->c.u; return 0; + case SWS_PIXEL_U8: out->priv.u8[0] = op->c.u; return 0; + case SWS_PIXEL_U16: out->priv.u16[0] = op->c.u; return 0; + case SWS_PIXEL_U32: out->priv.u32[0] = op->c.u; return 0; + case SWS_PIXEL_F32: out->priv.f32[0] = op->c.u; return 0; default: return AVERROR(EINVAL); } } -int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out) +int ff_sws_setup_q(const SwsImplParams *params, SwsImplResult *out) { + const SwsOp *op = params->op; switch (op->type) { - case SWS_PIXEL_U8: out->u8[0] = q2pixel(uint8_t, op->c.q); return 0; - case SWS_PIXEL_U16: out->u16[0] = q2pixel(uint16_t, op->c.q); return 0; - case SWS_PIXEL_U32: out->u32[0] = q2pixel(uint32_t, op->c.q); return 0; - case SWS_PIXEL_F32: out->f32[0] = q2pixel(float, op->c.q); return 0; + case SWS_PIXEL_U8: out->priv.u8[0] = q2pixel(uint8_t, op->c.q); return 0; + case SWS_PIXEL_U16: out->priv.u16[0] = q2pixel(uint16_t, op->c.q); return 0; + case SWS_PIXEL_U32: out->priv.u32[0] = q2pixel(uint32_t, op->c.q); return 0; + case SWS_PIXEL_F32: out->priv.f32[0] = q2pixel(float, op->c.q); return 0; default: return AVERROR(EINVAL); } - - return 0; } -int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out) +int ff_sws_setup_q4(const SwsImplParams *params, SwsImplResult *out) { + const SwsOp *op = params->op; for (int i = 0; i < 4; i++) { switch (op->type) { - case SWS_PIXEL_U8: out->u8[i] = q2pixel(uint8_t, op->c.q4[i]); 
break; - case SWS_PIXEL_U16: out->u16[i] = q2pixel(uint16_t, op->c.q4[i]); break; - case SWS_PIXEL_U32: out->u32[i] = q2pixel(uint32_t, op->c.q4[i]); break; - case SWS_PIXEL_F32: out->f32[i] = q2pixel(float, op->c.q4[i]); break; + case SWS_PIXEL_U8: out->priv.u8[i] = q2pixel(uint8_t, op->c.q4[i]); break; + case SWS_PIXEL_U16: out->priv.u16[i] = q2pixel(uint16_t, op->c.q4[i]); break; + case SWS_PIXEL_U32: out->priv.u32[i] = q2pixel(uint32_t, op->c.q4[i]); break; + case SWS_PIXEL_F32: out->priv.f32[i] = q2pixel(float, op->c.q4[i]); break; default: return AVERROR(EINVAL); } } diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h index b88d647cc9..eb8e42acb0 100644 --- a/libswscale/ops_chain.h +++ b/libswscale/ops_chain.h @@ -37,6 +37,8 @@ * that is an implementation detail of the specific backend. */ +typedef struct SwsOpTable SwsOpTable; + /** * Private data for each kernel. */ @@ -60,12 +62,6 @@ typedef union SwsOpPriv { static_assert(sizeof(SwsOpPriv) == 16, "SwsOpPriv size mismatch"); -/* Setup helpers */ -int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out); -int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out); -int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out); -int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out); - /** * Per-kernel execution context. 
* @@ -104,6 +100,16 @@ static inline void ff_sws_op_chain_free(SwsOpChain *chain) int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, void (*free)(SwsOpPriv *), const SwsOpPriv *priv); +typedef struct SwsImplParams { + const SwsOp *op; +} SwsImplParams; + +typedef struct SwsImplResult { + SwsFuncPtr func; /* overrides `SwsOpEntry.func` if non-NULL */ + SwsOpPriv priv; /* private data for this implementation instance */ + void (*free)(SwsOpPriv *priv); /* free function for `priv` */ +} SwsImplResult; + typedef struct SwsOpEntry { /* Kernel metadata; reduced size subset of SwsOp */ SwsOpType op; @@ -124,20 +130,25 @@ typedef struct SwsOpEntry { /* Kernel implementation */ SwsFuncPtr func; - int (*setup)(const SwsOp *op, SwsOpPriv *out); /* optional */ - void (*free)(SwsOpPriv *priv); + int (*setup)(const SwsImplParams *params, SwsImplResult *out); /* optional */ } SwsOpEntry; +/* Setup helpers */ +int ff_sws_setup_u(const SwsImplParams *params, SwsImplResult *out); +int ff_sws_setup_u8(const SwsImplParams *params, SwsImplResult *out); +int ff_sws_setup_q(const SwsImplParams *params, SwsImplResult *out); +int ff_sws_setup_q4(const SwsImplParams *params, SwsImplResult *out); + static inline void ff_op_priv_free(SwsOpPriv *priv) { av_freep(&priv->ptr); } -typedef struct SwsOpTable { +struct SwsOpTable { unsigned cpu_flags; /* required CPU flags for this table */ int block_size; /* fixed block size of this table */ const SwsOpEntry *entries[]; /* terminated by NULL */ -} SwsOpTable; +}; /** * "Compile" a single op by looking it up in a list of fixed size op tables. 
diff --git a/libswscale/ops_tmpl_float.c b/libswscale/ops_tmpl_float.c index d5ac7fb75f..ea5d3cf3d0 100644 --- a/libswscale/ops_tmpl_float.c +++ b/libswscale/ops_tmpl_float.c @@ -41,23 +41,27 @@ #define FMT_CHAR f #include "ops_tmpl_common.c" -DECL_SETUP(setup_dither) +DECL_SETUP(setup_dither, params, out) { + const SwsOp *op = params->op; const int size = 1 << op->dither.size_log2; if (size == 1) { /* We special case this value */ av_assert1(!av_cmp_q(op->dither.matrix[0], av_make_q(1, 2))); - out->ptr = NULL; + out->priv.ptr = NULL; return 0; } const int width = FFMAX(size, SWS_BLOCK_SIZE); - pixel_t *matrix = out->ptr = av_malloc(sizeof(pixel_t) * size * width); + pixel_t *matrix = out->priv.ptr = av_malloc(sizeof(pixel_t) * size * width); if (!matrix) return AVERROR(ENOMEM); + out->free = ff_op_priv_free; - static_assert(sizeof(out->ptr) <= sizeof(uint8_t[8]), ">8 byte pointers not supported"); - int8_t *offset = &out->i8[8]; + static_assert(sizeof(out->priv.ptr) <= sizeof(uint8_t[8]), + ">8 byte pointers not supported"); + + int8_t *offset = &out->priv.i8[8]; for (int i = 0; i < 4; i++) offset[i] = op->dither.y_offset[i]; @@ -107,7 +111,6 @@ DECL_ENTRY(dither##N, .op = SWS_OP_DITHER, \ .dither_size = N, \ .setup = fn(setup_dither), \ - .free = ff_op_priv_free, \ ); WRAP_DITHER(0) @@ -126,8 +129,9 @@ typedef struct { pixel_t k[4]; } fn(LinCoeffs); -DECL_SETUP(setup_linear) +DECL_SETUP(setup_linear, params, out) { + const SwsOp *op = params->op; fn(LinCoeffs) c; for (int i = 0; i < 4; i++) { @@ -136,7 +140,7 @@ DECL_SETUP(setup_linear) c.k[i] = av_q2pixel(op->lin.m[i][4]); } - return SETUP_MEMDUP(c); + return SETUP_MEMDUP(c, out); } /** @@ -193,7 +197,6 @@ DECL_IMPL(linear_##NAME) DECL_ENTRY(linear_##NAME, \ .op = SWS_OP_LINEAR, \ .setup = fn(setup_linear), \ - .free = ff_op_priv_free, \ .linear_mask = (MASK), \ ); diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c index 76d374430c..bd4b030333 100644 --- a/libswscale/x86/ops.c +++ b/libswscale/x86/ops.c @@ 
-83,11 +83,11 @@ .pack.pattern = {X, Y, Z, W}, \ ); \ -static int setup_swap_bytes(const SwsOp *op, SwsOpPriv *out) +static int setup_swap_bytes(const SwsImplParams *params, SwsImplResult *out) { - const int mask = ff_sws_pixel_type_size(op->type) - 1; + const int mask = ff_sws_pixel_type_size(params->op->type) - 1; for (int i = 0; i < 16; i++) - out->u8[i] = (i & ~mask) | (mask - (i & mask)); + out->priv.u8[i] = (i & ~mask) | (mask - (i & mask)); return 0; } @@ -113,10 +113,11 @@ static int setup_swap_bytes(const SwsOp *op, SwsOpPriv *out) .unused[IDX] = true, \ ); -static int setup_clear(const SwsOp *op, SwsOpPriv *out) +static int setup_clear(const SwsImplParams *params, SwsImplResult *out) { + const SwsOp *op = params->op; for (int i = 0; i < 4; i++) - out->u32[i] = (uint32_t) op->c.q4[i].num; + out->priv.u32[i] = (uint32_t) op->c.q4[i].num; return 0; } @@ -146,9 +147,9 @@ static int setup_clear(const SwsOp *op, SwsOpPriv *out) .convert.expand = true, \ ); -static int setup_shift(const SwsOp *op, SwsOpPriv *out) +static int setup_shift(const SwsImplParams *params, SwsImplResult *out) { - out->u16[0] = op->c.u; + out->priv.u16[0] = params->op->c.u; return 0; } @@ -191,12 +192,13 @@ static int setup_shift(const SwsOp *op, SwsOpPriv *out) .scale = { .num = ((1 << (BITS)) - 1), .den = 1 }, \ ); -static int setup_dither(const SwsOp *op, SwsOpPriv *out) +static int setup_dither(const SwsImplParams *params, SwsImplResult *out) { + const SwsOp *op = params->op; /* 1x1 matrix / single constant */ if (!op->dither.size_log2) { const AVRational k = op->dither.matrix[0]; - out->f32[0] = (float) k.num / k.den; + out->priv.f32[0] = (float) k.num / k.den; return 0; } @@ -214,9 +216,10 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out) * typically 320 bytes for a 16x16 dither matrix. 
*/ const int stride = size * sizeof(float); const int num_rows = size + max_offset; - float *matrix = out->ptr = av_mallocz(num_rows * stride); + float *matrix = out->priv.ptr = av_mallocz(num_rows * stride); if (!matrix) return AVERROR(ENOMEM); + out->free = ff_op_priv_free; for (int i = 0; i < size * size; i++) matrix[i] = (float) op->dither.matrix[i].num / op->dither.matrix[i].den; @@ -224,9 +227,10 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out) memcpy(&matrix[size * size], matrix, max_offset * stride); /* Store relative pointer offset to each row inside extra space */ - static_assert(sizeof(out->ptr) <= sizeof(int16_t[4]), ">8 byte pointers not supported"); + static_assert(sizeof(out->priv.ptr) <= sizeof(int16_t[4]), + ">8 byte pointers not supported"); assert(max_offset * stride <= INT16_MAX); - int16_t *off_out = &out->i16[4]; + int16_t *off_out = &out->priv.i16[4]; for (int i = 0; i < 4; i++) off_out[i] = off[i] >= 0 ? (off[i] & (size - 1)) * stride : -1; @@ -237,15 +241,17 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out) DECL_MACRO(F32, dither##SIZE##EXT, \ .op = SWS_OP_DITHER, \ .setup = setup_dither, \ - .free = (SIZE) ? 
ff_op_priv_free : NULL, \ .dither_size = SIZE, \ ); -static int setup_linear(const SwsOp *op, SwsOpPriv *out) +static int setup_linear(const SwsImplParams *params, SwsImplResult *out) { - float *matrix = out->ptr = av_mallocz(sizeof(float[4][5])); + const SwsOp *op = params->op; + + float *matrix = out->priv.ptr = av_mallocz(sizeof(float[4][5])); if (!matrix) return AVERROR(ENOMEM); + out->free = ff_op_priv_free; for (int y = 0; y < 4; y++) { for (int x = 0; x < 5; x++) @@ -259,7 +265,6 @@ static int setup_linear(const SwsOp *op, SwsOpPriv *out) DECL_ASM(F32, NAME##EXT, \ .op = SWS_OP_LINEAR, \ .setup = setup_linear, \ - .free = ff_op_priv_free, \ .linear_mask = (MASK), \ ); @@ -651,20 +656,21 @@ do { static void normalize_clear(SwsOp *op) { static_assert(sizeof(uint32_t) == sizeof(int), "int size mismatch"); - SwsOpPriv priv; + SwsImplResult res; union { uint32_t u32; int i; } c; - ff_sws_setup_q4(op, &priv); + ff_sws_setup_q4(&(const SwsImplParams) { .op = op }, &res); + for (int i = 0; i < 4; i++) { if (!op->c.q4[i].den) continue; switch (ff_sws_pixel_type_size(op->type)) { - case 1: c.u32 = 0x1010101U * priv.u8[i]; break; - case 2: c.u32 = (uint32_t)priv.u16[i] << 16 | priv.u16[i]; break; - case 4: c.u32 = priv.u32[i]; break; + case 1: c.u32 = 0x1010101U * res.priv.u8[i]; break; + case 2: c.u32 = (uint32_t) res.priv.u16[i] << 16 | res.priv.u16[i]; break; + case 4: c.u32 = res.priv.u32[i]; break; } op->c.q4[i].num = c.i; _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
