ops_chain: refactor setup() signature

Niklas Haas via ffmpeg-cvslog Wed, 18 Mar 2026 02:13:07 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit ef114cedef3185a1f90797793c2b0634bdce7c42
Author:     Niklas Haas <[email protected]>
AuthorDate: Tue Mar 17 22:14:24 2026 +0100
Commit:     Niklas Haas <[email protected]>
CommitDate: Wed Mar 18 09:09:44 2026 +0000

    swscale/ops_chain: refactor setup() signature
    
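    This is basically a cosmetic commit that groups all of the inputs to
    setup() into a single parameter struct (SwsImplParams), and likewise all
    of its outputs into a single result struct (SwsImplResult). This gives the
    immediate benefit of freeing up 8 bytes per op table entry (the free()
    callback now lives in the result struct instead of in SwsOpEntry), though
    the main motivation will come in the following commits.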
    
    Sponsored-by: Sovereign Tech Fund
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_backend.h    | 13 ++++++------
 libswscale/ops_chain.c      | 51 ++++++++++++++++++++++++---------------------
 libswscale/ops_chain.h      | 31 ++++++++++++++++++---------
 libswscale/ops_tmpl_float.c | 21 +++++++++++--------
 libswscale/x86/ops.c        | 48 +++++++++++++++++++++++-------------------
 5 files changed, 94 insertions(+), 70 deletions(-)

diff --git a/libswscale/ops_backend.h b/libswscale/ops_backend.h
index b1616f6b02..6ece263178 100644
--- a/libswscale/ops_backend.h
+++ b/libswscale/ops_backend.h
@@ -114,14 +114,15 @@ typedef struct SwsOpIter {
         (iter, &impl[1], __VA_ARGS__)
 
 /* Helper macros for common op setup code */
-#define DECL_SETUP(NAME)                                                       
 \
-    static int fn(NAME)(const SwsOp *op, SwsOpPriv *out)
+#define DECL_SETUP(NAME, PARAMS, OUT)                                          
 \
+    static int fn(NAME)(const SwsImplParams *PARAMS, SwsImplResult *OUT)
 
-#define SETUP_MEMDUP(c) ff_setup_memdup(&(c), sizeof(c), out)
-static inline int ff_setup_memdup(const void *c, size_t size, SwsOpPriv *out)
+#define SETUP_MEMDUP(c, out) ff_setup_memdup(&(c), sizeof(c), out)
+static inline int ff_setup_memdup(const void *c, size_t size, SwsImplResult 
*out)
 {
-    out->ptr = av_memdup(c, size);
-    return out->ptr ? 0 : AVERROR(ENOMEM);
+    out->priv.ptr = av_memdup(c, size);
+    out->free = ff_op_priv_free;
+    return out->priv.ptr ? 0 : AVERROR(ENOMEM);
 }
 
 /* Helper macro for declaring op table entries */
diff --git a/libswscale/ops_chain.c b/libswscale/ops_chain.c
index c58cb9c8ea..5a003828b7 100644
--- a/libswscale/ops_chain.c
+++ b/libswscale/ops_chain.c
@@ -203,7 +203,6 @@ int ff_sws_op_compile_tables(const SwsOpTable *const 
tables[], int num_tables,
     const SwsOpEntry *best = NULL;
     const SwsOp *op = &ops->ops[0];
     int ret, best_score = 0, best_cpu_flags;
-    SwsOpPriv priv = {0};
 
     for (int n = 0; n < num_tables; n++) {
         const SwsOpTable *table = tables[n];
@@ -225,17 +224,20 @@ int ff_sws_op_compile_tables(const SwsOpTable *const 
tables[], int num_tables,
     if (!best)
         return AVERROR(ENOTSUP);
 
+    SwsImplResult res = {0};
     if (best->setup) {
-        ret = best->setup(op, &priv);
+        const SwsImplParams params = { .op = op };
+        ret = best->setup(&params, &res);
         if (ret < 0)
             return ret;
     }
 
     chain->cpu_flags |= best_cpu_flags;
-    ret = ff_sws_op_chain_append(chain, best->func, best->free, &priv);
+    ret = ff_sws_op_chain_append(chain, res.func ? res.func : best->func,
+                                 res.free, &res.priv);
     if (ret < 0) {
-        if (best->free)
-            best->free(&priv);
+        if (res.free)
+            res.free(&res.priv);
         return ret;
     }
 
@@ -246,44 +248,45 @@ int ff_sws_op_compile_tables(const SwsOpTable *const 
tables[], int num_tables,
 
 #define q2pixel(type, q) ((q).den ? (type) (q).num / (q).den : 0)
 
-int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out)
+int ff_sws_setup_u8(const SwsImplParams *params, SwsImplResult *out)
 {
-    out->u8[0] = op->c.u;
+    out->priv.u8[0] = params->op->c.u;
     return 0;
 }
 
-int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out)
+int ff_sws_setup_u(const SwsImplParams *params, SwsImplResult *out)
 {
+    const SwsOp *op = params->op;
     switch (op->type) {
-    case SWS_PIXEL_U8:  out->u8[0]  = op->c.u; return 0;
-    case SWS_PIXEL_U16: out->u16[0] = op->c.u; return 0;
-    case SWS_PIXEL_U32: out->u32[0] = op->c.u; return 0;
-    case SWS_PIXEL_F32: out->f32[0] = op->c.u; return 0;
+    case SWS_PIXEL_U8:  out->priv.u8[0]  = op->c.u; return 0;
+    case SWS_PIXEL_U16: out->priv.u16[0] = op->c.u; return 0;
+    case SWS_PIXEL_U32: out->priv.u32[0] = op->c.u; return 0;
+    case SWS_PIXEL_F32: out->priv.f32[0] = op->c.u; return 0;
     default: return AVERROR(EINVAL);
     }
 }
 
-int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out)
+int ff_sws_setup_q(const SwsImplParams *params, SwsImplResult *out)
 {
+    const SwsOp *op = params->op;
     switch (op->type) {
-    case SWS_PIXEL_U8:  out->u8[0]  = q2pixel(uint8_t,  op->c.q); return 0;
-    case SWS_PIXEL_U16: out->u16[0] = q2pixel(uint16_t, op->c.q); return 0;
-    case SWS_PIXEL_U32: out->u32[0] = q2pixel(uint32_t, op->c.q); return 0;
-    case SWS_PIXEL_F32: out->f32[0] = q2pixel(float,    op->c.q); return 0;
+    case SWS_PIXEL_U8:  out->priv.u8[0]  = q2pixel(uint8_t,  op->c.q); return 
0;
+    case SWS_PIXEL_U16: out->priv.u16[0] = q2pixel(uint16_t, op->c.q); return 
0;
+    case SWS_PIXEL_U32: out->priv.u32[0] = q2pixel(uint32_t, op->c.q); return 
0;
+    case SWS_PIXEL_F32: out->priv.f32[0] = q2pixel(float,    op->c.q); return 
0;
     default: return AVERROR(EINVAL);
     }
-
-    return 0;
 }
 
-int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out)
+int ff_sws_setup_q4(const SwsImplParams *params, SwsImplResult *out)
 {
+    const SwsOp *op = params->op;
     for (int i = 0; i < 4; i++) {
         switch (op->type) {
-        case SWS_PIXEL_U8:  out->u8[i]  = q2pixel(uint8_t,  op->c.q4[i]); 
break;
-        case SWS_PIXEL_U16: out->u16[i] = q2pixel(uint16_t, op->c.q4[i]); 
break;
-        case SWS_PIXEL_U32: out->u32[i] = q2pixel(uint32_t, op->c.q4[i]); 
break;
-        case SWS_PIXEL_F32: out->f32[i] = q2pixel(float,    op->c.q4[i]); 
break;
+        case SWS_PIXEL_U8:  out->priv.u8[i]  = q2pixel(uint8_t,  op->c.q4[i]); 
break;
+        case SWS_PIXEL_U16: out->priv.u16[i] = q2pixel(uint16_t, op->c.q4[i]); 
break;
+        case SWS_PIXEL_U32: out->priv.u32[i] = q2pixel(uint32_t, op->c.q4[i]); 
break;
+        case SWS_PIXEL_F32: out->priv.f32[i] = q2pixel(float,    op->c.q4[i]); 
break;
         default: return AVERROR(EINVAL);
         }
     }
diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h
index b88d647cc9..eb8e42acb0 100644
--- a/libswscale/ops_chain.h
+++ b/libswscale/ops_chain.h
@@ -37,6 +37,8 @@
  * that is an implementation detail of the specific backend.
  */
 
+typedef struct SwsOpTable SwsOpTable;
+
 /**
  * Private data for each kernel.
  */
@@ -60,12 +62,6 @@ typedef union SwsOpPriv {
 
 static_assert(sizeof(SwsOpPriv) == 16, "SwsOpPriv size mismatch");
 
-/* Setup helpers */
-int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out);
-int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out);
-int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out);
-int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out);
-
 /**
  * Per-kernel execution context.
  *
@@ -104,6 +100,16 @@ static inline void ff_sws_op_chain_free(SwsOpChain *chain)
 int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func,
                            void (*free)(SwsOpPriv *), const SwsOpPriv *priv);
 
+typedef struct SwsImplParams {
+    const SwsOp *op;
+} SwsImplParams;
+
+typedef struct SwsImplResult {
+    SwsFuncPtr func; /* overrides `SwsOpEntry.func` if non-NULL */
+    SwsOpPriv priv; /* private data for this implementation instance */
+    void (*free)(SwsOpPriv *priv); /* free function for `priv` */
+} SwsImplResult;
+
 typedef struct SwsOpEntry {
     /* Kernel metadata; reduced size subset of SwsOp */
     SwsOpType op;
@@ -124,20 +130,25 @@ typedef struct SwsOpEntry {
 
     /* Kernel implementation */
     SwsFuncPtr func;
-    int (*setup)(const SwsOp *op, SwsOpPriv *out); /* optional */
-    void (*free)(SwsOpPriv *priv);
+    int (*setup)(const SwsImplParams *params, SwsImplResult *out); /* optional 
*/
 } SwsOpEntry;
 
+/* Setup helpers */
+int ff_sws_setup_u(const SwsImplParams *params, SwsImplResult *out);
+int ff_sws_setup_u8(const SwsImplParams *params, SwsImplResult *out);
+int ff_sws_setup_q(const SwsImplParams *params, SwsImplResult *out);
+int ff_sws_setup_q4(const SwsImplParams *params, SwsImplResult *out);
+
 static inline void ff_op_priv_free(SwsOpPriv *priv)
 {
     av_freep(&priv->ptr);
 }
 
-typedef struct SwsOpTable {
+struct SwsOpTable {
     unsigned cpu_flags;   /* required CPU flags for this table */
     int block_size;       /* fixed block size of this table */
     const SwsOpEntry *entries[]; /* terminated by NULL */
-} SwsOpTable;
+};
 
 /**
  * "Compile" a single op by looking it up in a list of fixed size op tables.
diff --git a/libswscale/ops_tmpl_float.c b/libswscale/ops_tmpl_float.c
index d5ac7fb75f..ea5d3cf3d0 100644
--- a/libswscale/ops_tmpl_float.c
+++ b/libswscale/ops_tmpl_float.c
@@ -41,23 +41,27 @@
 #define FMT_CHAR f
 #include "ops_tmpl_common.c"
 
-DECL_SETUP(setup_dither)
+DECL_SETUP(setup_dither, params, out)
 {
+    const SwsOp *op = params->op;
     const int size = 1 << op->dither.size_log2;
     if (size == 1) {
         /* We special case this value */
         av_assert1(!av_cmp_q(op->dither.matrix[0], av_make_q(1, 2)));
-        out->ptr = NULL;
+        out->priv.ptr = NULL;
         return 0;
     }
 
     const int width = FFMAX(size, SWS_BLOCK_SIZE);
-    pixel_t *matrix = out->ptr = av_malloc(sizeof(pixel_t) * size * width);
+    pixel_t *matrix = out->priv.ptr = av_malloc(sizeof(pixel_t) * size * 
width);
     if (!matrix)
         return AVERROR(ENOMEM);
+    out->free = ff_op_priv_free;
 
-    static_assert(sizeof(out->ptr) <= sizeof(uint8_t[8]), ">8 byte pointers 
not supported");
-    int8_t *offset = &out->i8[8];
+    static_assert(sizeof(out->priv.ptr) <= sizeof(uint8_t[8]),
+                  ">8 byte pointers not supported");
+
+    int8_t *offset = &out->priv.i8[8];
     for (int i = 0; i < 4; i++)
         offset[i] = op->dither.y_offset[i];
 
@@ -107,7 +111,6 @@ DECL_ENTRY(dither##N,
     .op = SWS_OP_DITHER,                                                       
 \
     .dither_size = N,                                                          
 \
     .setup = fn(setup_dither),                                                 
 \
-    .free = ff_op_priv_free,                                                   
 \
 );
 
 WRAP_DITHER(0)
@@ -126,8 +129,9 @@ typedef struct {
     pixel_t k[4];
 } fn(LinCoeffs);
 
-DECL_SETUP(setup_linear)
+DECL_SETUP(setup_linear, params, out)
 {
+    const SwsOp *op = params->op;
     fn(LinCoeffs) c;
 
     for (int i = 0; i < 4; i++) {
@@ -136,7 +140,7 @@ DECL_SETUP(setup_linear)
         c.k[i] = av_q2pixel(op->lin.m[i][4]);
     }
 
-    return SETUP_MEMDUP(c);
+    return SETUP_MEMDUP(c, out);
 }
 
 /**
@@ -193,7 +197,6 @@ DECL_IMPL(linear_##NAME)
 DECL_ENTRY(linear_##NAME,                                                      
 \
     .op    = SWS_OP_LINEAR,                                                    
 \
     .setup = fn(setup_linear),                                                 
 \
-    .free  = ff_op_priv_free,                                                  
 \
     .linear_mask = (MASK),                                                     
 \
 );
 
diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c
index 76d374430c..bd4b030333 100644
--- a/libswscale/x86/ops.c
+++ b/libswscale/x86/ops.c
@@ -83,11 +83,11 @@
         .pack.pattern = {X, Y, Z, W},                                          
 \
     );                                                                         
 \
 
-static int setup_swap_bytes(const SwsOp *op, SwsOpPriv *out)
+static int setup_swap_bytes(const SwsImplParams *params, SwsImplResult *out)
 {
-    const int mask = ff_sws_pixel_type_size(op->type) - 1;
+    const int mask = ff_sws_pixel_type_size(params->op->type) - 1;
     for (int i = 0; i < 16; i++)
-        out->u8[i] = (i & ~mask) | (mask - (i & mask));
+        out->priv.u8[i] = (i & ~mask) | (mask - (i & mask));
     return 0;
 }
 
@@ -113,10 +113,11 @@ static int setup_swap_bytes(const SwsOp *op, SwsOpPriv 
*out)
         .unused[IDX] = true,                                                   
 \
     );
 
-static int setup_clear(const SwsOp *op, SwsOpPriv *out)
+static int setup_clear(const SwsImplParams *params, SwsImplResult *out)
 {
+    const SwsOp *op = params->op;
     for (int i = 0; i < 4; i++)
-        out->u32[i] = (uint32_t) op->c.q4[i].num;
+        out->priv.u32[i] = (uint32_t) op->c.q4[i].num;
     return 0;
 }
 
@@ -146,9 +147,9 @@ static int setup_clear(const SwsOp *op, SwsOpPriv *out)
         .convert.expand = true,                                                
 \
     );
 
-static int setup_shift(const SwsOp *op, SwsOpPriv *out)
+static int setup_shift(const SwsImplParams *params, SwsImplResult *out)
 {
-    out->u16[0] = op->c.u;
+    out->priv.u16[0] = params->op->c.u;
     return 0;
 }
 
@@ -191,12 +192,13 @@ static int setup_shift(const SwsOp *op, SwsOpPriv *out)
         .scale = { .num = ((1 << (BITS)) - 1), .den = 1 },                     
 \
     );
 
-static int setup_dither(const SwsOp *op, SwsOpPriv *out)
+static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
 {
+    const SwsOp *op = params->op;
     /* 1x1 matrix / single constant */
     if (!op->dither.size_log2) {
         const AVRational k = op->dither.matrix[0];
-        out->f32[0] = (float) k.num / k.den;
+        out->priv.f32[0] = (float) k.num / k.den;
         return 0;
     }
 
@@ -214,9 +216,10 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out)
      * typically 320 bytes for a 16x16 dither matrix. */
     const int stride = size * sizeof(float);
     const int num_rows = size + max_offset;
-    float *matrix = out->ptr = av_mallocz(num_rows * stride);
+    float *matrix = out->priv.ptr = av_mallocz(num_rows * stride);
     if (!matrix)
         return AVERROR(ENOMEM);
+    out->free = ff_op_priv_free;
 
     for (int i = 0; i < size * size; i++)
         matrix[i] = (float) op->dither.matrix[i].num / 
op->dither.matrix[i].den;
@@ -224,9 +227,10 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out)
     memcpy(&matrix[size * size], matrix, max_offset * stride);
 
     /* Store relative pointer offset to each row inside extra space */
-    static_assert(sizeof(out->ptr) <= sizeof(int16_t[4]), ">8 byte pointers 
not supported");
+    static_assert(sizeof(out->priv.ptr) <= sizeof(int16_t[4]),
+                  ">8 byte pointers not supported");
     assert(max_offset * stride <= INT16_MAX);
-    int16_t *off_out = &out->i16[4];
+    int16_t *off_out = &out->priv.i16[4];
     for (int i = 0; i < 4; i++)
         off_out[i] = off[i] >= 0 ? (off[i] & (size - 1)) * stride : -1;
 
@@ -237,15 +241,17 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out)
     DECL_MACRO(F32, dither##SIZE##EXT,                                         
 \
         .op    = SWS_OP_DITHER,                                                
 \
         .setup = setup_dither,                                                 
 \
-        .free  = (SIZE) ? ff_op_priv_free : NULL,                              
 \
         .dither_size = SIZE,                                                   
 \
     );
 
-static int setup_linear(const SwsOp *op, SwsOpPriv *out)
+static int setup_linear(const SwsImplParams *params, SwsImplResult *out)
 {
-    float *matrix = out->ptr = av_mallocz(sizeof(float[4][5]));
+    const SwsOp *op = params->op;
+
+    float *matrix = out->priv.ptr = av_mallocz(sizeof(float[4][5]));
     if (!matrix)
         return AVERROR(ENOMEM);
+    out->free = ff_op_priv_free;
 
     for (int y = 0; y < 4; y++) {
         for (int x = 0; x < 5; x++)
@@ -259,7 +265,6 @@ static int setup_linear(const SwsOp *op, SwsOpPriv *out)
     DECL_ASM(F32, NAME##EXT,                                                   
 \
         .op    = SWS_OP_LINEAR,                                                
 \
         .setup = setup_linear,                                                 
 \
-        .free  = ff_op_priv_free,                                              
 \
         .linear_mask = (MASK),                                                 
 \
     );
 
@@ -651,20 +656,21 @@ do {
 static void normalize_clear(SwsOp *op)
 {
     static_assert(sizeof(uint32_t) == sizeof(int), "int size mismatch");
-    SwsOpPriv priv;
+    SwsImplResult res;
     union {
         uint32_t u32;
         int i;
     } c;
 
-    ff_sws_setup_q4(op, &priv);
+    ff_sws_setup_q4(&(const SwsImplParams) { .op = op }, &res);
+
     for (int i = 0; i < 4; i++) {
         if (!op->c.q4[i].den)
             continue;
         switch (ff_sws_pixel_type_size(op->type)) {
-        case 1: c.u32 = 0x1010101U * priv.u8[i]; break;
-        case 2: c.u32 = (uint32_t)priv.u16[i] << 16 | priv.u16[i]; break;
-        case 4: c.u32 = priv.u32[i]; break;
+        case 1: c.u32 = 0x1010101U * res.priv.u8[i]; break;
+        case 2: c.u32 = (uint32_t) res.priv.u16[i] << 16 | res.priv.u16[i]; 
break;
+        case 4: c.u32 = res.priv.u32[i]; break;
         }
 
         op->c.q4[i].num = c.i;

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 07/12: swscale/ops_chain: refactor setup() signature

Reply via email to