This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 92d06a8027649a2e823972115542418136a884bb Author: Andreas Rheinhardt <[email protected]> AuthorDate: Fri Mar 20 07:06:43 2026 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Tue Mar 24 18:12:00 2026 +0100 avcodec/vvc/ctu: Put scratchbufs into union to save space This reduces sizeof(VVCLocalContext) from 4580576B to 3408032B here. Reviewed-by: Frank Plowman <[email protected]> Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/vvc/ctu.h | 35 +++++++++++++++++++++++++---------- libavcodec/vvc/filter.c | 20 ++++++-------------- libavcodec/vvc/inter.c | 34 +++++++++++++++++----------------- 3 files changed, 48 insertions(+), 41 deletions(-) diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h index b6f68d8432..59366a1756 100644 --- a/libavcodec/vvc/ctu.h +++ b/libavcodec/vvc/ctu.h @@ -87,6 +87,8 @@ #define ALF_GRADIENT_SIZE ((MAX_CU_SIZE + ALF_GRADIENT_BORDER * 2) / ALF_GRADIENT_STEP) #define ALF_NUM_DIR 4 +#define ALF_MAX_BLOCKS_IN_CTU (MAX_CTU_SIZE * MAX_CTU_SIZE / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE) +#define ALF_MAX_FILTER_SIZE (ALF_MAX_BLOCKS_IN_CTU * ALF_NUM_COEFF_LUMA) /** * Value of the luma sample at position (x, y) in the 2D array tab. @@ -437,16 +439,29 @@ typedef struct VVCLocalContext { NeighbourAvailable na; - /* *2 for high bit depths */ - DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[EDGE_EMU_BUFFER_STRIDE * EDGE_EMU_BUFFER_STRIDE * 2]; - DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE]; - DECLARE_ALIGNED(32, int16_t, tmp1)[MAX_PB_SIZE * MAX_PB_SIZE]; - DECLARE_ALIGNED(32, int16_t, tmp2)[MAX_PB_SIZE * MAX_PB_SIZE]; - DECLARE_ALIGNED(32, uint8_t, ciip_tmp)[MAX_PB_SIZE * MAX_PB_SIZE * 2]; - DECLARE_ALIGNED(32, uint8_t, sao_buffer)[(MAX_CTU_SIZE + 2 * SAO_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; - DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; - DECLARE_ALIGNED(32, uint8_t, alf_buffer_chroma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; - DECLARE_ALIGNED(32, int32_t, alf_gradient_tmp)[ALF_GRADIENT_SIZE * ALF_GRADIENT_SIZE * ALF_NUM_DIR]; + union { + struct { + /* *2 for high bit depths */ + DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[EDGE_EMU_BUFFER_STRIDE * EDGE_EMU_BUFFER_STRIDE * 2]; + DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE]; + DECLARE_ALIGNED(32, int16_t, tmp1)[MAX_PB_SIZE * MAX_PB_SIZE]; + union { + DECLARE_ALIGNED(32, int16_t, prof_tmp)[MAX_PB_SIZE * MAX_PB_SIZE]; + DECLARE_ALIGNED(32, uint8_t, ciip_tmp)[MAX_PB_SIZE * MAX_PB_SIZE * 2]; + }; + } pred; ///< only accessed in ff_vvc_predict_inter() and ff_vvc_predict_ciip() + ///< during the inter and reconstruction stages + struct { + DECLARE_ALIGNED(32, uint8_t, buffer)[(MAX_CTU_SIZE + 2 * SAO_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; + } sao; ///< only accessed in ff_vvc_sao_filter() during the sao processing stage + struct { + DECLARE_ALIGNED(32, uint8_t, buffer_luma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; + DECLARE_ALIGNED(32, uint8_t, buffer_chroma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; + DECLARE_ALIGNED(32, int32_t, gradient_tmp)[ALF_GRADIENT_SIZE * ALF_GRADIENT_SIZE * ALF_NUM_DIR]; + DECLARE_ALIGNED(32, int16_t, coeff_tmp)[ALF_MAX_FILTER_SIZE]; + DECLARE_ALIGNED(32, int16_t, clip_tmp)[ALF_MAX_FILTER_SIZE]; + } alf; ///< only accessed in ff_vvc_alf_filter() during the alf processing stage + }; } VVCLocalContext; typedef struct VVCAllowedSplit { diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c index b99901e219..e3131cf808 100644 --- a/libavcodec/vvc/filter.c +++ b/libavcodec/vvc/filter.c @@ -20,8 +20,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <assert.h> - #include "libavutil/frame.h" #include "libavutil/imgutils.h" @@ -355,7 +353,7 @@ void ff_vvc_sao_filter(VVCLocalContext *lc, int x0, int y0) case SAO_EDGE: { const ptrdiff_t dst_stride = 2 * MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE; - uint8_t *dst = lc->sao_buffer + dst_stride + AV_INPUT_BUFFER_PADDING_SIZE; + uint8_t *dst = lc->sao.buffer + dst_stride + AV_INPUT_BUFFER_PADDING_SIZE; sao_extends_edges(dst, dst_stride, src, src_stride, width, height, fc, x0, y0, rx, ry, edges, c_idx); @@ -990,9 +988,6 @@ static void alf_prepare_buffer(VVCFrameContext *fc, uint8_t *_dst, const uint8_t alf_fill_border_v(dst, dst_stride, src, dst - (1 << ps), border_pixels, height, ps, edges, edges[RIGHT]); } -#define ALF_MAX_BLOCKS_IN_CTU (MAX_CTU_SIZE * MAX_CTU_SIZE / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE) -#define ALF_MAX_FILTER_SIZE (ALF_MAX_BLOCKS_IN_CTU * ALF_NUM_COEFF_LUMA) - static void alf_get_coeff_and_clip(VVCLocalContext *lc, int16_t *coeff, int16_t *clip, const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, const ALFParams *alf) { @@ -1018,7 +1013,7 @@ static void alf_get_coeff_and_clip(VVCLocalContext *lc, int16_t *coeff, int16_t class_to_filt = ff_vvc_alf_aps_class_to_filt_map; } fc->vvcdsp.alf.classify(class_idx, transpose_idx, src, src_stride, width, height, - vb_pos, lc->alf_gradient_tmp); + vb_pos, lc->alf.gradient_tmp); fc->vvcdsp.alf.recon_coeff_and_clip(coeff, clip, class_idx, transpose_idx, size, coeff_set, clip_idx_set, class_to_filt); } @@ -1029,11 +1024,8 @@ static void alf_filter_luma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *sr { const VVCFrameContext *fc = lc->fc; int vb_pos = _vb_pos - y0; - int16_t *coeff = (int16_t*)lc->tmp; - int16_t *clip = (int16_t *)lc->tmp1; - - static_assert(ALF_MAX_FILTER_SIZE <= sizeof(lc->tmp), "VVCLocalContext.tmp too small"); - static_assert(ALF_MAX_FILTER_SIZE * sizeof(int16_t) <= sizeof(lc->tmp1), "VVCLocalContext.tmp1 too small"); + int16_t *coeff = lc->alf.coeff_tmp; + int16_t *clip = lc->alf.clip_tmp; alf_get_coeff_and_clip(lc, coeff, clip, src, src_stride, width, height, vb_pos, alf); fc->vvcdsp.alf.filter[LUMA](dst, dst_stride, src, src_stride, width, height, coeff, clip, vb_pos); @@ -1217,7 +1209,7 @@ void ff_vvc_alf_filter(VVCLocalContext *lc, const int x0, const int y0) uint8_t *padded; if (alf->ctb_flag[c_idx] || (!c_idx && has_chroma && (alf->ctb_cc_idc[0] || alf->ctb_cc_idc[1]))) { - padded = (c_idx ? lc->alf_buffer_chroma : lc->alf_buffer_luma) + padded_offset; + padded = (c_idx ? lc->alf.buffer_chroma : lc->alf.buffer_luma) + padded_offset; alf_prepare_buffer(fc, padded, src, x, y, rx, ry, width, height, padded_stride, src_stride, c_idx, sb_edges[i]); } @@ -1231,7 +1223,7 @@ void ff_vvc_alf_filter(VVCLocalContext *lc, const int x0, const int y0) } } if (c_idx && alf->ctb_cc_idc[c_idx - 1]) { - padded = lc->alf_buffer_luma + padded_offset; + padded = lc->alf.buffer_luma + padded_offset; alf_filter_cc(lc, src, padded, src_stride, padded_stride, c_idx, width, height, hs, vs, ctu_end - sb->t - ALF_VB_POS_ABOVE_LUMA, alf); } diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c index 7d6e79e49b..fe7ce722e5 100644 --- a/libavcodec/vvc/inter.c +++ b/libavcodec/vvc/inter.c @@ -273,7 +273,7 @@ static void mc(VVCLocalContext *lc, int16_t *dst, const VVCFrame *ref, const Mv x_off += mv->x >> (4 + hs); y_off += mv->y >> (4 + vs); - MC_EMULATED_EDGE(lc->edge_emu_buffer, &src, &src_stride, x_off, y_off); + MC_EMULATED_EDGE(lc->pred.edge_emu_buffer, &src, &src_stride, x_off, y_off); fc->vvcdsp.inter.put[is_chroma][idx][!!my][!!mx](dst, src, src_stride, block_h, hf, vf, block_w); } @@ -302,7 +302,7 @@ static void mc_uni(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride x_off += mv->x >> (4 + hs); y_off += mv->y >> (4 + vs); - MC_EMULATED_EDGE(lc->edge_emu_buffer, &src, &src_stride, x_off, y_off); + MC_EMULATED_EDGE(lc->pred.edge_emu_buffer, &src, &src_stride, x_off, y_off); if (derive_weight_uni(&denom, &wx, &ox, lc, mvf, c_idx)) { fc->vvcdsp.inter.put_uni_w[is_chroma][idx][!!my][!!mx](dst, dst_stride, src, src_stride, block_h, denom, wx, ox, hf, vf, block_w); @@ -323,7 +323,7 @@ static void mc_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride, const int vs = fc->ps.sps->vshift[c_idx]; const int idx = av_log2(block_w) - 1; const VVCFrame *refs[] = { ref0, ref1 }; - int16_t *tmp[] = { lc->tmp + sb_bdof_flag * PROF_TEMP_OFFSET, lc->tmp1 + sb_bdof_flag * PROF_TEMP_OFFSET }; + int16_t *tmp[] = { lc->pred.tmp + sb_bdof_flag * PROF_TEMP_OFFSET, lc->pred.tmp1 + sb_bdof_flag * PROF_TEMP_OFFSET }; const int is_chroma = !!c_idx; const int hpel_if_idx = is_chroma ? 0 : pu->mi.hpel_if_idx; @@ -344,9 +344,9 @@ static void mc_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride, const int x_sb = x_off + (orig_mv->mv[i].x >> (4 + hs)); const int y_sb = y_off + (orig_mv->mv[i].y >> (4 + vs)); - MC_EMULATED_EDGE_DMVR(lc->edge_emu_buffer, &src, &src_stride, x_sb, y_sb, ox, oy); + MC_EMULATED_EDGE_DMVR(lc->pred.edge_emu_buffer, &src, &src_stride, x_sb, y_sb, ox, oy); } else { - MC_EMULATED_EDGE(lc->edge_emu_buffer, &src, &src_stride, ox, oy); + MC_EMULATED_EDGE(lc->pred.edge_emu_buffer, &src, &src_stride, ox, oy); } fc->vvcdsp.inter.put[is_chroma][idx][!!my][!!mx](tmp[i], src, src_stride, block_h, hf, vf, block_w); if (sb_bdof_flag) @@ -415,7 +415,7 @@ static void emulated_edge_scaled(VVCLocalContext *lc, const uint8_t **src, ptrdi const int block_h = *src_height = y_end - y_off + (y_end == y_last); const int wrap_enabled = 0; - MC_EMULATED_EDGE(lc->edge_emu_buffer, src, src_stride, x_off, y_off); + MC_EMULATED_EDGE(lc->pred.edge_emu_buffer, src, src_stride, x_off, y_off); } static void mc_scaled(VVCLocalContext *lc, int16_t *dst, const VVCRefPic *refp, const Mv *mv, @@ -470,7 +470,7 @@ static void mc_bi_scaled(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_ const int x_off, const int y_off, const int block_w, const int block_h, const int c_idx) { const VVCRefPic *refps[] = { refp0, refp1 }; - int16_t *tmp[] = { lc->tmp, lc->tmp1 }; + int16_t *tmp[] = { lc->pred.tmp, lc->pred.tmp1 }; for (int i = L0; i <= L1; i++) { const Mv *mv = mvf->mv + i; @@ -492,7 +492,7 @@ static void luma_prof_uni(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst const VVCFrameContext *fc = lc->fc; const uint8_t *src = ref->frame->data[LUMA]; ptrdiff_t src_stride = ref->frame->linesize[LUMA]; - uint16_t *prof_tmp = lc->tmp + PROF_TEMP_OFFSET; + uint16_t *prof_tmp = lc->pred.tmp + PROF_TEMP_OFFSET; const int idx = av_log2(block_w) - 1; const int lx = mvf->pred_flag - PF_L0; const Mv *mv = mvf->mv + lx; @@ -508,7 +508,7 @@ static void luma_prof_uni(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst x_off += mv->x >> 4; y_off += mv->y >> 4; - MC_EMULATED_EDGE(lc->edge_emu_buffer, &src, &src_stride, x_off, y_off); + MC_EMULATED_EDGE(lc->pred.edge_emu_buffer, &src, &src_stride, x_off, y_off); if (cb_prof_flag) { fc->vvcdsp.inter.put[LUMA][idx][!!my][!!mx](prof_tmp, src, src_stride, AFFINE_MIN_BLOCK_SIZE, hf, vf, AFFINE_MIN_BLOCK_SIZE); fc->vvcdsp.inter.fetch_samples(prof_tmp, src, src_stride, mx, my); @@ -535,14 +535,14 @@ static void luma_prof(VVCLocalContext *lc, int16_t *dst, const VVCFrame *ref, const int oy = y_off + (mv->y >> 4); const int idx = av_log2(block_w) - 1; const int is_chroma = 0; - uint16_t *prof_tmp = lc->tmp2 + PROF_TEMP_OFFSET; + int16_t *prof_tmp = lc->pred.prof_tmp + PROF_TEMP_OFFSET; ptrdiff_t src_stride = ref->frame->linesize[LUMA]; const uint8_t *src = ref->frame->data[LUMA]; const int8_t *hf = ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPE_AFFINE][mx]; const int8_t *vf = ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPE_AFFINE][my]; const int wrap_enabled = fc->ps.pps->r->pps_ref_wraparound_enabled_flag; - MC_EMULATED_EDGE(lc->edge_emu_buffer, &src, &src_stride, ox, oy); + MC_EMULATED_EDGE(lc->pred.edge_emu_buffer, &src, &src_stride, ox, oy); if (!pu->cb_prof_flag[lx]) { fc->vvcdsp.inter.put[LUMA][idx][!!my][!!mx](dst, src, src_stride, block_h, hf, vf, block_w); } else { @@ -557,7 +557,7 @@ static void luma_prof_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_ const int block_w, const int block_h) { const VVCRefPic *refps[] = { ref0, ref1 }; - int16_t *tmp[] = { lc->tmp, lc->tmp1 }; + int16_t *tmp[] = { lc->pred.tmp, lc->pred.tmp1 }; for (int i = L0; i <= L1; i++) { const VVCRefPic *refp = refps[i]; @@ -608,7 +608,7 @@ static void pred_gpm_blk(VVCLocalContext *lc) const int c_end = fc->ps.sps->r->sps_chroma_format_idc ? 3 : 1; - int16_t *tmp[2] = {lc->tmp, lc->tmp1}; + int16_t *tmp[2] = {lc->pred.tmp, lc->pred.tmp1}; for (int c_idx = 0; c_idx < c_end; c_idx++) { const int hs = fc->ps.sps->hshift[c_idx]; @@ -691,7 +691,7 @@ static void pred_regular(VVCLocalContext *lc, const MvField *mvf, const MvField const int h = sbh >> vs; const int is_luma = !c_idx; const int do_ciip = lc->cu->ciip_flag && (is_luma || (w > 2)); - uint8_t *inter = do_ciip ? (uint8_t *)lc->ciip_tmp : dst; + uint8_t *inter = do_ciip ? lc->pred.ciip_tmp : dst; const ptrdiff_t inter_stride = do_ciip ? (MAX_PB_SIZE * sizeof(uint16_t)) : dst_stride; const int do_bdof = is_luma && sb_bdof_flag; @@ -774,7 +774,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv, const VVCFrameContext *fc = lc->fc; const int sr_range = 2; const VVCFrame *refs[] = { ref0, ref1 }; - int16_t *tmp[] = { lc->tmp, lc->tmp1 }; + int16_t *tmp[] = { lc->pred.tmp, lc->pred.tmp1 }; int sad[SAD_ARRAY_SIZE][SAD_ARRAY_SIZE]; int min_dx, min_dy, min_sad, dx, dy; @@ -794,7 +794,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv, const uint8_t *src = ref->frame->data[LUMA]; const int wrap_enabled = fc->ps.pps->r->pps_ref_wraparound_enabled_flag; - MC_EMULATED_EDGE_BILINEAR(lc->edge_emu_buffer, &src, &src_stride, ox, oy); + MC_EMULATED_EDGE_BILINEAR(lc->pred.edge_emu_buffer, &src, &src_stride, ox, oy); fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w); } @@ -808,7 +808,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv, for (dy = 0; dy < SAD_ARRAY_SIZE; dy++) { for (dx = 0; dx < SAD_ARRAY_SIZE; dx++) { if (dx != sr_range || dy != sr_range) { - sad[dy][dx] = fc->vvcdsp.inter.sad(lc->tmp, lc->tmp1, dx, dy, block_w, block_h); + sad[dy][dx] = fc->vvcdsp.inter.sad(lc->pred.tmp, lc->pred.tmp1, dx, dy, block_w, block_h); if (sad[dy][dx] < min_sad) { min_sad = sad[dy][dx]; min_dx = dx; _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
