This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit a10c7317234dc436b09268ede667d48120b58f06 Author: Andreas Rheinhardt <[email protected]> AuthorDate: Fri Mar 20 03:48:28 2026 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Tue Mar 24 18:10:55 2026 +0100 avcodec/vvc/ctu: Move often accessed fields to the start of structs And move the big buffers to the end. This reduces codesize as offset+displacement addressing modes are either unavailable or require more bytes if the displacement is too large. E.g. this saves 5952B on x64 here and 3008B on AArch64. This change should also improve data locality. Reviewed-by: Frank Plowman <[email protected]> Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/vvc/ctu.h | 53 ++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h index e37bacf9dd..b6f68d8432 100644 --- a/libavcodec/vvc/ctu.h +++ b/libavcodec/vvc/ctu.h @@ -363,17 +363,17 @@ typedef struct VVCCabacState { typedef struct EntryPoint { int8_t qp_y; ///< QpY - int stat_coeff[VVC_MAX_SAMPLE_ARRAYS]; ///< StatCoeff - - Palette pp[VVC_MAX_SAMPLE_ARRAYS]; // PalettePredictor - - VVCCabacState cabac_state[VVC_CONTEXTS]; - CABACContext cc; + uint8_t is_first_qg; // first quantization group int ctu_start; int ctu_end; - uint8_t is_first_qg; // first quantization group + int stat_coeff[VVC_MAX_SAMPLE_ARRAYS]; ///< StatCoeff + + CABACContext cc; + VVCCabacState cabac_state[VVC_CONTEXTS]; + + Palette pp[VVC_MAX_SAMPLE_ARRAYS]; // PalettePredictor MvField hmvp[MAX_NUM_HMVP_CANDS]; ///< HmvpCandList int num_hmvp; ///< NumHmvpCand @@ -389,17 +389,6 @@ typedef struct VVCLocalContext { int end_of_tiles_x; int end_of_tiles_y; - /* *2 for high bit depths */ - DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[EDGE_EMU_BUFFER_STRIDE * EDGE_EMU_BUFFER_STRIDE * 2]; - DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE]; - DECLARE_ALIGNED(32, int16_t, tmp1)[MAX_PB_SIZE * MAX_PB_SIZE]; - 
DECLARE_ALIGNED(32, int16_t, tmp2)[MAX_PB_SIZE * MAX_PB_SIZE]; - DECLARE_ALIGNED(32, uint8_t, ciip_tmp)[MAX_PB_SIZE * MAX_PB_SIZE * 2]; - DECLARE_ALIGNED(32, uint8_t, sao_buffer)[(MAX_CTU_SIZE + 2 * SAO_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; - DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; - DECLARE_ALIGNED(32, uint8_t, alf_buffer_chroma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; - DECLARE_ALIGNED(32, int32_t, alf_gradient_tmp)[ALF_GRADIENT_SIZE * ALF_GRADIENT_SIZE * ALF_NUM_DIR]; - struct { int sbt_num_fourths_tb0; ///< SbtNumFourthsTb0 @@ -426,11 +415,12 @@ typedef struct VVCLocalContext { int y_vpdu; } lmcs; - CodingUnit *cu; - ReconstructedArea ras[2][MAX_PARTS_IN_CTU]; - int num_ras[2]; + SliceContext *sc; + VVCFrameContext *fc; + EntryPoint *ep; + int *coeffs; - NeighbourAvailable na; + CodingUnit *cu; #define BOUNDARY_LEFT_SLICE (1 << 0) #define BOUNDARY_LEFT_TILE (1 << 1) @@ -442,10 +432,21 @@ typedef struct VVCLocalContext { * of the deblocking filter */ int boundary_flags; - SliceContext *sc; - VVCFrameContext *fc; - EntryPoint *ep; - int *coeffs; + int num_ras[2]; + ReconstructedArea ras[2][MAX_PARTS_IN_CTU]; + + NeighbourAvailable na; + + /* *2 for high bit depths */ + DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[EDGE_EMU_BUFFER_STRIDE * EDGE_EMU_BUFFER_STRIDE * 2]; + DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE]; + DECLARE_ALIGNED(32, int16_t, tmp1)[MAX_PB_SIZE * MAX_PB_SIZE]; + DECLARE_ALIGNED(32, int16_t, tmp2)[MAX_PB_SIZE * MAX_PB_SIZE]; + DECLARE_ALIGNED(32, uint8_t, ciip_tmp)[MAX_PB_SIZE * MAX_PB_SIZE * 2]; + DECLARE_ALIGNED(32, uint8_t, sao_buffer)[(MAX_CTU_SIZE + 2 * SAO_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; + DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2]; + DECLARE_ALIGNED(32, uint8_t, alf_buffer_chroma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * 
EDGE_EMU_BUFFER_STRIDE * 2]; + DECLARE_ALIGNED(32, int32_t, alf_gradient_tmp)[ALF_GRADIENT_SIZE * ALF_GRADIENT_SIZE * ALF_NUM_DIR]; } VVCLocalContext; typedef struct VVCAllowedSplit { _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
