ctu: Move often accessed fields to the start of structs

Andreas Rheinhardt via ffmpeg-cvslog Tue, 24 Mar 2026 10:47:00 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit a10c7317234dc436b09268ede667d48120b58f06
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Fri Mar 20 03:48:28 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Tue Mar 24 18:10:55 2026 +0100

    avcodec/vvc/ctu: Move often accessed fields to the start of structs
    
    And move the big buffers to the end. This reduces code size,
    as offset+displacement addressing modes are either unavailable
    or require more bytes if the displacement is too large. E.g. this
    saves 5952B on x64 here and 3008B on AArch64. This change should
    also improve data locality.
    
    Reviewed-by: Frank Plowman <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/vvc/ctu.h | 53 ++++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h
index e37bacf9dd..b6f68d8432 100644
--- a/libavcodec/vvc/ctu.h
+++ b/libavcodec/vvc/ctu.h
@@ -363,17 +363,17 @@ typedef struct VVCCabacState {
 typedef struct EntryPoint {
     int8_t qp_y;                                    ///< QpY
 
-    int stat_coeff[VVC_MAX_SAMPLE_ARRAYS];          ///< StatCoeff
-
-    Palette pp[VVC_MAX_SAMPLE_ARRAYS];              // PalettePredictor
-
-    VVCCabacState cabac_state[VVC_CONTEXTS];
-    CABACContext cc;
+    uint8_t is_first_qg;                            // first quantization group
 
     int ctu_start;
     int ctu_end;
 
-    uint8_t is_first_qg;                            // first quantization group
+    int stat_coeff[VVC_MAX_SAMPLE_ARRAYS];          ///< StatCoeff
+
+    CABACContext cc;
+    VVCCabacState cabac_state[VVC_CONTEXTS];
+
+    Palette pp[VVC_MAX_SAMPLE_ARRAYS];              // PalettePredictor
 
     MvField hmvp[MAX_NUM_HMVP_CANDS];               ///< HmvpCandList
     int     num_hmvp;                               ///< NumHmvpCand
@@ -389,17 +389,6 @@ typedef struct VVCLocalContext {
     int     end_of_tiles_x;
     int     end_of_tiles_y;
 
-    /* *2 for high bit depths */
-    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[EDGE_EMU_BUFFER_STRIDE * 
EDGE_EMU_BUFFER_STRIDE * 2];
-    DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE];
-    DECLARE_ALIGNED(32, int16_t, tmp1)[MAX_PB_SIZE * MAX_PB_SIZE];
-    DECLARE_ALIGNED(32, int16_t, tmp2)[MAX_PB_SIZE * MAX_PB_SIZE];
-    DECLARE_ALIGNED(32, uint8_t, ciip_tmp)[MAX_PB_SIZE * MAX_PB_SIZE * 2];
-    DECLARE_ALIGNED(32, uint8_t, sao_buffer)[(MAX_CTU_SIZE + 2 * 
SAO_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
-    DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * 
ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
-    DECLARE_ALIGNED(32, uint8_t, alf_buffer_chroma)[(MAX_CTU_SIZE + 2 * 
ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
-    DECLARE_ALIGNED(32, int32_t, alf_gradient_tmp)[ALF_GRADIENT_SIZE * 
ALF_GRADIENT_SIZE * ALF_NUM_DIR];
-
     struct {
         int sbt_num_fourths_tb0;                ///< SbtNumFourthsTb0
 
@@ -426,11 +415,12 @@ typedef struct VVCLocalContext {
         int y_vpdu;
     } lmcs;
 
-    CodingUnit *cu;
-    ReconstructedArea ras[2][MAX_PARTS_IN_CTU];
-    int num_ras[2];
+    SliceContext *sc;
+    VVCFrameContext *fc;
+    EntryPoint *ep;
+    int *coeffs;
 
-    NeighbourAvailable na;
+    CodingUnit *cu;
 
 #define BOUNDARY_LEFT_SLICE     (1 << 0)
 #define BOUNDARY_LEFT_TILE      (1 << 1)
@@ -442,10 +432,21 @@ typedef struct VVCLocalContext {
      * of the deblocking filter */
     int boundary_flags;
 
-    SliceContext *sc;
-    VVCFrameContext *fc;
-    EntryPoint *ep;
-    int *coeffs;
+    int num_ras[2];
+    ReconstructedArea ras[2][MAX_PARTS_IN_CTU];
+
+    NeighbourAvailable na;
+
+    /* *2 for high bit depths */
+    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[EDGE_EMU_BUFFER_STRIDE * 
EDGE_EMU_BUFFER_STRIDE * 2];
+    DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE];
+    DECLARE_ALIGNED(32, int16_t, tmp1)[MAX_PB_SIZE * MAX_PB_SIZE];
+    DECLARE_ALIGNED(32, int16_t, tmp2)[MAX_PB_SIZE * MAX_PB_SIZE];
+    DECLARE_ALIGNED(32, uint8_t, ciip_tmp)[MAX_PB_SIZE * MAX_PB_SIZE * 2];
+    DECLARE_ALIGNED(32, uint8_t, sao_buffer)[(MAX_CTU_SIZE + 2 * 
SAO_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
+    DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * 
ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
+    DECLARE_ALIGNED(32, uint8_t, alf_buffer_chroma)[(MAX_CTU_SIZE + 2 * 
ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
+    DECLARE_ALIGNED(32, int32_t, alf_gradient_tmp)[ALF_GRADIENT_SIZE * 
ALF_GRADIENT_SIZE * ALF_NUM_DIR];
 } VVCLocalContext;
 
 typedef struct VVCAllowedSplit {

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 02/03: avcodec/vvc/ctu: Move often accessed fields to the start of structs

Reply via email to