Re: [FFmpeg-devel] [PATCH 1/2] Provided support for MPEG-5 EVC (Essential Video Coding) codec

James Almer Mon, 01 Aug 2022 05:29:42 -0700

On 8/1/2022 6:28 AM, Dawid Kozinski wrote:

diff --git a/libavcodec/evc_parser.c b/libavcodec/evc_parser.c
new file mode 100644
index 0000000000..b2136a6b45
--- /dev/null
+++ b/libavcodec/evc_parser.c
@@ -0,0 +1,526 @@
+/*
+ * EVC format parser
+ *
+ * Copyright (C) 2021 Dawid Kozinski<[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/common.h"
+
+#include "parser.h"
+#include "golomb.h"
+
+// The length field that indicates the length in bytes of the following NAL 
unit is configured to be of 4 bytes
+#define EVC_NAL_UNIT_LENGTH_BYTE        (4)  /* byte */
+
+#define EVC_NAL_HEADER_SIZE             (2)  /* byte */
+#define MAX_SPS_CNT                     (16) /* defined value in EVC standard 
*/
+
+// NALU types
+// @see ISO_IEC_23094-1_2020 7.4.2.2 NAL unit header semantics
+//
+#define EVC_NUT_NONIDR                  (0)  /* Coded slice of a non-IDR 
picture */
+#define EVC_NUT_IDR                     (1)  /* Coded slice of an IDR picture 
*/
+#define EVC_NUT_SPS                     (24) /* Sequence parameter set */
+#define EVC_NUT_PPS                     (25) /* Picture paremeter set */
+#define EVC_NUT_APS                     (26) /* Adaptation parameter set */
+#define EVC_NUT_FD                      (27) /* Filler data */
+#define EVC_NUT_SEI                     (28) /* Supplemental enhancement 
information */
+
+// The sturcture reflects SPS RBSP(raw byte sequence payload) layout
+// @see ISO_IEC_23094-1 section 7.3.2.1
+//
+// The following descriptors specify the parsing process of each element
+// u(n) - unsigned integer using n bits
+// ue(v) - unsigned integer 0-th order Exp_Golomb-coded syntax element with 
the left bit first
+typedef struct EVCParserSPS {
+    int sps_seq_parameter_set_id;   // ue(v)
+    int profile_idc;                // u(8)
+    int level_idc;                  // u(8)
+    int toolset_idc_h;              // u(32)
+    int toolset_idc_l;              // u(32)
+    int chroma_format_idc;          // ue(v)
+    int pic_width_in_luma_samples;  // ue(v)
+    int pic_height_in_luma_samples; // ue(v)
+    int bit_depth_luma_minus8;      // ue(v)
+    int bit_depth_chroma_minus8;    // ue(v)
+
+    int sps_btt_flag;                           // u(1)
+    int log2_ctu_size_minus5;                   // ue(v)
+    int log2_min_cb_size_minus2;                // ue(v)
+    int log2_diff_ctu_max_14_cb_size;           // ue(v)
+    int log2_diff_ctu_max_tt_cb_size;           // ue(v)
+    int log2_diff_min_cb_min_tt_cb_size_minus2; // ue(v)
+
+    int sps_suco_flag;                       // u(1)
+    int log2_diff_ctu_size_max_suco_cb_size; // ue(v)
+    int log2_diff_max_suco_min_suco_cb_size; // ue(v)
+
+    int sps_admvp_flag;     // u(1)
+    int sps_affine_flag;    // u(1)
+    int sps_amvr_flag;      // u(1)
+    int sps_dmvr_flag;      // u(1)
+    int sps_mmvd_flag;      // u(1)
+    int sps_hmvp_flag;      // u(1)
+
+    int sps_eipd_flag;                 // u(1)
+    int sps_ibc_flag;                  // u(1)
+    int log2_max_ibc_cand_size_minus2; // ue(v)
+
+    int sps_cm_init_flag; // u(1)
+    int sps_adcc_flag;    // u(1)
+
+    int sps_iqt_flag; // u(1)
+    int sps_ats_flag; // u(1)
+
+    int sps_addb_flag;   // u(1)
+    int sps_alf_flag;    // u(1)
+    int sps_htdf_flag;   // u(1)
+    int sps_rpl_flag;    // u(1)
+    int sps_pocs_flag;   // u(1)
+    int sps_dquant_flag; // u(1)
+    int sps_dra_flag;    // u(1)
+
+    int log2_max_pic_order_cnt_lsb_minus4; // ue(v)
+    int log2_sub_gop_length;               // ue(v)
+    int log2_ref_pic_gap_length;           // ue(v)
+
+    int max_num_tid0_ref_pics; // ue(v)
+
+    int sps_max_dec_pic_buffering_minus1; // ue(v)
+    int long_term_ref_pic_flag;           // u(1)
+    int rpl1_same_as_rpl0_flag;           // u(1)
+    int num_ref_pic_list_in_sps[2];       // ue(v)
+
+    int picture_cropping_flag;      // u(1)
+    int picture_crop_left_offset;   // ue(v)
+    int picture_crop_right_offset;  // ue(v)
+    int picture_crop_top_offset;    // ue(v)
+    int picture_crop_bottom_offset; // ue(v)
+
+} EVCParserSPS;
+
+typedef struct EVCParserContext {
+    ParseContext pc;
+    EVCParserSPS sps[MAX_SPS_CNT];
+    int is_avc;
+    int nal_length_size;
+    int to_read;
+    int incomplete_nalu_prefix_read; // The flag is set to 1 when an 
incomplete NAL unit prefix has been read
+
+    int got_sps;
+    int got_pps;
+    int got_sei;
+    int got_slice;
+} EVCParserContext;
+
+static int get_nalu_type(const uint8_t *bits, int bits_size, AVCodecContext 
*avctx)
+{
+    int unit_type_plus1 = 0;
+
+    if(bits_size >= EVC_NAL_HEADER_SIZE) {
+        unsigned char *p = (unsigned char *)bits;
+        // forbidden_zero_bit
+        if ((p[0] & 0x80) != 0) {
+            av_log(avctx, AV_LOG_ERROR, "Cannot get bitstream information. 
Malformed bitstream.\n");
+            return -1;
+        }
+
+        // nal_unit_type
+        unit_type_plus1 = (p[0] >> 1) & 0x3F;
+    }
+
+    return unit_type_plus1 - 1;
+}
+
+static uint32_t read_nal_unit_length(const uint8_t *bits, int bits_size, 
AVCodecContext *avctx)
+{
+    uint32_t nalu_len = 0;
+
+    if(bits_size >= EVC_NAL_UNIT_LENGTH_BYTE) {
+
+        int t = 0;
+        unsigned char *p = (unsigned char *)bits;
+
+        for(int i = 0; i < EVC_NAL_UNIT_LENGTH_BYTE; i++)
+            t = (t << 8) | p[i];
+
+        nalu_len = t;
+        if(nalu_len == 0) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid bitstream size!\n");
+            return 0;
+        }
+    }
+
+    return nalu_len;
+}
+
+// @see ISO_IEC_23094-1 (7.3.2.1 SPS RBSP syntax)
+static EVCParserSPS *parse_sps(const uint8_t *bs, int bs_size, 
EVCParserContext *ev)
+{
+    GetBitContext gb;
+    EVCParserSPS *sps;
+    int sps_seq_parameter_set_id;
+
+    if(init_get_bits8(&gb, bs, bs_size) < 0)
+        return NULL;
+
+    sps_seq_parameter_set_id = get_ue_golomb(&gb);
+
+    if(sps_seq_parameter_set_id >= MAX_SPS_CNT)
+        return NULL;
+
+    sps = &ev->sps[sps_seq_parameter_set_id];
+    sps->sps_seq_parameter_set_id = sps_seq_parameter_set_id;
+
+    // the Baseline profile is indicated by profile_idc eqal to 0
+    // the Main profile is indicated by profile_idc eqal to 1
+    sps->profile_idc = get_bits(&gb, 8);
+
+    sps->level_idc = get_bits(&gb, 8);
+
+    skip_bits_long(&gb, 32); /* skip toolset_idc_h */
+    skip_bits_long(&gb, 32); /* skip toolset_idc_l */
+
+    // 0 - monochrome
+    // 1 - 4:2:0
+    // 2 - 4:2:2
+    // 3 - 4:4:4
+    sps->chroma_format_idc = get_ue_golomb(&gb);
+
+    sps->pic_width_in_luma_samples = get_ue_golomb(&gb);
+    sps->pic_height_in_luma_samples = get_ue_golomb(&gb);
+
+    sps->bit_depth_luma_minus8 = get_ue_golomb(&gb);
+    sps->bit_depth_chroma_minus8 = get_ue_golomb(&gb);
+
+    sps->sps_btt_flag = get_bits(&gb, 1);
+    if(sps->sps_btt_flag) {
+        sps->log2_ctu_size_minus5 = get_ue_golomb(&gb);
+        sps->log2_min_cb_size_minus2 = get_ue_golomb(&gb);
+        sps->log2_diff_ctu_max_14_cb_size = get_ue_golomb(&gb);
+        sps->log2_diff_ctu_max_tt_cb_size = get_ue_golomb(&gb);
+        sps->log2_diff_min_cb_min_tt_cb_size_minus2 = get_ue_golomb(&gb);
+    }
+
+    sps->sps_suco_flag = get_bits(&gb, 1);
+    if(sps->sps_suco_flag) {
+        sps->log2_diff_ctu_size_max_suco_cb_size = get_ue_golomb(&gb);
+        sps->log2_diff_max_suco_min_suco_cb_size = get_ue_golomb(&gb);
+    }
+
+    sps->sps_admvp_flag = get_bits(&gb, 1);
+    if(sps->sps_admvp_flag) {
+        sps->sps_affine_flag = get_bits(&gb, 1);
+        sps->sps_amvr_flag = get_bits(&gb, 1);
+        sps->sps_dmvr_flag = get_bits(&gb, 1);
+        sps->sps_mmvd_flag = get_bits(&gb, 1);
+        sps->sps_hmvp_flag = get_bits(&gb, 1);
+    }
+
+    sps->sps_eipd_flag =  get_bits(&gb, 1);
+    if(sps->sps_eipd_flag) {
+        sps->sps_ibc_flag = get_bits(&gb, 1);
+        if(sps->sps_ibc_flag)
+            sps->log2_max_ibc_cand_size_minus2 = get_ue_golomb(&gb);
+    }
+
+    sps->sps_cm_init_flag = get_bits(&gb, 1);
+    if(sps->sps_cm_init_flag)
+        sps->sps_adcc_flag = get_bits(&gb, 1);
+
+    sps->sps_iqt_flag = get_bits(&gb, 1);
+    if(sps->sps_iqt_flag)
+        sps->sps_ats_flag = get_bits(&gb, 1);
+
+    sps->sps_addb_flag = get_bits(&gb, 1);
+    sps->sps_alf_flag = get_bits(&gb, 1);
+    sps->sps_htdf_flag = get_bits(&gb, 1);
+    sps->sps_rpl_flag = get_bits(&gb, 1);
+    sps->sps_pocs_flag = get_bits(&gb, 1);
+    sps->sps_dquant_flag = get_bits(&gb, 1);
+    sps->sps_dra_flag = get_bits(&gb, 1);
+
+    if(sps->sps_pocs_flag)
+        sps->log2_max_pic_order_cnt_lsb_minus4 = get_ue_golomb(&gb);
+
+    if(!sps->sps_pocs_flag || !sps->sps_rpl_flag) {
+        sps->log2_sub_gop_length = get_ue_golomb(&gb);
+        if(sps->log2_sub_gop_length == 0)
+            sps->log2_ref_pic_gap_length = get_ue_golomb(&gb);
+    }
+
+    return sps;
+}
+
+static int parse_nal_units(AVCodecParserContext *s, const uint8_t *bs,
+                           int bs_size, AVCodecContext *avctx)
+{
+    EVCParserContext *ev = s->priv_data;
+    int nalu_type, nalu_size;
+    unsigned char *bits = (unsigned char *)bs;
+    int bits_size = bs_size;
+
+    avctx->codec_id = AV_CODEC_ID_EVC;
+
+    nalu_size = read_nal_unit_length(bits, bits_size, avctx);
+    if(nalu_size == 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid NAL unit size: (%d)\n", 
nalu_size);
+        return -1;
+    }
+
+    bits += EVC_NAL_UNIT_LENGTH_BYTE;
+    bits_size -= EVC_NAL_UNIT_LENGTH_BYTE;
+
+    nalu_type = get_nalu_type(bits, bits_size, avctx);
+
+    bits += EVC_NAL_HEADER_SIZE;
+    bits_size -= EVC_NAL_HEADER_SIZE;
+
+    if (nalu_type == EVC_NUT_SPS) { // NAL Unit type: SPS (Sequence Parameter 
Set)
+        EVCParserSPS *sps;
+
+        sps = parse_sps(bits, bits_size, ev);
+
+        avctx->coded_width         = sps->pic_width_in_luma_samples;
+        avctx->coded_height        = sps->pic_height_in_luma_samples;
+        avctx->width               = sps->pic_width_in_luma_samples;
+        avctx->height              = sps->pic_height_in_luma_samples;


These should be set in the AVCodecParserContext, not the AVCodecContext.

+
+        if(sps->profile_idc == 0) avctx->profile = FF_PROFILE_EVC_BASELINE;
+        else if (sps->profile_idc == 1) avctx->profile = FF_PROFILE_EVC_MAIN;


Setting these two is ok.

+        else {
+            av_log(avctx, AV_LOG_ERROR, "Not supported profile (%d)\n", 
sps->profile_idc);
+            return -1;

Do you really need to abort here? Not recognizing the profile can'tpossibly mean the bitstream can't be parsed further to fetch otherinformation.

+        }
+
+        // Currently XEVD decoder supports ony YCBCR420_10LE chroma format for 
EVC stream
+        switch(sps->chroma_format_idc) {
+        case 0: /* YCBCR400_10LE */
+            av_log(avctx, AV_LOG_ERROR, "YCBCR400_10LE: Not supported chroma 
format\n");
+            avctx->pix_fmt = AV_PIX_FMT_GRAY10LE;
+            return -1;
+        case 1: /* YCBCR420_10LE */
+            avctx->pix_fmt = AV_PIX_FMT_YUV420P10LE;
+            break;
+        case 2: /* YCBCR422_10LE */
+            av_log(avctx, AV_LOG_ERROR, "YCBCR422_10LE: Not supported chroma 
format\n");
+            avctx->pix_fmt = AV_PIX_FMT_YUV422P10LE;
+            return -1;
+        case 3: /* YCBCR444_10LE */
+            av_log(avctx, AV_LOG_ERROR, "YCBCR444_10LE: Not supported chroma 
format\n");
+            avctx->pix_fmt = AV_PIX_FMT_YUV444P10LE;
+            return -1;
+        default:
+            avctx->pix_fmt = AV_PIX_FMT_NONE;


Fill s->format instead of avctx->pix_fmt.

+            av_log(avctx, AV_LOG_ERROR, "Unknown supported chroma format\n");
+            return -1;
+        }
+
+        // The current implementation of parse_sps function doesn't handle VUI 
parameters parsing,
+        // so at the moment it's impossible to initialize has_b_frames and 
max_b_frames AVCodecContex fields here.
+        // Currently, initialization of has_b_frames and max_b_frames 
AVCodecContex fields have been moved to
+        // libxevd_decode function where we can use xevd_config function being 
a part of xevd library API
+        // to get the needed information.
+        // However, if it will be needed, parse_sps function should be 
extended to handle VUI parameters parsing
+        // and the following lines should be used to initialize has_b_frames 
and max_b_frames fields of the AVCodecContex.

There's no need no fill any of those field in this parser. What you canfill is avctx->color{space,_primaries,_trc,_range}, so mention that toadd VUI parsing at some point.

+        //
+        // sps->vui_parameters.num_reorder_pics
+        // if (sps->bitstream_restriction_flag && 
sps->vui_parameters.num_reorder_pics) {
+        //     avctx->has_b_frames = sps->vui_parameters.num_reorder_pics;
+        // }
+
+        ev->got_sps = 1;
+
+    } else if (nalu_type == EVC_NUT_PPS) // NAL Unit type: PPS (Video 
Parameter Set)
+        ev->got_pps = 1;
+    else if(nalu_type == EVC_NUT_SEI) // NAL unit type: SEI (Supplemental 
Enhancement Information)
+        ev->got_sei = 1;


These got_* variables seem to be write only.

+    else if (nalu_type == EVC_NUT_IDR || nalu_type == EVC_NUT_NONIDR) // NAL 
Unit type: Coded slice of a IDR or non-IDR picture

In here you should parse enough data to get the picture type (I, P, B)and set s->pict_type, and based on (i suppose) nalu_type, also sets->key_frame to 1 or 0.At the start of this function, you'd ideally set them toAV_PICTURE_TYPE_NONE and -1 respectively, as "safe" defaults.

s->picture_structure should also be always set toAV_PICTURE_STRUCTURE_FRAME, as i assume this codec does not supportinterlacing.

+        ev->got_slice++;
+    else {
+        av_log(avctx, AV_LOG_ERROR, "Invalid NAL unit type: %d\n", nalu_type);
+        return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * Find the end of the current frame in the bitstream.
+ * @return the position of the first byte of the next frame, or END_NOT_FOUND
+ */
+static int evc_find_frame_end(AVCodecParserContext *s, const uint8_t *buf,
+                              int buf_size, AVCodecContext *avctx)
+{
+    EVCParserContext *ev = s->priv_data;
+
+    if(!ev->to_read) {
+        int nal_unit_size = 0;
+        int next = END_NOT_FOUND;
+
+        // This is the case when buffer size is not enough for buffer to store 
NAL unit length
+        if(buf_size < EVC_NAL_UNIT_LENGTH_BYTE) {
+            ev->to_read = EVC_NAL_UNIT_LENGTH_BYTE;
+            ev->nal_length_size = buf_size;
+            ev->incomplete_nalu_prefix_read  = 1;
+
+            return END_NOT_FOUND;
+        }
+
+        nal_unit_size = read_nal_unit_length(buf, buf_size, avctx);
+        ev->nal_length_size = EVC_NAL_UNIT_LENGTH_BYTE;
+
+        next = nal_unit_size + EVC_NAL_UNIT_LENGTH_BYTE;
+        ev->to_read = next;
+        if(next < buf_size)
+            return next;
+        else
+            return END_NOT_FOUND;
+    } else if(ev->to_read > buf_size)
+        return END_NOT_FOUND;
+    else {
+        if(ev->incomplete_nalu_prefix_read  == 1) {
+            EVCParserContext *ev = s->priv_data;
+            ParseContext *pc = &ev->pc;
+            uint8_t nalu_len[EVC_NAL_UNIT_LENGTH_BYTE] = {0};
+            int nal_unit_size = 0;
+
+            // 1. pc->buffer contains previously read bytes of NALU prefix
+            // 2. buf contains the rest of NAL unit prefix bytes
+            //
+            // ~~~~~~~
+            // EXAMPLE
+            // ~~~~~~~
+            //
+            // In the following example we assumed that the number of already 
read NAL Unit prefix bytes is equal 1
+            //
+            // ----------
+            // pc->buffer -> conatins already read bytes
+            // ----------
+            //              __ pc->index == 1
+            //             |
+            //             V
+            // -------------------------------------------------------
+            // |   0   |   1   |   2   |   3   |   4   | ... |   N   |
+            // -------------------------------------------------------
+            // |  0x00 |  0xXX |  0xXX |  0xXX |  0xXX | ... |  0xXX |
+            // -------------------------------------------------------
+            //
+            // ----------
+            // buf -> contains newly read bytes
+            // ----------
+            // -------------------------------------------------------
+            // |   0   |   1   |   2   |   3   |   4   | ... |   N   |
+            // -------------------------------------------------------
+            // |  0x00 |  0x00 |  0x3C |  0xXX |  0xXX | ... |  0xXX |
+            // -------------------------------------------------------
+            //
+            for(int i = 0; i < EVC_NAL_UNIT_LENGTH_BYTE; i++) {
+                if(i < pc->index)
+                    nalu_len[i] = pc->buffer[i];
+                else
+                    nalu_len[i] = buf[i - pc->index];
+            }
+
+            // ----------
+            // nalu_len
+            // ----------
+            // ---------------------------------
+            // |   0   |   1   |   2   |   3   |
+            // ---------------------------------
+            // |  0x00 |  0x00 |  0x00 |  0x3C |
+            // ---------------------------------
+            // | NALU LENGTH                   |
+            // ---------------------------------
+            // NAL Unit lenght =  60 (0x0000003C)
+
+            nal_unit_size = read_nal_unit_length(nalu_len, 
EVC_NAL_UNIT_LENGTH_BYTE, avctx);
+
+            ev->to_read = nal_unit_size + EVC_NAL_UNIT_LENGTH_BYTE - pc->index;
+
+            ev->incomplete_nalu_prefix_read = 0;
+
+            if(ev->to_read > buf_size)
+                return END_NOT_FOUND;
+            else
+                return ev->to_read;
+        }
+        return ev->to_read;
+    }
+
+    return END_NOT_FOUND;
+}
+
+static int evc_parser_init(AVCodecParserContext *s)
+{
+    EVCParserContext *ev = s->priv_data;
+
+    ev->got_sps = 0;
+    ev->got_pps = 0;
+    ev->got_sei = 0;
+    ev->got_slice = 0;
+    ev->nal_length_size = EVC_NAL_UNIT_LENGTH_BYTE;
+    ev->incomplete_nalu_prefix_read = 0;
+
+    return 0;
+}
+
+static int evc_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                     const uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size)
+{
+    int next;
+    EVCParserContext *ev = s->priv_data;
+    ParseContext *pc = &ev->pc;
+    int is_dummy_buf = !buf_size;
+    const uint8_t *dummy_buf = buf;
+
+    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES)
+        next = buf_size;
+    else {
+        next = evc_find_frame_end(s, buf, buf_size, avctx);
+        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
+            *poutbuf      = NULL;
+            *poutbuf_size = 0;
+            ev->to_read -= buf_size;
+            return buf_size;
+        }
+    }
+
+    is_dummy_buf &= (dummy_buf == buf);
+
+    if (!is_dummy_buf)
+        parse_nal_units(s, buf, buf_size, avctx);
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    ev->to_read -= next;
+
+    return next;
+}
+
+const AVCodecParser ff_evc_parser = {
+    .codec_ids      = { AV_CODEC_ID_EVC },
+    .priv_data_size = sizeof(EVCParserContext),
+    .parser_init    = evc_parser_init,
+    .parser_parse   = evc_parse,
+    .parser_close   = ff_parse_close,
+};

_______________________________________________
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 1/2] Provided support for MPEG-5 EVC (Essential Video Coding) codec

Reply via email to