On 8/1/2022 6:28 AM, Dawid Kozinski wrote:
diff --git a/libavcodec/evc_parser.c b/libavcodec/evc_parser.c
new file mode 100644
index 0000000000..b2136a6b45
--- /dev/null
+++ b/libavcodec/evc_parser.c
@@ -0,0 +1,526 @@
+/*
+ * EVC format parser
+ *
+ * Copyright (C) 2021 Dawid Kozinski<[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/common.h"
+
+#include "parser.h"
+#include "golomb.h"
+
+// The length field that indicates the length in bytes of the following NAL
unit is configured to be of 4 bytes
+#define EVC_NAL_UNIT_LENGTH_BYTE (4) /* byte */
+
+#define EVC_NAL_HEADER_SIZE (2) /* byte */
+#define MAX_SPS_CNT (16) /* defined value in EVC standard
*/
+
+// NALU types
+// @see ISO_IEC_23094-1_2020 7.4.2.2 NAL unit header semantics
+//
+#define EVC_NUT_NONIDR (0) /* Coded slice of a non-IDR
picture */
+#define EVC_NUT_IDR (1) /* Coded slice of an IDR picture
*/
+#define EVC_NUT_SPS (24) /* Sequence parameter set */
+#define EVC_NUT_PPS (25) /* Picture paremeter set */
+#define EVC_NUT_APS (26) /* Adaptation parameter set */
+#define EVC_NUT_FD (27) /* Filler data */
+#define EVC_NUT_SEI (28) /* Supplemental enhancement
information */
+
+// The sturcture reflects SPS RBSP(raw byte sequence payload) layout
+// @see ISO_IEC_23094-1 section 7.3.2.1
+//
+// The following descriptors specify the parsing process of each element
+// u(n) - unsigned integer using n bits
+// ue(v) - unsigned integer 0-th order Exp_Golomb-coded syntax element with
the left bit first
+typedef struct EVCParserSPS {
+ int sps_seq_parameter_set_id; // ue(v)
+ int profile_idc; // u(8)
+ int level_idc; // u(8)
+ int toolset_idc_h; // u(32)
+ int toolset_idc_l; // u(32)
+ int chroma_format_idc; // ue(v)
+ int pic_width_in_luma_samples; // ue(v)
+ int pic_height_in_luma_samples; // ue(v)
+ int bit_depth_luma_minus8; // ue(v)
+ int bit_depth_chroma_minus8; // ue(v)
+
+ int sps_btt_flag; // u(1)
+ int log2_ctu_size_minus5; // ue(v)
+ int log2_min_cb_size_minus2; // ue(v)
+ int log2_diff_ctu_max_14_cb_size; // ue(v)
+ int log2_diff_ctu_max_tt_cb_size; // ue(v)
+ int log2_diff_min_cb_min_tt_cb_size_minus2; // ue(v)
+
+ int sps_suco_flag; // u(1)
+ int log2_diff_ctu_size_max_suco_cb_size; // ue(v)
+ int log2_diff_max_suco_min_suco_cb_size; // ue(v)
+
+ int sps_admvp_flag; // u(1)
+ int sps_affine_flag; // u(1)
+ int sps_amvr_flag; // u(1)
+ int sps_dmvr_flag; // u(1)
+ int sps_mmvd_flag; // u(1)
+ int sps_hmvp_flag; // u(1)
+
+ int sps_eipd_flag; // u(1)
+ int sps_ibc_flag; // u(1)
+ int log2_max_ibc_cand_size_minus2; // ue(v)
+
+ int sps_cm_init_flag; // u(1)
+ int sps_adcc_flag; // u(1)
+
+ int sps_iqt_flag; // u(1)
+ int sps_ats_flag; // u(1)
+
+ int sps_addb_flag; // u(1)
+ int sps_alf_flag; // u(1)
+ int sps_htdf_flag; // u(1)
+ int sps_rpl_flag; // u(1)
+ int sps_pocs_flag; // u(1)
+ int sps_dquant_flag; // u(1)
+ int sps_dra_flag; // u(1)
+
+ int log2_max_pic_order_cnt_lsb_minus4; // ue(v)
+ int log2_sub_gop_length; // ue(v)
+ int log2_ref_pic_gap_length; // ue(v)
+
+ int max_num_tid0_ref_pics; // ue(v)
+
+ int sps_max_dec_pic_buffering_minus1; // ue(v)
+ int long_term_ref_pic_flag; // u(1)
+ int rpl1_same_as_rpl0_flag; // u(1)
+ int num_ref_pic_list_in_sps[2]; // ue(v)
+
+ int picture_cropping_flag; // u(1)
+ int picture_crop_left_offset; // ue(v)
+ int picture_crop_right_offset; // ue(v)
+ int picture_crop_top_offset; // ue(v)
+ int picture_crop_bottom_offset; // ue(v)
+
+} EVCParserSPS;
+
+typedef struct EVCParserContext {
+ ParseContext pc;
+ EVCParserSPS sps[MAX_SPS_CNT];
+ int is_avc;
+ int nal_length_size;
+ int to_read;
+ int incomplete_nalu_prefix_read; // The flag is set to 1 when an
incomplete NAL unit prefix has been read
+
+ int got_sps;
+ int got_pps;
+ int got_sei;
+ int got_slice;
+} EVCParserContext;
+
+static int get_nalu_type(const uint8_t *bits, int bits_size, AVCodecContext
*avctx)
+{
+ int unit_type_plus1 = 0;
+
+ if(bits_size >= EVC_NAL_HEADER_SIZE) {
+ unsigned char *p = (unsigned char *)bits;
+ // forbidden_zero_bit
+ if ((p[0] & 0x80) != 0) {
+ av_log(avctx, AV_LOG_ERROR, "Cannot get bitstream information.
Malformed bitstream.\n");
+ return -1;
+ }
+
+ // nal_unit_type
+ unit_type_plus1 = (p[0] >> 1) & 0x3F;
+ }
+
+ return unit_type_plus1 - 1;
+}
+
+static uint32_t read_nal_unit_length(const uint8_t *bits, int bits_size,
AVCodecContext *avctx)
+{
+ uint32_t nalu_len = 0;
+
+ if(bits_size >= EVC_NAL_UNIT_LENGTH_BYTE) {
+
+ int t = 0;
+ unsigned char *p = (unsigned char *)bits;
+
+ for(int i = 0; i < EVC_NAL_UNIT_LENGTH_BYTE; i++)
+ t = (t << 8) | p[i];
+
+ nalu_len = t;
+ if(nalu_len == 0) {
+ av_log(avctx, AV_LOG_ERROR, "Invalid bitstream size!\n");
+ return 0;
+ }
+ }
+
+ return nalu_len;
+}
+
+// @see ISO_IEC_23094-1 (7.3.2.1 SPS RBSP syntax)
+static EVCParserSPS *parse_sps(const uint8_t *bs, int bs_size,
EVCParserContext *ev)
+{
+ GetBitContext gb;
+ EVCParserSPS *sps;
+ int sps_seq_parameter_set_id;
+
+ if(init_get_bits8(&gb, bs, bs_size) < 0)
+ return NULL;
+
+ sps_seq_parameter_set_id = get_ue_golomb(&gb);
+
+ if(sps_seq_parameter_set_id >= MAX_SPS_CNT)
+ return NULL;
+
+ sps = &ev->sps[sps_seq_parameter_set_id];
+ sps->sps_seq_parameter_set_id = sps_seq_parameter_set_id;
+
+ // the Baseline profile is indicated by profile_idc eqal to 0
+ // the Main profile is indicated by profile_idc eqal to 1
+ sps->profile_idc = get_bits(&gb, 8);
+
+ sps->level_idc = get_bits(&gb, 8);
+
+ skip_bits_long(&gb, 32); /* skip toolset_idc_h */
+ skip_bits_long(&gb, 32); /* skip toolset_idc_l */
+
+ // 0 - monochrome
+ // 1 - 4:2:0
+ // 2 - 4:2:2
+ // 3 - 4:4:4
+ sps->chroma_format_idc = get_ue_golomb(&gb);
+
+ sps->pic_width_in_luma_samples = get_ue_golomb(&gb);
+ sps->pic_height_in_luma_samples = get_ue_golomb(&gb);
+
+ sps->bit_depth_luma_minus8 = get_ue_golomb(&gb);
+ sps->bit_depth_chroma_minus8 = get_ue_golomb(&gb);
+
+ sps->sps_btt_flag = get_bits(&gb, 1);
+ if(sps->sps_btt_flag) {
+ sps->log2_ctu_size_minus5 = get_ue_golomb(&gb);
+ sps->log2_min_cb_size_minus2 = get_ue_golomb(&gb);
+ sps->log2_diff_ctu_max_14_cb_size = get_ue_golomb(&gb);
+ sps->log2_diff_ctu_max_tt_cb_size = get_ue_golomb(&gb);
+ sps->log2_diff_min_cb_min_tt_cb_size_minus2 = get_ue_golomb(&gb);
+ }
+
+ sps->sps_suco_flag = get_bits(&gb, 1);
+ if(sps->sps_suco_flag) {
+ sps->log2_diff_ctu_size_max_suco_cb_size = get_ue_golomb(&gb);
+ sps->log2_diff_max_suco_min_suco_cb_size = get_ue_golomb(&gb);
+ }
+
+ sps->sps_admvp_flag = get_bits(&gb, 1);
+ if(sps->sps_admvp_flag) {
+ sps->sps_affine_flag = get_bits(&gb, 1);
+ sps->sps_amvr_flag = get_bits(&gb, 1);
+ sps->sps_dmvr_flag = get_bits(&gb, 1);
+ sps->sps_mmvd_flag = get_bits(&gb, 1);
+ sps->sps_hmvp_flag = get_bits(&gb, 1);
+ }
+
+ sps->sps_eipd_flag = get_bits(&gb, 1);
+ if(sps->sps_eipd_flag) {
+ sps->sps_ibc_flag = get_bits(&gb, 1);
+ if(sps->sps_ibc_flag)
+ sps->log2_max_ibc_cand_size_minus2 = get_ue_golomb(&gb);
+ }
+
+ sps->sps_cm_init_flag = get_bits(&gb, 1);
+ if(sps->sps_cm_init_flag)
+ sps->sps_adcc_flag = get_bits(&gb, 1);
+
+ sps->sps_iqt_flag = get_bits(&gb, 1);
+ if(sps->sps_iqt_flag)
+ sps->sps_ats_flag = get_bits(&gb, 1);
+
+ sps->sps_addb_flag = get_bits(&gb, 1);
+ sps->sps_alf_flag = get_bits(&gb, 1);
+ sps->sps_htdf_flag = get_bits(&gb, 1);
+ sps->sps_rpl_flag = get_bits(&gb, 1);
+ sps->sps_pocs_flag = get_bits(&gb, 1);
+ sps->sps_dquant_flag = get_bits(&gb, 1);
+ sps->sps_dra_flag = get_bits(&gb, 1);
+
+ if(sps->sps_pocs_flag)
+ sps->log2_max_pic_order_cnt_lsb_minus4 = get_ue_golomb(&gb);
+
+ if(!sps->sps_pocs_flag || !sps->sps_rpl_flag) {
+ sps->log2_sub_gop_length = get_ue_golomb(&gb);
+ if(sps->log2_sub_gop_length == 0)
+ sps->log2_ref_pic_gap_length = get_ue_golomb(&gb);
+ }
+
+ return sps;
+}
+
+static int parse_nal_units(AVCodecParserContext *s, const uint8_t *bs,
+ int bs_size, AVCodecContext *avctx)
+{
+ EVCParserContext *ev = s->priv_data;
+ int nalu_type, nalu_size;
+ unsigned char *bits = (unsigned char *)bs;
+ int bits_size = bs_size;
+
+ avctx->codec_id = AV_CODEC_ID_EVC;
+
+ nalu_size = read_nal_unit_length(bits, bits_size, avctx);
+ if(nalu_size == 0) {
+ av_log(avctx, AV_LOG_ERROR, "Invalid NAL unit size: (%d)\n",
nalu_size);
+ return -1;
+ }
+
+ bits += EVC_NAL_UNIT_LENGTH_BYTE;
+ bits_size -= EVC_NAL_UNIT_LENGTH_BYTE;
+
+ nalu_type = get_nalu_type(bits, bits_size, avctx);
+
+ bits += EVC_NAL_HEADER_SIZE;
+ bits_size -= EVC_NAL_HEADER_SIZE;
+
+ if (nalu_type == EVC_NUT_SPS) { // NAL Unit type: SPS (Sequence Parameter
Set)
+ EVCParserSPS *sps;
+
+ sps = parse_sps(bits, bits_size, ev);
+
+ avctx->coded_width = sps->pic_width_in_luma_samples;
+ avctx->coded_height = sps->pic_height_in_luma_samples;
+ avctx->width = sps->pic_width_in_luma_samples;
+ avctx->height = sps->pic_height_in_luma_samples;
These should be set in the AVCodecParserContext, not the AVCodecContext.
+
+ if(sps->profile_idc == 0) avctx->profile = FF_PROFILE_EVC_BASELINE;
+ else if (sps->profile_idc == 1) avctx->profile = FF_PROFILE_EVC_MAIN;
Setting these two is ok.
+ else {
+ av_log(avctx, AV_LOG_ERROR, "Not supported profile (%d)\n",
sps->profile_idc);
+ return -1;
Do you really need to abort here? Not recognizing the profile can't
possibly mean the bitstream can't be parsed further to fetch other
information.
+ }
+
+ // Currently XEVD decoder supports ony YCBCR420_10LE chroma format for
EVC stream
+ switch(sps->chroma_format_idc) {
+ case 0: /* YCBCR400_10LE */
+ av_log(avctx, AV_LOG_ERROR, "YCBCR400_10LE: Not supported chroma
format\n");
+ avctx->pix_fmt = AV_PIX_FMT_GRAY10LE;
+ return -1;
+ case 1: /* YCBCR420_10LE */
+ avctx->pix_fmt = AV_PIX_FMT_YUV420P10LE;
+ break;
+ case 2: /* YCBCR422_10LE */
+ av_log(avctx, AV_LOG_ERROR, "YCBCR422_10LE: Not supported chroma
format\n");
+ avctx->pix_fmt = AV_PIX_FMT_YUV422P10LE;
+ return -1;
+ case 3: /* YCBCR444_10LE */
+ av_log(avctx, AV_LOG_ERROR, "YCBCR444_10LE: Not supported chroma
format\n");
+ avctx->pix_fmt = AV_PIX_FMT_YUV444P10LE;
+ return -1;
+ default:
+ avctx->pix_fmt = AV_PIX_FMT_NONE;
Fill s->format instead of avctx->pix_fmt.
+ av_log(avctx, AV_LOG_ERROR, "Unknown supported chroma format\n");
+ return -1;
+ }
+
+ // The current implementation of parse_sps function doesn't handle VUI
parameters parsing,
+ // so at the moment it's impossible to initialize has_b_frames and
max_b_frames AVCodecContex fields here.
+ // Currently, initialization of has_b_frames and max_b_frames
AVCodecContex fields have been moved to
+ // libxevd_decode function where we can use xevd_config function being
a part of xevd library API
+ // to get the needed information.
+ // However, if it will be needed, parse_sps function should be
extended to handle VUI parameters parsing
+ // and the following lines should be used to initialize has_b_frames
and max_b_frames fields of the AVCodecContex.
There's no need no fill any of those field in this parser. What you can
fill is avctx->color{space,_primaries,_trc,_range}, so mention that to
add VUI parsing at some point.
+ //
+ // sps->vui_parameters.num_reorder_pics
+ // if (sps->bitstream_restriction_flag &&
sps->vui_parameters.num_reorder_pics) {
+ // avctx->has_b_frames = sps->vui_parameters.num_reorder_pics;
+ // }
+
+ ev->got_sps = 1;
+
+ } else if (nalu_type == EVC_NUT_PPS) // NAL Unit type: PPS (Video
Parameter Set)
+ ev->got_pps = 1;
+ else if(nalu_type == EVC_NUT_SEI) // NAL unit type: SEI (Supplemental
Enhancement Information)
+ ev->got_sei = 1;
These got_* variables seem to be write only.
+ else if (nalu_type == EVC_NUT_IDR || nalu_type == EVC_NUT_NONIDR) // NAL
Unit type: Coded slice of a IDR or non-IDR picture
In here you should parse enough data to get the picture type (I, P, B)
and set s->pict_type, and based on (i suppose) nalu_type, also set
s->key_frame to 1 or 0.
At the start of this function, you'd ideally set them to
AV_PICTURE_TYPE_NONE and -1 respectively, as "safe" defaults.
s->picture_structure should also be always set to
AV_PICTURE_STRUCTURE_FRAME, as i assume this codec does not support
interlacing.
+ ev->got_slice++;
+ else {
+ av_log(avctx, AV_LOG_ERROR, "Invalid NAL unit type: %d\n", nalu_type);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * Find the end of the current frame in the bitstream.
+ * @return the position of the first byte of the next frame, or END_NOT_FOUND
+ */
+static int evc_find_frame_end(AVCodecParserContext *s, const uint8_t *buf,
+ int buf_size, AVCodecContext *avctx)
+{
+ EVCParserContext *ev = s->priv_data;
+
+ if(!ev->to_read) {
+ int nal_unit_size = 0;
+ int next = END_NOT_FOUND;
+
+ // This is the case when buffer size is not enough for buffer to store
NAL unit length
+ if(buf_size < EVC_NAL_UNIT_LENGTH_BYTE) {
+ ev->to_read = EVC_NAL_UNIT_LENGTH_BYTE;
+ ev->nal_length_size = buf_size;
+ ev->incomplete_nalu_prefix_read = 1;
+
+ return END_NOT_FOUND;
+ }
+
+ nal_unit_size = read_nal_unit_length(buf, buf_size, avctx);
+ ev->nal_length_size = EVC_NAL_UNIT_LENGTH_BYTE;
+
+ next = nal_unit_size + EVC_NAL_UNIT_LENGTH_BYTE;
+ ev->to_read = next;
+ if(next < buf_size)
+ return next;
+ else
+ return END_NOT_FOUND;
+ } else if(ev->to_read > buf_size)
+ return END_NOT_FOUND;
+ else {
+ if(ev->incomplete_nalu_prefix_read == 1) {
+ EVCParserContext *ev = s->priv_data;
+ ParseContext *pc = &ev->pc;
+ uint8_t nalu_len[EVC_NAL_UNIT_LENGTH_BYTE] = {0};
+ int nal_unit_size = 0;
+
+ // 1. pc->buffer contains previously read bytes of NALU prefix
+ // 2. buf contains the rest of NAL unit prefix bytes
+ //
+ // ~~~~~~~
+ // EXAMPLE
+ // ~~~~~~~
+ //
+ // In the following example we assumed that the number of already
read NAL Unit prefix bytes is equal 1
+ //
+ // ----------
+ // pc->buffer -> conatins already read bytes
+ // ----------
+ // __ pc->index == 1
+ // |
+ // V
+ // -------------------------------------------------------
+ // | 0 | 1 | 2 | 3 | 4 | ... | N |
+ // -------------------------------------------------------
+ // | 0x00 | 0xXX | 0xXX | 0xXX | 0xXX | ... | 0xXX |
+ // -------------------------------------------------------
+ //
+ // ----------
+ // buf -> contains newly read bytes
+ // ----------
+ // -------------------------------------------------------
+ // | 0 | 1 | 2 | 3 | 4 | ... | N |
+ // -------------------------------------------------------
+ // | 0x00 | 0x00 | 0x3C | 0xXX | 0xXX | ... | 0xXX |
+ // -------------------------------------------------------
+ //
+ for(int i = 0; i < EVC_NAL_UNIT_LENGTH_BYTE; i++) {
+ if(i < pc->index)
+ nalu_len[i] = pc->buffer[i];
+ else
+ nalu_len[i] = buf[i - pc->index];
+ }
+
+ // ----------
+ // nalu_len
+ // ----------
+ // ---------------------------------
+ // | 0 | 1 | 2 | 3 |
+ // ---------------------------------
+ // | 0x00 | 0x00 | 0x00 | 0x3C |
+ // ---------------------------------
+ // | NALU LENGTH |
+ // ---------------------------------
+ // NAL Unit lenght = 60 (0x0000003C)
+
+ nal_unit_size = read_nal_unit_length(nalu_len,
EVC_NAL_UNIT_LENGTH_BYTE, avctx);
+
+ ev->to_read = nal_unit_size + EVC_NAL_UNIT_LENGTH_BYTE - pc->index;
+
+ ev->incomplete_nalu_prefix_read = 0;
+
+ if(ev->to_read > buf_size)
+ return END_NOT_FOUND;
+ else
+ return ev->to_read;
+ }
+ return ev->to_read;
+ }
+
+ return END_NOT_FOUND;
+}
+
+static int evc_parser_init(AVCodecParserContext *s)
+{
+ EVCParserContext *ev = s->priv_data;
+
+ ev->got_sps = 0;
+ ev->got_pps = 0;
+ ev->got_sei = 0;
+ ev->got_slice = 0;
+ ev->nal_length_size = EVC_NAL_UNIT_LENGTH_BYTE;
+ ev->incomplete_nalu_prefix_read = 0;
+
+ return 0;
+}
+
+static int evc_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+ const uint8_t **poutbuf, int *poutbuf_size,
+ const uint8_t *buf, int buf_size)
+{
+ int next;
+ EVCParserContext *ev = s->priv_data;
+ ParseContext *pc = &ev->pc;
+ int is_dummy_buf = !buf_size;
+ const uint8_t *dummy_buf = buf;
+
+ if (s->flags & PARSER_FLAG_COMPLETE_FRAMES)
+ next = buf_size;
+ else {
+ next = evc_find_frame_end(s, buf, buf_size, avctx);
+ if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
+ *poutbuf = NULL;
+ *poutbuf_size = 0;
+ ev->to_read -= buf_size;
+ return buf_size;
+ }
+ }
+
+ is_dummy_buf &= (dummy_buf == buf);
+
+ if (!is_dummy_buf)
+ parse_nal_units(s, buf, buf_size, avctx);
+
+ *poutbuf = buf;
+ *poutbuf_size = buf_size;
+ ev->to_read -= next;
+
+ return next;
+}
+
+const AVCodecParser ff_evc_parser = {
+ .codec_ids = { AV_CODEC_ID_EVC },
+ .priv_data_size = sizeof(EVCParserContext),
+ .parser_init = evc_parser_init,
+ .parser_parse = evc_parse,
+ .parser_close = ff_parse_close,
+};
_______________________________________________
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".