Re: [FFmpeg-devel] [PATCH] avcodec: add an AV1 parser

James Almer Mon, 24 Sep 2018 17:17:10 -0700

On 9/24/2018 8:41 PM, Mark Thompson wrote:
> On 24/09/18 01:12, James Almer wrote:
>> Simple parser to set keyframes, frame type, structure, width, height, and 
>> pixel
>> format, plus stream profile and level.
>>
>> Signed-off-by: James Almer <[email protected]>
>> ---
>> Missing Changelog entry and version bump.
>>
>> This depends on "[PATCH v2 2/3] lavc: Add coded bitstream read/write support
>> for AV1" which should be committed in the coming days.
>>
>> The AVCodecParser.split() implementation, added for the sake of completeness,
>> is very naive and much like the h264 and hevc ones can result in useless OBUs
>> being "extracted", but since it's no longer used by libavformat to fill 
>> global
>> headers when reading raw containers it shouldn't really matter. It's pretty
>> much used only by the remove_extradata bsf at this point.
>>
>>  configure               |   1 +
>>  libavcodec/Makefile     |   1 +
>>  libavcodec/av1_parser.c | 218 ++++++++++++++++++++++++++++++++++++++++
>>  libavcodec/parsers.c    |   1 +
>>  4 files changed, 221 insertions(+)
>>  create mode 100644 libavcodec/av1_parser.c
>>
>> diff --git a/configure b/configure
>> index ca8b599b63..b46c86ec95 100755
>> --- a/configure
>> +++ b/configure
>> @@ -3020,6 +3020,7 @@ wmv3_crystalhd_decoder_select="crystalhd"
>>  
>>  # parsers
>>  aac_parser_select="adts_header"
>> +av1_parser_select="cbs_av1"
>>  h264_parser_select="golomb h264dsp h264parse"
>>  hevc_parser_select="hevcparse"
>>  mpegaudio_parser_select="mpegaudioheader"
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index b2c6995f9a..dc28892e64 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -1006,6 +1006,7 @@ OBJS-$(CONFIG_AAC_PARSER)              += aac_parser.o 
>> aac_ac3_parser.o \
>>                                            mpeg4audio.o
>>  OBJS-$(CONFIG_AC3_PARSER)              += ac3tab.o aac_ac3_parser.o
>>  OBJS-$(CONFIG_ADX_PARSER)              += adx_parser.o adx.o
>> +OBJS-$(CONFIG_AV1_PARSER)              += av1_parser.o
>>  OBJS-$(CONFIG_AVS2_PARSER)             += avs2_parser.o
>>  OBJS-$(CONFIG_BMP_PARSER)              += bmp_parser.o
>>  OBJS-$(CONFIG_CAVSVIDEO_PARSER)        += cavs_parser.o
>> diff --git a/libavcodec/av1_parser.c b/libavcodec/av1_parser.c
>> new file mode 100644
>> index 0000000000..b2e19e2119
>> --- /dev/null
>> +++ b/libavcodec/av1_parser.c
>> @@ -0,0 +1,218 @@
>> +/*
>> + * AV1 parser
>> + *
>> + * Copyright (C) 2018 James Almer <[email protected]>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
>> USA
>> + */
>> +
>> +#include "av1_parse.h"
>> +#include "cbs.h"
>> +#include "cbs_av1.h"
>> +#include "parser.h"
>> +
>> +typedef struct AV1ParseContext {
>> +    CodedBitstreamContext *cbc;
>> +    CodedBitstreamFragment temporal_unit;
>> +    int parsed_extradata;
>> +} AV1ParseContext;
>> +
>> +static int av1_parser_parse(AVCodecParserContext *ctx,
>> +                            AVCodecContext *avctx,
>> +                            const uint8_t **out_data, int *out_size,
>> +                            const uint8_t *data, int size)
>> +{
>> +    AV1ParseContext *s = ctx->priv_data;
>> +    CodedBitstreamFragment *td = &s->temporal_unit;
>> +    CodedBitstreamAV1Context *av1 = s->cbc->priv_data;
>> +    int ret;
>> +
>> +    *out_data = data;
>> +    *out_size = size;
>> +
>> +    ctx->key_frame         = -1;
>> +    ctx->pict_type         = AV_PICTURE_TYPE_NONE;
>> +    ctx->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
>> +
>> +    if (avctx->extradata_size && !s->parsed_extradata) {
>> +        ret = ff_cbs_read(s->cbc, td, avctx->extradata, 
>> avctx->extradata_size);
>> +        if (ret < 0) {
>> +            av_log(avctx, AV_LOG_ERROR, "Failed to parse extradata.\n");
>> +            return size;
>> +        }
>> +
>> +        s->parsed_extradata = 1;


I'll move this above the ff_cbs_read() call, btw. Otherwise if it fails,
it will keep being called on every av_parser_parse2() call and no packet
will ever be parsed.

>> +
>> +        ff_cbs_fragment_uninit(s->cbc, td);
>> +    }
>> +
>> +    ret = ff_cbs_read(s->cbc, td, data, size);
>> +    if (ret < 0) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to parse temporal unit.\n");
>> +        return size;
>> +    }
>> +
>> +    if (!av1->sequence_header) {
>> +        av_log(avctx, AV_LOG_ERROR, "No sequence header available\n");
>> +        goto end;
>> +    }
>> +
>> +    for (int i = 0; i < td->nb_units; i++) {
>> +        CodedBitstreamUnit *unit = &td->units[i];
>> +        AV1RawOBU *obu = unit->content;
>> +        AV1RawSequenceHeader *seq = av1->sequence_header;
>> +        AV1RawFrameHeader *frame;
>> +        int frame_type, bitdepth, subsampling;
>> +
>> +        if (unit->type == AV1_OBU_FRAME)
>> +            frame = &obu->obu.frame.header;
>> +        else if (unit->type == AV1_OBU_FRAME_HEADER)
>> +            frame = &obu->obu.frame_header;
>> +        else
>> +            continue;
>> +
>> +        if (frame->show_existing_frame) {
>> +            AV1ReferenceFrameState *ref = 
>> &av1->ref[frame->frame_to_show_map_idx];
>> +
>> +            if (!ref->valid) {
>> +                av_log(avctx, AV_LOG_ERROR, "Invalid reference frame\n");
>> +                goto end;
>> +            }
>> +
>> +            ctx->width  = ref->frame_width;
>> +            ctx->height = ref->frame_height;
>> +            frame_type  = ref->frame_type;
>> +
>> +            ctx->key_frame = 0;
>> +        } else if (!frame->show_frame) {
>> +            continue;
> 
> I think you want to set the key_frame flag if you see any key frame, even 
> an invisible one.
> 
> (E.g. if the first output frame is in a packet after an invisible key frame 
> then you would miss it here, which possibly leads to incorrectly discarding 
> some of the stream.)

No, the only frame that matters here is the visible one for the current
temporal unit, be it a show_frame == 1 one or a show_existing_frame == 1
one. And the latter is not meant to be tagged as a key frame, hence
setting it to 0 above.

See section 7.6.2 in the AV1 spec. A "Key Frame Dependent Recovery Point",
i.e. a frame where show_existing_frame == 1 and whose frame_to_show_map_idx
value points to a "Delayed Random Access Point" (a frame where
show_frame == 0, showable_frame == 1, and frame_type == KEY_FRAME,
parsed in a previous OBU), is not considered an actual random access
point. Not even libaom signals them as such. They are meant to be
handled in a custom way by the container.

> 
>> +        } else {
>> +            ctx->width  = av1->frame_width;
>> +            ctx->height = av1->frame_height;
>> +            frame_type  = frame->frame_type;
>> +
>> +            ctx->key_frame = frame_type == AV1_FRAME_KEY;
>> +        }
>> +
>> +        avctx->profile = seq->seq_profile;
>> +        avctx->level   = seq->seq_level_idx[0];
>> +
>> +        switch (frame_type) {
>> +        case AV1_FRAME_KEY:
>> +        case AV1_FRAME_INTRA_ONLY:
>> +            ctx->pict_type = AV_PICTURE_TYPE_I;
>> +            break;
>> +        case AV1_FRAME_INTER:
>> +            ctx->pict_type = AV_PICTURE_TYPE_P;
>> +            break;
>> +        case AV1_FRAME_SWITCH:
>> +            ctx->pict_type = AV_PICTURE_TYPE_SP;
>> +            break;
>> +        }
>> +
>> +        ctx->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
>> +
>> +        subsampling = seq->color_config.subsampling_x << 1 & 
>> seq->color_config.subsampling_y;
>> +        bitdepth    = 8 + seq->color_config.high_bitdepth * 2 + 
>> seq->color_config.twelve_bit * 2;
>> +        switch (bitdepth) {
>> +        case 8:
>> +            if (subsampling == 3)      ctx->format = 
>> seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY8 :
>> +                                                                            
>>          AV_PIX_FMT_YUV420P;
>> +            else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P;
>> +            else                       ctx->format = AV_PIX_FMT_YUV444P;
>> +            break;
>> +        case 10:
>> +            if (subsampling == 3)      ctx->format = 
>> seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY10 :
>> +                                                                            
>>          AV_PIX_FMT_YUV420P10;
>> +            else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P10;
>> +            else                       ctx->format = AV_PIX_FMT_YUV444P10;
>> +            break;
>> +        case 12:
>> +            if (subsampling == 3)      ctx->format = 
>> seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY12 :
>> +                                                                            
>>          AV_PIX_FMT_YUV420P12;
>> +            else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P12;
>> +            else                       ctx->format = AV_PIX_FMT_YUV444P12;
>> +            break;
>> +        }
> 
> I think I'd put mono_chrome outside the switch - while it does imply that 
> subsampling_(x|y) are both 1, that is still slightly confusing.

Sure.

> 
> Also, perhaps this would be nicer as a lookup table?

I'll see if I can get it to look nicer.

> 
>> +    }
>> +
>> +end:
>> +    ff_cbs_fragment_uninit(s->cbc, td);
>> +
>> +    return size;
>> +}
>> +
>> +static const CodedBitstreamUnitType decompose_unit_types[] = {
>> +    AV1_OBU_TEMPORAL_DELIMITER,
>> +    AV1_OBU_SEQUENCE_HEADER,
>> +    AV1_OBU_FRAME_HEADER,
>> +    AV1_OBU_TILE_GROUP,
>> +    AV1_OBU_FRAME,
>> +};
>> +
>> +static av_cold int av1_parser_init(AVCodecParserContext *ctx)
>> +{
>> +    AV1ParseContext *s = ctx->priv_data;
>> +    int ret;
>> +
>> +    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, NULL);
> 
> Can we forge a logging context here?  Having NULL is not very nice if it does 
> find errors.
> 
> (Or overwrite it with the passed AVCodecContext only during the parse call?)

So an s->cbc->log_ctx = avctx assignment in av1_parser_parse()?

> 
>> +    if (ret < 0)
>> +        return ret;
>> +
>> +    s->cbc->decompose_unit_types    = (CodedBitstreamUnitType 
>> *)decompose_unit_types;
>> +    s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(decompose_unit_types);
>> +
>> +    return 0;
>> +}
>> +
>> +static void av1_parser_close(AVCodecParserContext *ctx)
>> +{
>> +    AV1ParseContext *s = ctx->priv_data;
>> +
>> +    ff_cbs_close(&s->cbc);
>> +}
>> +
>> +static int av1_parser_split(AVCodecContext *avctx,
>> +                            const uint8_t *buf, int buf_size)
>> +{
>> +    AV1OBU obu;
>> +    const uint8_t *ptr = buf, *end = buf + buf_size;
>> +
>> +    while (ptr < end) {
>> +        int len = ff_av1_extract_obu(&obu, ptr, buf_size, avctx);
>> +        if (len < 0)
>> +            break;
>> +
>> +        if (obu.type == AV1_OBU_FRAME_HEADER ||
>> +            obu.type == AV1_OBU_FRAME) {
>> +            return ptr - buf;
>> +        }
>> +        ptr      += len;
>> +        buf_size -= len;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +AVCodecParser ff_av1_parser = {
>> +    .codec_ids      = { AV_CODEC_ID_AV1 },
>> +    .priv_data_size = sizeof(AV1ParseContext),
>> +    .parser_init    = av1_parser_init,
>> +    .parser_close   = av1_parser_close,
>> +    .parser_parse   = av1_parser_parse,
>> +    .split          = av1_parser_split,
>> +};
>> diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
>> index cb86cceecc..f01cad4c84 100644
>> --- a/libavcodec/parsers.c
>> +++ b/libavcodec/parsers.c
>> @@ -26,6 +26,7 @@ extern AVCodecParser ff_aac_parser;
>>  extern AVCodecParser ff_aac_latm_parser;
>>  extern AVCodecParser ff_ac3_parser;
>>  extern AVCodecParser ff_adx_parser;
>> +extern AVCodecParser ff_av1_parser;
>>  extern AVCodecParser ff_avs2_parser;
>>  extern AVCodecParser ff_bmp_parser;
>>  extern AVCodecParser ff_cavsvideo_parser;
>>
> 
> Looks good!
> 
> Thanks,
> 
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> [email protected]
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 

_______________________________________________
ffmpeg-devel mailing list
[email protected]
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Re: [FFmpeg-devel] [PATCH] avcodec: add an AV1 parser

Reply via email to