On 9/24/2018 8:41 PM, Mark Thompson wrote:
> On 24/09/18 01:12, James Almer wrote:
>> Simple parser to set keyframes, frame type, structure, width, height, and
>> pixel
>> format, plus stream profile and level.
>>
>> Signed-off-by: James Almer <[email protected]>
>> ---
>> Missing Changelog entry and version bump.
>>
>> This depends on "[PATCH v2 2/3] lavc: Add coded bitstream read/write support
>> for AV1" which should be committed in the coming days.
>>
>> The AVCodecParser.split() implementation, added for the sake of completeness,
>> is very naive and much like the h264 and hevc ones can result in useless OBUs
>> being "extracted", but since it's no longer used by libavformat to fill
>> global
>> headers when reading raw containers it shouldn't really matter. It's pretty
>> much used only by the remove_extradata bsf at this point.
>>
>> configure | 1 +
>> libavcodec/Makefile | 1 +
>> libavcodec/av1_parser.c | 218 ++++++++++++++++++++++++++++++++++++++++
>> libavcodec/parsers.c | 1 +
>> 4 files changed, 221 insertions(+)
>> create mode 100644 libavcodec/av1_parser.c
>>
>> diff --git a/configure b/configure
>> index ca8b599b63..b46c86ec95 100755
>> --- a/configure
>> +++ b/configure
>> @@ -3020,6 +3020,7 @@ wmv3_crystalhd_decoder_select="crystalhd"
>>
>> # parsers
>> aac_parser_select="adts_header"
>> +av1_parser_select="cbs_av1"
>> h264_parser_select="golomb h264dsp h264parse"
>> hevc_parser_select="hevcparse"
>> mpegaudio_parser_select="mpegaudioheader"
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index b2c6995f9a..dc28892e64 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -1006,6 +1006,7 @@ OBJS-$(CONFIG_AAC_PARSER) += aac_parser.o
>> aac_ac3_parser.o \
>> mpeg4audio.o
>> OBJS-$(CONFIG_AC3_PARSER) += ac3tab.o aac_ac3_parser.o
>> OBJS-$(CONFIG_ADX_PARSER) += adx_parser.o adx.o
>> +OBJS-$(CONFIG_AV1_PARSER) += av1_parser.o
>> OBJS-$(CONFIG_AVS2_PARSER) += avs2_parser.o
>> OBJS-$(CONFIG_BMP_PARSER) += bmp_parser.o
>> OBJS-$(CONFIG_CAVSVIDEO_PARSER) += cavs_parser.o
>> diff --git a/libavcodec/av1_parser.c b/libavcodec/av1_parser.c
>> new file mode 100644
>> index 0000000000..b2e19e2119
>> --- /dev/null
>> +++ b/libavcodec/av1_parser.c
>> @@ -0,0 +1,218 @@
>> +/*
>> + * AV1 parser
>> + *
>> + * Copyright (C) 2018 James Almer <[email protected]>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
>> USA
>> + */
>> +
>> +#include "av1_parse.h"
>> +#include "cbs.h"
>> +#include "cbs_av1.h"
>> +#include "parser.h"
>> +
>> +typedef struct AV1ParseContext {
>> + CodedBitstreamContext *cbc;
>> + CodedBitstreamFragment temporal_unit;
>> + int parsed_extradata;
>> +} AV1ParseContext;
>> +
>> +static int av1_parser_parse(AVCodecParserContext *ctx,
>> + AVCodecContext *avctx,
>> + const uint8_t **out_data, int *out_size,
>> + const uint8_t *data, int size)
>> +{
>> + AV1ParseContext *s = ctx->priv_data;
>> + CodedBitstreamFragment *td = &s->temporal_unit;
>> + CodedBitstreamAV1Context *av1 = s->cbc->priv_data;
>> + int ret;
>> +
>> + *out_data = data;
>> + *out_size = size;
>> +
>> + ctx->key_frame = -1;
>> + ctx->pict_type = AV_PICTURE_TYPE_NONE;
>> + ctx->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
>> +
>> + if (avctx->extradata_size && !s->parsed_extradata) {
>> + ret = ff_cbs_read(s->cbc, td, avctx->extradata,
>> avctx->extradata_size);
>> + if (ret < 0) {
>> + av_log(avctx, AV_LOG_ERROR, "Failed to parse extradata.\n");
>> + return size;
>> + }
>> +
>> + s->parsed_extradata = 1;
I'll move this above the ff_cbs_read() call, btw. Otherwise if it fails,
it will keep being called on every av_parser_parse2() call and no packet
will ever be parsed.
>> +
>> + ff_cbs_fragment_uninit(s->cbc, td);
>> + }
>> +
>> + ret = ff_cbs_read(s->cbc, td, data, size);
>> + if (ret < 0) {
>> + av_log(avctx, AV_LOG_ERROR, "Failed to parse temporal unit.\n");
>> + return size;
>> + }
>> +
>> + if (!av1->sequence_header) {
>> + av_log(avctx, AV_LOG_ERROR, "No sequence header available\n");
>> + goto end;
>> + }
>> +
>> + for (int i = 0; i < td->nb_units; i++) {
>> + CodedBitstreamUnit *unit = &td->units[i];
>> + AV1RawOBU *obu = unit->content;
>> + AV1RawSequenceHeader *seq = av1->sequence_header;
>> + AV1RawFrameHeader *frame;
>> + int frame_type, bitdepth, subsampling;
>> +
>> + if (unit->type == AV1_OBU_FRAME)
>> + frame = &obu->obu.frame.header;
>> + else if (unit->type == AV1_OBU_FRAME_HEADER)
>> + frame = &obu->obu.frame_header;
>> + else
>> + continue;
>> +
>> + if (frame->show_existing_frame) {
>> + AV1ReferenceFrameState *ref =
>> &av1->ref[frame->frame_to_show_map_idx];
>> +
>> + if (!ref->valid) {
>> + av_log(avctx, AV_LOG_ERROR, "Invalid reference frame\n");
>> + goto end;
>> + }
>> +
>> + ctx->width = ref->frame_width;
>> + ctx->height = ref->frame_height;
>> + frame_type = ref->frame_type;
>> +
>> + ctx->key_frame = 0;
>> + } else if (!frame->show_frame) {
>> + continue;
>
> I think you want to set the key_frame flag if the you see any key frame, even
> an invisible one.
>
> (E.g. if the first output frame is in a packet after an invisible key frame
> then you would miss it here, which possibly leads to incorrectly discarding
> some of the stream.)
No, the only frame that matters here is the visible one for the current
temporal unit, be it a show_frame == 1 one or a show_existing_frame == 1
one. And the latter is not meant to be tagged as a key frame, hence
setting it to 0 above.
See section 7.6.2 in the AV1 spec. "Key Frame Dependent Recovery Point",
AKA frames where show_existing_frame == 1 and the frame_to_show_map_idx
value pointing to a "Delayed Random Access Point" (frame where
show_frame == 0, showable_frame == 1, and frame_type == KEY_FRAME,
parsed in a previous OBU), are not considered actual random access
points. Not even libaom signals them as such. They are meant to be
handled in a custom way by the container.
>
>> + } else {
>> + ctx->width = av1->frame_width;
>> + ctx->height = av1->frame_height;
>> + frame_type = frame->frame_type;
>> +
>> + ctx->key_frame = frame_type == AV1_FRAME_KEY;
>> + }
>> +
>> + avctx->profile = seq->seq_profile;
>> + avctx->level = seq->seq_level_idx[0];
>> +
>> + switch (frame_type) {
>> + case AV1_FRAME_KEY:
>> + case AV1_FRAME_INTRA_ONLY:
>> + ctx->pict_type = AV_PICTURE_TYPE_I;
>> + break;
>> + case AV1_FRAME_INTER:
>> + ctx->pict_type = AV_PICTURE_TYPE_P;
>> + break;
>> + case AV1_FRAME_SWITCH:
>> + ctx->pict_type = AV_PICTURE_TYPE_SP;
>> + break;
>> + }
>> +
>> + ctx->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
>> +
>> + subsampling = seq->color_config.subsampling_x << 1 &
>> seq->color_config.subsampling_y;
>> + bitdepth = 8 + seq->color_config.high_bitdepth * 2 +
>> seq->color_config.twelve_bit * 2;
>> + switch (bitdepth) {
>> + case 8:
>> + if (subsampling == 3) ctx->format =
>> seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY8 :
>> +
>> AV_PIX_FMT_YUV420P;
>> + else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P;
>> + else ctx->format = AV_PIX_FMT_YUV444P;
>> + break;
>> + case 10:
>> + if (subsampling == 3) ctx->format =
>> seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY10 :
>> +
>> AV_PIX_FMT_YUV420P10;
>> + else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P10;
>> + else ctx->format = AV_PIX_FMT_YUV444P10;
>> + break;
>> + case 12:
>> + if (subsampling == 3) ctx->format =
>> seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY12 :
>> +
>> AV_PIX_FMT_YUV420P12;
>> + else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P12;
>> + else ctx->format = AV_PIX_FMT_YUV444P12;
>> + break;
>> + }
>
> I think I'd put mono_chrome outside the switch - while it does imply that
> subsampling_(x|y) are both 1, that is still slightly confusing.
Sure.
>
> Also, perhaps this would be nicer as a lookup table?
I'll see if i can get it to look nicer.
>
>> + }
>> +
>> +end:
>> + ff_cbs_fragment_uninit(s->cbc, td);
>> +
>> + return size;
>> +}
>> +
>> +static const CodedBitstreamUnitType decompose_unit_types[] = {
>> + AV1_OBU_TEMPORAL_DELIMITER,
>> + AV1_OBU_SEQUENCE_HEADER,
>> + AV1_OBU_FRAME_HEADER,
>> + AV1_OBU_TILE_GROUP,
>> + AV1_OBU_FRAME,
>> +};
>> +
>> +static av_cold int av1_parser_init(AVCodecParserContext *ctx)
>> +{
>> + AV1ParseContext *s = ctx->priv_data;
>> + int ret;
>> +
>> + ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, NULL);
>
> Can we forge a logging context here? Having NULL is not very nice if it does
> find errors.
>
> (Or overwrite it with the passed AVCodecContext only during the parse call?)
So a s->cbc.log_ctx = avctx in av1_parser_parse()?
>
>> + if (ret < 0)
>> + return ret;
>> +
>> + s->cbc->decompose_unit_types = (CodedBitstreamUnitType
>> *)decompose_unit_types;
>> + s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(decompose_unit_types);
>> +
>> + return 0;
>> +}
>> +
>> +static void av1_parser_close(AVCodecParserContext *ctx)
>> +{
>> + AV1ParseContext *s = ctx->priv_data;
>> +
>> + ff_cbs_close(&s->cbc);
>> +}
>> +
>> +static int av1_parser_split(AVCodecContext *avctx,
>> + const uint8_t *buf, int buf_size)
>> +{
>> + AV1OBU obu;
>> + const uint8_t *ptr = buf, *end = buf + buf_size;
>> +
>> + while (ptr < end) {
>> + int len = ff_av1_extract_obu(&obu, ptr, buf_size, avctx);
>> + if (len < 0)
>> + break;
>> +
>> + if (obu.type == AV1_OBU_FRAME_HEADER ||
>> + obu.type == AV1_OBU_FRAME) {
>> + return ptr - buf;
>> + }
>> + ptr += len;
>> + buf_size -= len;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +AVCodecParser ff_av1_parser = {
>> + .codec_ids = { AV_CODEC_ID_AV1 },
>> + .priv_data_size = sizeof(AV1ParseContext),
>> + .parser_init = av1_parser_init,
>> + .parser_close = av1_parser_close,
>> + .parser_parse = av1_parser_parse,
>> + .split = av1_parser_split,
>> +};
>> diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
>> index cb86cceecc..f01cad4c84 100644
>> --- a/libavcodec/parsers.c
>> +++ b/libavcodec/parsers.c
>> @@ -26,6 +26,7 @@ extern AVCodecParser ff_aac_parser;
>> extern AVCodecParser ff_aac_latm_parser;
>> extern AVCodecParser ff_ac3_parser;
>> extern AVCodecParser ff_adx_parser;
>> +extern AVCodecParser ff_av1_parser;
>> extern AVCodecParser ff_avs2_parser;
>> extern AVCodecParser ff_bmp_parser;
>> extern AVCodecParser ff_cavsvideo_parser;
>>
>
> Looks good!
>
> Thanks,
>
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> [email protected]
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
_______________________________________________
ffmpeg-devel mailing list
[email protected]
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel