On 2017/6/14 7:41, Mark Thompson wrote:
> On 07/06/17 01:53, Jun Zhao wrote:
>> From 5c88956e36e7318cf1d1b7c41a9d4108fcf9d0a5 Mon Sep 17 00:00:00 2001
>> From: Jun Zhao <[email protected]>
>> Date: Fri, 12 May 2017 08:30:43 +0800
>> Subject: [PATCH] lavc/vaapi_encode_h26[45]: respect "slices" in h26[45] vaapi
>> encoder.
>>
>> Enable multi-slice support in AVC/HEVC vaapi encoder.
>>
>> Signed-off-by: Wang, Yi A <[email protected]>
>> Signed-off-by: Jun Zhao <[email protected]>
>> ---
>
> I think this should be three patches:
>
>> libavcodec/vaapi_encode.c | 36 ++++++++++++++++++++++++++++++++----
>> libavcodec/vaapi_encode.h | 9 +++++++--
>
> (1) Change the slice/parameter buffers to be allocated dynamically.
>
>> libavcodec/vaapi_encode_h264.c | 24 ++++++++++++++++++------
>
> (2) Respect slices option in H.264 encoder.
>
>> libavcodec/vaapi_encode_h265.c | 28 ++++++++++++++++++++++------
>
> (3) Respect slices option in H.265 encoder.
>
>
> I'm not entirely sure that the first one is worth it - do you have a use-case
> for very large numbers of slices? Given that VAAPI lacks the ability to
> limit slices by size (for H.323 / RTP packet limits), the other use I can
> think of is for parallelism or related conformance requirements, both of
> which tend to want relatively small numbers.
>
For the first one, I don't have a user case for large number of slices, but I
think dynamic allocate is a general solution if the driver/GPU HW change the
max slices number support in GEN10, GEN11, ...
And I will split the patch as the comments.
>
>> 4 files changed, 79 insertions(+), 18 deletions(-)
>>
>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>> index 7e9c00f51d..14a3fba7b1 100644
>> --- a/libavcodec/vaapi_encode.c
>> +++ b/libavcodec/vaapi_encode.c
>> @@ -36,13 +36,18 @@ static int
>> vaapi_encode_make_packed_header(AVCodecContext *avctx,
>> VAAPIEncodeContext *ctx = avctx->priv_data;
>> VAStatus vas;
>> VABufferID param_buffer, data_buffer;
>> + VABufferID *tmp;
>> VAEncPackedHeaderParameterBuffer params = {
>> .type = type,
>> .bit_length = bit_len,
>> .has_emulation_bytes = 1,
>> };
>>
>> - av_assert0(pic->nb_param_buffers + 2 <= MAX_PARAM_BUFFERS);
>> + tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp),
>> (pic->nb_param_buffers + 2));
>> + if (!tmp) {
>> + return AVERROR(ENOMEM);
>
> This failure case leaks the already-allocated buffers. Unfortunately you
> can't use av_realloc_array().
>
>> + }
>> + pic->param_buffers = tmp;
>>
>> vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>> VAEncPackedHeaderParameterBufferType,
>> @@ -77,9 +82,14 @@ static int vaapi_encode_make_param_buffer(AVCodecContext
>> *avctx,
>> {
>> VAAPIEncodeContext *ctx = avctx->priv_data;
>> VAStatus vas;
>> + VABufferID *tmp;
>> VABufferID buffer;
>>
>> - av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS);
>> + tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp),
>> (pic->nb_param_buffers + 1));
>> + if (!tmp) {
>> + return AVERROR(ENOMEM);
>
> Same.
>
>> + }
>> + pic->param_buffers = tmp;
>>
>> vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>> type, len, 1, data, &buffer);
>> @@ -122,6 +132,8 @@ static int vaapi_encode_wait(AVCodecContext *avctx,
>> // Input is definitely finished with now.
>> av_frame_free(&pic->input_image);
>>
>> + av_freep(&pic->param_buffers);
>> +
>> pic->encode_complete = 1;
>> return 0;
>> }
>> @@ -313,7 +325,10 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
>> }
>> }
>>
>> - av_assert0(pic->nb_slices <= MAX_PICTURE_SLICES);
>> + pic->slices = (VAAPIEncodeSlice **)av_malloc(sizeof(VAAPIEncodeSlice *)
>> * pic->nb_slices);
>> + if (pic->slices == NULL)
>> + goto fail;
>> +
>> for (i = 0; i < pic->nb_slices; i++) {
>> slice = av_mallocz(sizeof(*slice));
>> if (!slice) {
>> @@ -322,7 +337,6 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
>> }
>> slice->index = i;
>> pic->slices[i] = slice;
>> -
>
> Stray change?
Will clean
>
>> if (ctx->codec->slice_params_size > 0) {
>> slice->codec_slice_params =
>> av_mallocz(ctx->codec->slice_params_size);
>> if (!slice->codec_slice_params) {
>> @@ -427,6 +441,8 @@ fail:
>> vaDestroyBuffer(ctx->hwctx->display, pic->param_buffers[i]);
>> fail_at_end:
>> av_freep(&pic->codec_picture_params);
>> + av_freep(&pic->param_buffers);
>> + av_freep(&pic->slices);
>> av_frame_free(&pic->recon_image);
>> return err;
>> }
>> @@ -542,6 +558,8 @@ static int vaapi_encode_free(AVCodecContext *avctx,
>> av_frame_free(&pic->input_image);
>> av_frame_free(&pic->recon_image);
>>
>> + av_freep(&pic->param_buffers);
>> + av_freep(&pic->slices);
>> // Output buffer should already be destroyed.
>> av_assert0(pic->output_buffer == VA_INVALID_ID);
>>
>> @@ -949,6 +967,7 @@ static av_cold int
>> vaapi_encode_config_attributes(AVCodecContext *avctx)
>> { VAConfigAttribRTFormat },
>> { VAConfigAttribRateControl },
>> { VAConfigAttribEncMaxRefFrames },
>> + { VAConfigAttribEncMaxSlices },
>> { VAConfigAttribEncPackedHeaders },
>> };
>>
>> @@ -1079,6 +1098,15 @@ static av_cold int
>> vaapi_encode_config_attributes(AVCodecContext *avctx)
>> }
>> }
>> break;
>> + case VAConfigAttribEncMaxSlices:
>> + if (avctx->slices > attr[i].value) {
>> + av_log(avctx, AV_LOG_ERROR, "Slices per frame more than %#x
>> "
>> + "is not supported.\n", attr[i].value);
>> + err = AVERROR(EINVAL);
>> + goto fail;
>> + }
>> + ctx->multi_slices_available = 1;
>> + break;
>
> This can also be VA_ATTRIB_NOT_SUPPORTED, which presumably implies one slice
> per frame only? Unfortunately it is always that with the i965 driver for
> MPEG-2 despite the need for slices, so I'm not sure what you actually want to
> do there.
>
>> case VAConfigAttribEncPackedHeaders:
>> if (ctx->va_packed_headers & ~attr[i].value) {
>> // This isn't fatal, but packed headers are always
>> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
>> index 0edf27e4cb..4afe4fa103 100644
>> --- a/libavcodec/vaapi_encode.h
>> +++ b/libavcodec/vaapi_encode.h
>> @@ -73,7 +73,7 @@ typedef struct VAAPIEncodePicture {
>> VASurfaceID recon_surface;
>>
>> int nb_param_buffers;
>> - VABufferID param_buffers[MAX_PARAM_BUFFERS];
>> + VABufferID *param_buffers;
>>
>> AVBufferRef *output_buffer_ref;
>> VABufferID output_buffer;
>> @@ -85,7 +85,10 @@ typedef struct VAAPIEncodePicture {
>> struct VAAPIEncodePicture *refs[MAX_PICTURE_REFERENCES];
>>
>> int nb_slices;
>> - VAAPIEncodeSlice *slices[MAX_PICTURE_SLICES];
>> + VAAPIEncodeSlice **slices;
>> + int slice_of_mbs;
>> + int slice_mod_mbs;
>> + int last_mb_index;
>
> Macroblocks are not a generic concept - H.265 does not have them. I think
> I'd prefer this calculated stuff to go in the private data of the individual
> codes rather than in the generic header (the generic code never touches it,
> after all).
Will use a private struct for this
>
>> } VAAPIEncodePicture;
>>
>> typedef struct VAAPIEncodeContext {
>> @@ -105,6 +108,8 @@ typedef struct VAAPIEncodeContext {
>> // Supported packed headers (initially the desired set, modified
>> // later to what is actually supported).
>> unsigned int va_packed_headers;
>> + // Supported multi-slices per frame
>> + int multi_slices_available;
>>
>> // The required size of surfaces. This is probably the input
>> // size (AVCodecContext.width|height) aligned up to whatever
>> diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
>> index 92e29554ed..f325346433 100644
>> --- a/libavcodec/vaapi_encode_h264.c
>> +++ b/libavcodec/vaapi_encode_h264.c
>> @@ -1002,7 +1002,16 @@ static int
>> vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
>> vpic->pic_fields.bits.idr_pic_flag = (pic->type == PICTURE_TYPE_IDR);
>> vpic->pic_fields.bits.reference_pic_flag = (pic->type !=
>> PICTURE_TYPE_B);
>>
>> - pic->nb_slices = 1;
>> + if (ctx->multi_slices_available)
>> + avctx->slices = av_clip(avctx->slices, 1, priv->mb_height);
>> + else
>> + avctx->slices = 1;
>
> If the user requests slices they probably have some clear reason for doing so
> (e.g. to work with some old device which only accepts 1080p in four slices).
> Do we really want to silently ignore that if we can't actually do it?
Will give a warning message when driver can't support multi-slices but user
setting the "slices" > 1.
>
>> +
>> + pic->nb_slices = avctx->slices;
>> +
>> + pic->slice_of_mbs = (priv->mb_width * priv->mb_height) / pic->nb_slices;
>> + pic->slice_mod_mbs = (priv->mb_width * priv->mb_height) %
>> pic->nb_slices;
>> + pic->last_mb_index = 0;
>>
>> return 0;
>> }
>> @@ -1052,15 +1061,18 @@ static int
>> vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
>> av_assert0(0 && "invalid picture type");
>> }
>>
>> - // Only one slice per frame.
>> - vslice->macroblock_address = 0;
>> - vslice->num_macroblocks = priv->mb_width * priv->mb_height;
>> + vslice->macroblock_address = pic->last_mb_index;
>> + vslice->num_macroblocks = pic->slice_of_mbs + (pic->slice_mod_mbs > 0 ?
>> 1 : 0);
>
> I think it would be cuter if rounded properly rather than top-loading the
> rounding error.
>
> That is, for N in [0, slices) use:
> macroblock_address = N * total_macroblocks / slices;
> num_macroblocks = (N + 1) * total_macroblocks / slices -
> macroblock_address;
>
> That makes it cut in half nicely on, for example, four slices in a 7x6 image,
> rather than having a stray macroblock over the centre line.
>
> (That is: 1111111 vs. 1111111
> 1111222 1112222
> 2222222 2222222
> 2333333 3333333
> 3333444 3334444
> 4444444 4444444 ).
>
Will double-check this part
>> + if (pic->slice_mod_mbs > 0)
>> + pic->slice_mod_mbs --;
>> + pic->last_mb_index += vslice->num_macroblocks;
>>
>> vslice->macroblock_info = VA_INVALID_ID;
>>
>> vslice->pic_parameter_set_id = vpic->pic_parameter_set_id;
>> - vslice->idr_pic_id = priv->idr_pic_count++;
>> -
>> + vslice->idr_pic_id = priv->idr_pic_count;
>> + if (pic->last_mb_index == priv->mb_width * priv->mb_height)
>> + priv->idr_pic_count ++;
>> vslice->pic_order_cnt_lsb = (pic->display_order - priv->last_idr_frame)
>> &
>> ((1 << (4 +
>> vseq->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4)) - 1);
>>
>> diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
>> index 6e008b7b9c..e930026184 100644
>> --- a/libavcodec/vaapi_encode_h265.c
>> +++ b/libavcodec/vaapi_encode_h265.c
>> @@ -1025,7 +1025,15 @@ static int
>> vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
>> av_assert0(0 && "invalid picture type");
>> }
>>
>> - pic->nb_slices = 1;
>> + if (ctx->multi_slices_available)
>> + avctx->slices = av_clip(avctx->slices, 1, priv->ctu_height);
>> + else
>> + avctx->slices = 1;
>> +
>> + pic->nb_slices = avctx->slices;
>> + pic->slice_of_mbs = (priv->ctu_width * priv->ctu_height) /
>> pic->nb_slices;
>> + pic->slice_mod_mbs = (priv->ctu_width * priv->ctu_height) %
>> pic->nb_slices;
>> + pic->last_mb_index = 0;
>
> They are CTUs, don't call them macroblocks.
>
In split patch, will change this.
>>
>> return 0;
>> }
>> @@ -1048,9 +1056,13 @@ static int
>> vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
>> pslice = slice->priv_data;
>> mslice = &pslice->misc_slice_params;
>>
>> - // Currently we only support one slice per frame.
>> - vslice->slice_segment_address = 0;
>> - vslice->num_ctu_in_slice = priv->ctu_width * priv->ctu_height;
>> + vslice->slice_segment_address = pic->last_mb_index;
>> + mslice->slice_segment_address = pic->last_mb_index;
>
> Duplication, oops. Just remove the one in VAAPIEncodeH265MiscSliceParams, I
> think?
Yes, I think we need to remove the slice_segment_address in
VAAPIEncodeH265MiscSliceParams
>
>> + vslice->num_ctu_in_slice = pic->slice_of_mbs + (pic->slice_mod_mbs > 0
>> ? 1 : 0);
>
> Same point on rounding as above.
>
>> +
>> + if (pic->slice_mod_mbs > 0)
>> + pic->slice_mod_mbs --;
>> + pic->last_mb_index += vslice->num_ctu_in_slice;
>>
>> switch (pic->type) {
>> case PICTURE_TYPE_IDR:
>> @@ -1104,9 +1116,13 @@ static int
>> vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
>> else
>> vslice->slice_qp_delta = priv->fixed_qp_idr - vpic->pic_init_qp;
>>
>> - vslice->slice_fields.bits.last_slice_of_pic_flag = 1;
>> + if (pic->last_mb_index == priv->ctu_width * priv->ctu_height)
>> + vslice->slice_fields.bits.last_slice_of_pic_flag = 1;
>>
>> - mslice->first_slice_segment_in_pic_flag = 1;
>> + if (vslice->slice_segment_address == 0)
>> + mslice->first_slice_segment_in_pic_flag = 1;
>> + else
>> + mslice->first_slice_segment_in_pic_flag = 0;
>>
>> if (pic->type == PICTURE_TYPE_IDR) {
>> // No reference pictures.
>> --
>> 2.11.0
>>
> _______________________________________________
> ffmpeg-devel mailing list
> [email protected]
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
_______________________________________________
ffmpeg-devel mailing list
[email protected]
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel