Re: [FFmpeg-devel] [PATCH, v2] lavc/qsvdec: Add GPU-accelerated memory copy support

Li, Zhong Sat, 28 Sep 2019 20:57:53 -0700

> From: ffmpeg-devel <[email protected]> On Behalf Of Linjie Fu
> Sent: Friday, September 27, 2019 1:47 PM
> To: [email protected]
> Cc: ChaoX A Liu <[email protected]>; Fu, Linjie <[email protected]>
> Subject: [FFmpeg-devel] [PATCH, v2] lavc/qsvdec: Add GPU-accelerated memory
> copy support
> 
> GPU copy enables or disables GPU accelerated copying between video and
> system memory. This may lead to a notable performance improvement.
> Memory must be sequent and aligned with 128x64.
> (first introduced in FFmpeg 3.3.1)


This line should be removed. FFmpeg 3.3.1 mainline never supported GPU copy.

> 
> CMD:
> ffmpeg -init_hw_device qsv=hw -filter_hw_device hw -c:v h264_qsv
>                     -gpu_copy on -i input.h264 -f null -
> or:
> ffmpeg -c:v h264_qsv -gpu_copy on -i input.h264 -f null -
> 
> Signed-off-by: Linjie Fu <[email protected]>
> Signed-off-by: ChaoX A Liu <[email protected]>
> ---
> Rebased and send again.
> 
>  libavcodec/qsv.c          | 31 +++++++++++++++++-------
>  libavcodec/qsv_internal.h |  7 +++---
>  libavcodec/qsvdec.c       | 50 ++++++++++++++++++++++++++++++++++-----
>  libavcodec/qsvdec.h       |  2 ++
>  libavcodec/qsvdec_h2645.c | 10 ++++++++  libavcodec/qsvdec_other.c |  5 ++++
>  libavcodec/qsvenc.c       |  8 ++++---
>  7 files changed, 92 insertions(+), 21 deletions(-)
> 
> diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c index 994c9ebcb0..9e66fbc9da
> 100644
> --- a/libavcodec/qsv.c
> +++ b/libavcodec/qsv.c
> @@ -412,15 +412,19 @@ static int ff_qsv_set_display_handle(AVCodecContext
> *avctx, QSVSession *qs)  #endif //AVCODEC_QSV_LINUX_SESSION_HANDLE
> 
>  int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,
> -                                 const char *load_plugins)
> +                                 const char *load_plugins, int
> + gpu_copy)
>  {
> -    mfxIMPL impl   = MFX_IMPL_AUTO_ANY;
> -    mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } };
> +    mfxIMPL          impl = MFX_IMPL_AUTO_ANY;
> +    mfxVersion        ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } };
> +    mfxInitParam init_par = { MFX_IMPL_AUTO_ANY };
> 
>      const char *desc;
>      int ret;
> 
> -    ret = MFXInit(impl, &ver, &qs->session);
> +    init_par.GPUCopy        = gpu_copy;

The GPUCopy field was introduced in API 1.16, so it would be better to check 
the API version to avoid compile issues with older APIs.

> +    init_par.Implementation = impl;
> +    init_par.Version        = ver;
> +    ret = MFXInitEx(init_par, &qs->session);
>      if (ret < 0)
>          return ff_qsv_print_error(avctx, ret,
>                                    "Error initializing an internal MFX 
> session"); @@ -712,7
> +716,8 @@ static mfxStatus qsv_frame_get_hdl(mfxHDL pthis, mfxMemId mid,
> mfxHDL *hdl)  }
> 
>  int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession,
> -                               AVBufferRef *device_ref, const char 
> *load_plugins)
> +                               AVBufferRef *device_ref, const char 
> *load_plugins,
> +                               int gpu_copy)
>  {
>      static const mfxHandleType handle_types[] = {
>          MFX_HANDLE_VA_DISPLAY,
> @@ -722,11 +727,12 @@ int ff_qsv_init_session_device(AVCodecContext
> *avctx, mfxSession *psession,
>      AVHWDeviceContext    *device_ctx = (AVHWDeviceContext*)device_ref-
> >data;
>      AVQSVDeviceContext *device_hwctx = device_ctx->hwctx;
>      mfxSession        parent_session = device_hwctx->session;
> +    mfxInitParam            init_par = { MFX_IMPL_AUTO_ANY };
> +    mfxHDL                    handle = NULL;
> 
>      mfxSession    session;
>      mfxVersion    ver;
>      mfxIMPL       impl;
> -    mfxHDL        handle = NULL;
>      mfxHandleType handle_type;
>      mfxStatus err;
> 
> @@ -752,7 +758,10 @@ int ff_qsv_init_session_device(AVCodecContext *avctx,
> mfxSession *psession,
>                 "from the session\n");
>      }
> 
> -    err = MFXInit(impl, &ver, &session);
> +    init_par.GPUCopy        = gpu_copy;
> +    init_par.Implementation = impl;
> +    init_par.Version        = ver;
> +    err = MFXInitEx(init_par, &session);
>      if (err != MFX_ERR_NONE)
>          return ff_qsv_print_error(avctx, err,
>                                    "Error initializing a child MFX session"); 
> @@ -783,7 +792,7
> @@ int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession
> *psession,
> 
>  int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *psession,
>                                 QSVFramesContext *qsv_frames_ctx,
> -                               const char *load_plugins, int opaque)
> +                               const char *load_plugins, int opaque,
> + int gpu_copy)
>  {
>      mfxFrameAllocator frame_allocator = {
>          .pthis  = qsv_frames_ctx,
> @@ -802,8 +811,12 @@ int ff_qsv_init_session_frames(AVCodecContext *avctx,
> mfxSession *psession,
> 
>      int ret;
> 
> +    if (gpu_copy == MFX_GPUCOPY_ON)
> +        av_log(avctx, AV_LOG_WARNING, "GPU-accelerated memory copy "
> +                                    "only works in
> + MFX_IOPATTERN_OUT_SYSTEM_MEMORY.\n");

This looks weird:
1.  The warning will always be printed when gpu_copy is on, no matter what the 
iopattern is.
     So it would be better to do:
    if (gpu_copy == MFX_GPUCOPY_ON && iopattern != system memory)
        print a warning. 

2. It is only added to ff_qsv_init_session_frames(), but it looks like it 
should also apply to qsv_init_session().

>      ret = ff_qsv_init_session_device(avctx, &session,
> -                                     frames_ctx->device_ref, load_plugins);
> +                                     frames_ctx->device_ref,
> + load_plugins, gpu_copy);
>      if (ret < 0)
>          return ret;
> 
> diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h index
> 8b44a9b6f4..37559270e5 100644
> --- a/libavcodec/qsv_internal.h
> +++ b/libavcodec/qsv_internal.h
> @@ -127,16 +127,17 @@ enum AVPictureType ff_qsv_map_pictype(int
> mfx_pic_type);  enum AVFieldOrder ff_qsv_map_picstruct(int mfx_pic_struct);
> 
>  int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,
> -                                 const char *load_plugins);
> +                                 const char *load_plugins, int
> + gpu_copy);
> 
>  int ff_qsv_close_internal_session(QSVSession *qs);
> 
>  int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession,
> -                               AVBufferRef *device_ref, const char 
> *load_plugins);
> +                               AVBufferRef *device_ref, const char 
> *load_plugins,
> +                               int gpu_copy);
> 
>  int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *session,
>                                 QSVFramesContext *qsv_frames_ctx,
> -                               const char *load_plugins, int opaque);
> +                               const char *load_plugins, int opaque,
> + int gpu_copy);
> 
>  int ff_qsv_find_surface_idx(QSVFramesContext *ctx, QSVFrame *frame);
> 
> diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index
> 9299596e33..a947583702 100644
> --- a/libavcodec/qsvdec.c
> +++ b/libavcodec/qsvdec.c
> @@ -34,9 +34,11 @@
>  #include "libavutil/pixdesc.h"
>  #include "libavutil/pixfmt.h"
>  #include "libavutil/time.h"
> +#include "libavutil/imgutils.h"
> 
>  #include "avcodec.h"
>  #include "internal.h"
> +#include "decode.h"
>  #include "qsv.h"
>  #include "qsv_internal.h"
>  #include "qsvdec.h"
> @@ -54,6 +56,31 @@ const AVCodecHWConfigInternal *ff_qsv_hw_configs[] = {
>      NULL
>  };
> 
> +static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx, AVFrame
> +*frame, AVBufferPool *pool) {
> +    int ret = 0;
> +
> +    ff_decode_frame_props(avctx, frame);
> +
> +    frame->width       = avctx->width;
> +    frame->height      = avctx->height;
> +    frame->linesize[0] = FFALIGN(avctx->width, 128);
> +    frame->linesize[1] = frame->linesize[0];
> +    frame->buf[0]      = av_buffer_pool_get(pool);
> +    if (!frame->buf[0])
> +        return AVERROR(ENOMEM);
> +
> +    frame->data[0] = frame->buf[0]->data;
> +    frame->data[1] = frame->data[0] +
> +                            frame->linesize[0] * FFALIGN(avctx->height,
> + 64);
> +
> +    ret = ff_attach_decode_data(frame);

Could you please explain why this function is needed? I don't see that 
private_ref is needed for qsv decoding.

> +    if (ret < 0)
> +        return ret;
> +
> +    return 0;
> +}
> +
>  static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession
> session,
>                              AVBufferRef *hw_frames_ref, AVBufferRef 
> *hw_device_ref)
> { @@ -74,7 +101,8 @@ static int qsv_init_session(AVCodecContext *avctx,
> QSVContext *q, mfxSession ses
> 
>          ret = ff_qsv_init_session_frames(avctx, &q->internal_qs.session,
>                                           &q->frames_ctx, q->load_plugins,
> -                                         q->iopattern ==
> MFX_IOPATTERN_OUT_OPAQUE_MEMORY);
> +                                         q->iopattern ==
> MFX_IOPATTERN_OUT_OPAQUE_MEMORY,
> +                                         q->gpu_copy);
>          if (ret < 0) {
>              av_buffer_unref(&q->frames_ctx.hw_frames_ctx);
>              return ret;
> @@ -88,7 +116,7 @@ static int qsv_init_session(AVCodecContext *avctx,
> QSVContext *q, mfxSession ses
>          }
> 
>          ret = ff_qsv_init_session_device(avctx, &q->internal_qs.session,
> -                                         hw_device_ref, q->load_plugins);
> +                                         hw_device_ref,
> + q->load_plugins, q->gpu_copy);
>          if (ret < 0)
>              return ret;
> 
> @@ -96,7 +124,7 @@ static int qsv_init_session(AVCodecContext *avctx,
> QSVContext *q, mfxSession ses
>      } else {
>          if (!q->internal_qs.session) {
>              ret = ff_qsv_init_internal_session(avctx, &q->internal_qs,
> -                                               q->load_plugins);
> +                                               q->load_plugins,
> + q->gpu_copy);
>              if (ret < 0)
>                  return ret;
>          }
> @@ -229,6 +257,9 @@ static int qsv_decode_init(AVCodecContext *avctx,
> QSVContext *q, mfxVideoParam *
> 
>      q->frame_info = param->mfx.FrameInfo;
> 
> +    if (!avctx->hw_frames_ctx)
> +        q->pool = 
> av_buffer_pool_init(av_image_get_buffer_size(avctx->pix_fmt,
> +                    FFALIGN(avctx->width, 128), FFALIGN(avctx->height,
> + 64), 1), av_buffer_allocz);
>      return 0;
>  }
> 
> @@ -275,9 +306,15 @@ static int alloc_frame(AVCodecContext *avctx,
> QSVContext *q, QSVFrame *frame)  {
>      int ret;
> 
> -    ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF);
> -    if (ret < 0)
> -        return ret;
> +    if (!q->pool) {
> +        ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF);
> +        if (ret < 0)
> +            return ret;
> +    } else {
> +        ret = ff_qsv_get_continuous_buffer(avctx, frame->frame, q->pool);
> +        if (ret < 0)
> +            return ret;
> +    }
> 
>      if (frame->frame->format == AV_PIX_FMT_QSV) {
>          frame->surface = *(mfxFrameSurface1*)frame->frame->data[3];
> @@ -535,6 +572,7 @@ int ff_qsv_decode_close(QSVContext *q)
> 
>      av_buffer_unref(&q->frames_ctx.hw_frames_ctx);
>      av_buffer_unref(&q->frames_ctx.mids_buf);
> +    av_buffer_pool_uninit(&q->pool);
> 
>      return 0;
>  }
> diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h index
> 64dc8d2f47..dec1f61ceb 100644
> --- a/libavcodec/qsvdec.h
> +++ b/libavcodec/qsvdec.h
> @@ -59,12 +59,14 @@ typedef struct QSVContext {
>      enum AVPixelFormat orig_pix_fmt;
>      uint32_t fourcc;
>      mfxFrameInfo frame_info;
> +    AVBufferPool *pool;
> 
>      int initialized;
> 
>      // options set by the caller
>      int async_depth;
>      int iopattern;
> +    int gpu_copy;
> 
>      char *load_plugins;
> 
> diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c index
> eb1dc336a4..d7ac00689a 100644
> --- a/libavcodec/qsvdec_h2645.c
> +++ b/libavcodec/qsvdec_h2645.c
> @@ -193,6 +193,11 @@ static const AVOption hevc_options[] = {
> 
>      { "load_plugins", "A :-separate list of hexadecimal plugin UIDs to load 
> in an
> internal session",
>          OFFSET(qsv.load_plugins), AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, 
> VD },
> +
> +    { "gpu_copy", "A GPU-accelerated memory copy between video and system
> memory", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 =
> MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD,
> "gpu_copy"},
> +        { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},
> +        { "on",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },
> 0, 0, VD, "gpu_copy"},
> +        { "off",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },
> 0, 0, VD, "gpu_copy"},
>      { NULL },
>  };
> 
> @@ -228,6 +233,11 @@ AVCodec ff_hevc_qsv_decoder = {  #if
> CONFIG_H264_QSV_DECODER  static const AVOption options[] = {
>      { "async_depth", "Internal parallelization depth, the higher the value 
> the higher
> the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 =
> ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
> +
> +    { "gpu_copy", "A GPU-accelerated copy between video and system memory",
> OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT },
> MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"},
> +    { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},
> +    { "on",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },      
> 0,
> 0, VD, "gpu_copy"},
> +    { "off",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },     
> 0,
> 0, VD, "gpu_copy"},
>      { NULL },
>  };
> 
> diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c index
> b044c11540..13b1a99113 100644
> --- a/libavcodec/qsvdec_other.c
> +++ b/libavcodec/qsvdec_other.c
> @@ -181,6 +181,11 @@ static void qsv_decode_flush(AVCodecContext *avctx)
> #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
> static const AVOption options[] = {
>      { "async_depth", "Internal parallelization depth, the higher the value 
> the higher
> the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 =
> ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
> +
> +    { "gpu_copy", "A GPU-accelerated memory copy between video and system
> memory", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 =
> MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD,
> "gpu_copy"},
> +    { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},
> +    { "on",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },      
> 0,
> 0, VD, "gpu_copy"},
> +    { "off",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },     
> 0,
> 0, VD, "gpu_copy"},
>      { NULL },
>  };
> 
> diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index
> 207cdc1d61..ba85d645ca 100644
> --- a/libavcodec/qsvenc.c
> +++ b/libavcodec/qsvenc.c
> @@ -956,7 +956,8 @@ static int qsvenc_init_session(AVCodecContext *avctx,
> QSVEncContext *q)
> 
>          ret = ff_qsv_init_session_frames(avctx, &q->internal_qs.session,
>                                           &q->frames_ctx, q->load_plugins,
> -                                         q->param.IOPattern ==
> MFX_IOPATTERN_IN_OPAQUE_MEMORY);
> +                                         q->param.IOPattern ==
> MFX_IOPATTERN_IN_OPAQUE_MEMORY,
> +                                         MFX_GPUCOPY_OFF);
>          if (ret < 0) {
>              av_buffer_unref(&q->frames_ctx.hw_frames_ctx);
>              return ret;
> @@ -965,14 +966,15 @@ static int qsvenc_init_session(AVCodecContext *avctx,
> QSVEncContext *q)
>          q->session = q->internal_qs.session;
>      } else if (avctx->hw_device_ctx) {
>          ret = ff_qsv_init_session_device(avctx, &q->internal_qs.session,
> -                                         avctx->hw_device_ctx, 
> q->load_plugins);
> +                                         avctx->hw_device_ctx, 
> q->load_plugins,
> +                                         MFX_GPUCOPY_OFF);
>          if (ret < 0)
>              return ret;
> 
>          q->session = q->internal_qs.session;
>      } else {
>          ret = ff_qsv_init_internal_session(avctx, &q->internal_qs,
> -                                           q->load_plugins);
> +                                           q->load_plugins,
> + MFX_GPUCOPY_OFF);
>          if (ret < 0)
>              return ret;
> 
> --
> 2.17.1
_______________________________________________
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH, v2] lavc/qsvdec: Add GPU-accelerated memory copy support

Reply via email to