From c5cd90174f2f260870f994858b65bba561454035 Mon Sep 17 00:00:00 2001
From: Roman Arzumanyan <rarzumanyan@nvidia.com>
Date: Fri, 2 Nov 2018 15:26:20 +0300
Subject: [PATCH] Adding NV12 support to transpose_npp filter

---
 libavfilter/vf_transpose_npp.c | 128 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 113 insertions(+), 15 deletions(-)

diff --git a/libavfilter/vf_transpose_npp.c b/libavfilter/vf_transpose_npp.c
index 1b3a5c0c69..7c9cbba354 100644
--- a/libavfilter/vf_transpose_npp.c
+++ b/libavfilter/vf_transpose_npp.c
@@ -34,12 +34,15 @@
 
 static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_NV12, /* interleaved chroma; mapped to YUV420P by deinterleaved_formats */
     AV_PIX_FMT_YUV444P
 };
 
 enum TransposeStage {
+    STAGE_DEINTERLEAVE, /* input format -> its deinterleaved (planar) format */
     STAGE_ROTATE,
     STAGE_TRANSPOSE,
+    STAGE_INTERLEAVE,   /* deinterleaved (planar) format -> back to the input format */
     STAGE_NB
 };
 
@@ -182,13 +185,31 @@ static int format_is_supported(enum AVPixelFormat fmt)
     return 0;
 }
 
+static const enum AVPixelFormat deinterleaved_formats[][2] = {
+    { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P }, /* { interleaved format, planar equivalent } */
+};
+
+/* Planar counterpart of fmt: fmt itself if already planar, else the table entry, else NONE. */
+static enum AVPixelFormat get_deinterleaved_format(enum AVPixelFormat fmt)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); /* NULL if fmt is invalid */
+    int i;
+    if (desc && av_pix_fmt_count_planes(fmt) == desc->nb_components)
+        return fmt; /* one plane per component: nothing to deinterleave */
+    for (i = 0; i < FF_ARRAY_ELEMS(deinterleaved_formats); i++)
+        if (deinterleaved_formats[i][0] == fmt)
+            return deinterleaved_formats[i][1];
+    return AV_PIX_FMT_NONE; /* invalid fmt, or no planar mapping listed */
+}
+
 static int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
                                  int out_width, int out_height)
 {
     NPPTransposeContext *s = ctx->priv;
     AVHWFramesContext *in_frames_ctx;
-    enum AVPixelFormat format;
+    enum AVPixelFormat format, in_dformat, out_dformat;
     int i, ret, last_stage = -1;
+    int deinterleave_needed = 0;
     int rot_width = out_width, rot_height = out_height;
 
     /* check that we have a hw context */
@@ -199,13 +220,27 @@ static int init_processing_chain(AVFilterContext *ctx, int in_width, int in_heig
 
     in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
     format        = in_frames_ctx->sw_format;
+    in_dformat    = get_deinterleaved_format(format);
+    out_dformat   = get_deinterleaved_format(format);
 
     if (!format_is_supported(format)) {
         av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
                av_get_pix_fmt_name(format));
         return AVERROR(ENOSYS);
+    }  
+
+    if (!format_is_supported(in_dformat)) {
+        av_log(ctx, AV_LOG_ERROR, "Unsupported deinterleaving format: %s\n",
+               av_get_pix_fmt_name(in_dformat));
+        return AVERROR(ENOSYS);
     }
 
+    if (!format_is_supported(out_dformat)) {
+        av_log(ctx, AV_LOG_ERROR, "Unsupported deinterleaving format: %s\n",
+               av_get_pix_fmt_name(out_dformat));
+        return AVERROR(ENOSYS);
+    }    
+
     if (s->dir != NPP_TRANSPOSE_CCLOCK_FLIP) {
         s->stages[STAGE_ROTATE].stage_needed = 1;
     }
@@ -217,19 +252,35 @@ static int init_processing_chain(AVFilterContext *ctx, int in_width, int in_heig
         rot_width = in_width;
         rot_height = in_height;
     }
-
-    s->stages[STAGE_ROTATE].in_fmt               = format;
-    s->stages[STAGE_ROTATE].out_fmt              = format;
-    s->stages[STAGE_ROTATE].planes_in[0].width   = in_width;
-    s->stages[STAGE_ROTATE].planes_in[0].height  = in_height;
-    s->stages[STAGE_ROTATE].planes_out[0].width  = rot_width;
-    s->stages[STAGE_ROTATE].planes_out[0].height = rot_height;
-    s->stages[STAGE_TRANSPOSE].in_fmt               = format;
-    s->stages[STAGE_TRANSPOSE].out_fmt              = format;
-    s->stages[STAGE_TRANSPOSE].planes_in[0].width   = rot_width;
-    s->stages[STAGE_TRANSPOSE].planes_in[0].height  = rot_height;
-    s->stages[STAGE_TRANSPOSE].planes_out[0].width  = out_width;
-    s->stages[STAGE_TRANSPOSE].planes_out[0].height = out_height;
+    
+    deinterleave_needed = (s->stages[STAGE_ROTATE].stage_needed || 
+        s->stages[STAGE_TRANSPOSE].stage_needed) && (format != in_dformat);
+    
+    s->stages[STAGE_DEINTERLEAVE].stage_needed          = deinterleave_needed;
+    s->stages[STAGE_DEINTERLEAVE].in_fmt                = format;
+    s->stages[STAGE_DEINTERLEAVE].out_fmt               = in_dformat;
+    s->stages[STAGE_DEINTERLEAVE].planes_in[0].width    = in_width;
+    s->stages[STAGE_DEINTERLEAVE].planes_in[0].height   = in_height;
+
+    s->stages[STAGE_ROTATE].in_fmt                      = in_dformat;
+    s->stages[STAGE_ROTATE].out_fmt                     = out_dformat;
+    s->stages[STAGE_ROTATE].planes_in[0].width          = in_width;
+    s->stages[STAGE_ROTATE].planes_in[0].height         = in_height;
+    s->stages[STAGE_ROTATE].planes_out[0].width         = rot_width;
+    s->stages[STAGE_ROTATE].planes_out[0].height        = rot_height;
+
+    s->stages[STAGE_TRANSPOSE].in_fmt                   = in_dformat;
+    s->stages[STAGE_TRANSPOSE].out_fmt                  = out_dformat;
+    s->stages[STAGE_TRANSPOSE].planes_in[0].width       = rot_width;
+    s->stages[STAGE_TRANSPOSE].planes_in[0].height      = rot_height;
+    s->stages[STAGE_TRANSPOSE].planes_out[0].width      = out_width;
+    s->stages[STAGE_TRANSPOSE].planes_out[0].height     = out_height;
+
+    s->stages[STAGE_INTERLEAVE].stage_needed            = deinterleave_needed;
+    s->stages[STAGE_INTERLEAVE].in_fmt                  = out_dformat;
+    s->stages[STAGE_INTERLEAVE].out_fmt                 = format;
+    s->stages[STAGE_INTERLEAVE].planes_in[0].width      = out_width;
+    s->stages[STAGE_INTERLEAVE].planes_in[0].height     = out_height;
 
     /* init the hardware contexts */
     for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
@@ -292,6 +343,51 @@ static int npptranspose_config_props(AVFilterLink *outlink)
     return 0;
 }
 
+/* Split the interleaved chroma of `in` into the planar frame `out` (NV12 input only). */
+static int npptranspose_deinterleave(AVFilterContext *ctx, NPPTransposeStageContext *stage,
+                                 AVFrame *out, AVFrame *in)
+{
+    const AVHWFramesContext *src_frames = (AVHWFramesContext*)in->hw_frames_ctx->data;
+    const NppiSize roi = { in->width, in->height };
+    NppStatus status;
+
+    /* Only NV12 is handled; any other software format is reported as AVERROR_BUG. */
+    if (src_frames->sw_format != AV_PIX_FMT_NV12)
+        return AVERROR_BUG;
+
+    status = nppiYCbCr420_8u_P2P3R(in->data[0], in->linesize[0],
+                                   in->data[1], in->linesize[1],
+                                   out->data, out->linesize, roi);
+    if (status == NPP_SUCCESS)
+        return 0;
+
+    av_log(ctx, AV_LOG_ERROR, "NPP deinterleave error: %d\n", status);
+    return AVERROR_UNKNOWN;
+}
+
+/* Pack the planar frame `in` into the interleaved-chroma frame `out` (NV12 output only). */
+static int npptranspose_interleave(AVFilterContext *ctx, NPPTransposeStageContext *stage,
+                               AVFrame *out, AVFrame *in)
+{
+    AVHWFramesContext *out_frames_ctx = (AVHWFramesContext*)out->hw_frames_ctx->data;
+    NppStatus err;
+    switch (out_frames_ctx->sw_format) {
+    case AV_PIX_FMT_NV12:
+        err = nppiYCbCr420_8u_P3P2R((const uint8_t**)in->data, in->linesize,
+                                    out->data[0], out->linesize[0],
+                                    out->data[1], out->linesize[1],
+                                    (NppiSize){ in->width, in->height });
+        break;
+    default: /* no interleave routine for this output format */
+        return AVERROR_BUG;
+    }
+    if (err != NPP_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "NPP interleave error: %d\n", err);
+        return AVERROR_UNKNOWN;
+    }
+    return 0;
+}
+
 static int npptranspose_rotate(AVFilterContext *ctx, NPPTransposeStageContext *stage,
                                AVFrame *out, AVFrame *in)
 {
@@ -350,8 +446,10 @@ static int npptranspose_transpose(AVFilterContext *ctx, NPPTransposeStageContext
 
 static int (*const npptranspose_process[])(AVFilterContext *ctx, NPPTransposeStageContext *stage,
                                            AVFrame *out, AVFrame *in) = {
+    [STAGE_DEINTERLEAVE] = npptranspose_deinterleave, /* entries are indexed by enum TransposeStage */
     [STAGE_ROTATE]       = npptranspose_rotate,
-    [STAGE_TRANSPOSE]    = npptranspose_transpose
+    [STAGE_TRANSPOSE]    = npptranspose_transpose,
+    [STAGE_INTERLEAVE]   = npptranspose_interleave /* inverse of the deinterleave stage */
 };
 
 static int npptranspose_filter(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
-- 
2.15.1.windows.2

