From 5fcc87093ce1f2aea3a43b52399b09fdf8acda36 Mon Sep 17 00:00:00 2001
From: Yingming Fan <yingmingfan@gmail.com>
Date: Thu, 23 Apr 2015 23:19:05 +0800
Subject: [PATCH] Add simple P frame support for ffv1.

---
 libavcodec/ffv1.c    | 10 ++++++
 libavcodec/ffv1.h    |  1 +
 libavcodec/ffv1dec.c | 54 ++++++++++++++++++++++++--------
 libavcodec/ffv1enc.c | 87 ++++++++++++++++++++++++++++++++++++++++++----------
 4 files changed, 124 insertions(+), 28 deletions(-)

diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index 7a38bf9..7fb0a0f 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -143,12 +143,21 @@ av_cold int ffv1_init_slice_contexts(FFV1Context *f)
             av_freep(&f->slice_context[i]);
             goto memfail;
         }
+
+        fs->ref_sample_buffer = av_malloc_array((fs->width + 6), 3 * MAX_PLANES *
+                                      sizeof(*fs->ref_sample_buffer));
+        if (!fs->ref_sample_buffer) {
+            av_freep(&f->slice_context[i]->sample_buffer);
+            av_freep(&f->slice_context[i]);
+            goto memfail;
+        }
     }
     return 0;
 
 memfail:
     while(--i >= 0) {
         av_freep(&f->slice_context[i]->sample_buffer);
+        av_freep(&f->slice_context[i]->ref_sample_buffer);
         av_freep(&f->slice_context[i]);
     }
     return AVERROR(ENOMEM);
@@ -219,6 +228,7 @@ av_cold int ffv1_close(AVCodecContext *avctx)
             av_freep(&p->vlc_state);
         }
         av_freep(&fs->sample_buffer);
+        av_freep(&fs->ref_sample_buffer);
     }
 
     av_freep(&avctx->stats_out);
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index bfc4d71..f5eeae6 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -103,6 +103,7 @@ typedef struct FFV1Context {
     int run_index;
     int colorspace;
     int16_t *sample_buffer;
+    int16_t *ref_sample_buffer;
 
     int ec;
     int intra;
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index fda3f09..1874b5b 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -95,7 +95,7 @@ static inline int get_vlc_symbol(GetBitContext *gb, VlcState *const state,
 }
 
 static av_always_inline void decode_line(FFV1Context *s, int w,
-                                         int16_t *sample[2],
+                                         int16_t *sample[2], int16_t *ref_sample[2],
                                          int plane_index, int bits)
 {
     PlaneContext *const p = &s->plane[plane_index];
@@ -172,22 +172,31 @@ static av_always_inline void decode_line(FFV1Context *s, int w,
         if (sign)
             diff = -diff;
 
-        sample[1][x] = av_mod_uintp2(predict(sample[1] + x, sample[0] + x) + diff, bits);
+        if (!ref_sample || s->cur->pict_type == AV_PICTURE_TYPE_I) // RGB callers pass ref_sample == NULL
+            sample[1][x] = av_mod_uintp2(predict(sample[1] + x, sample[0] + x) + diff, bits);
+        else
+            sample[1][x] = av_mod_uintp2(diff + ref_sample[0][x], bits);
     }
     s->run_index = run_index;
 }
 
-static void decode_plane(FFV1Context *s, uint8_t *src,
+static void decode_plane(FFV1Context *s, uint8_t *src, uint8_t *ref,
                          int w, int h, int stride, int plane_index)
 {
     int x, y;
     int16_t *sample[2];
+    int16_t *ref_sample[2];
+
     sample[0] = s->sample_buffer + 3;
     sample[1] = s->sample_buffer + w + 6 + 3;
 
+    ref_sample[0] = s->ref_sample_buffer + 3;
+    ref_sample[1] = s->ref_sample_buffer + w + 6 + 3;
+
     s->run_index = 0;
 
     memset(s->sample_buffer, 0, 2 * (w + 6) * sizeof(*s->sample_buffer));
+    memset(s->ref_sample_buffer, 0, 2 * (w + 6) * sizeof(*s->ref_sample_buffer));
 
     for (y = 0; y < h; y++) {
         int16_t *temp = sample[0]; // FIXME: try a normal buffer
@@ -200,11 +209,16 @@ static void decode_plane(FFV1Context *s, uint8_t *src,
 
 // { START_TIMER
         if (s->avctx->bits_per_raw_sample <= 8) {
-            decode_line(s, w, sample, plane_index, 8);
+            if (ref) { // load the co-located line of the reference plane
+                for (x = 0; x < w; x++) {
+                    ref_sample[0][x] = ref[x + stride * y];
+                }
+            }
+            decode_line(s, w, sample, ref_sample, plane_index, 8);
             for (x = 0; x < w; x++)
                 src[x + stride * y] = sample[1][x];
         } else {
-            decode_line(s, w, sample, plane_index, s->avctx->bits_per_raw_sample);
+            decode_line(s, w, sample, ref_sample, plane_index, s->avctx->bits_per_raw_sample); // FIXME: ref_sample is not loaded for >8 bit
             if (s->packed_at_lsb) {
                 for (x = 0; x < w; x++) {
                     ((uint16_t*)(src + stride*y))[x] = sample[1][x];
@@ -246,9 +260,9 @@ static void decode_rgb_frame(FFV1Context *s, uint8_t *src[3], int w, int h, int
             sample[p][1][-1]= sample[p][0][0  ];
             sample[p][0][ w]= sample[p][0][w-1];
             if (lbd && s->slice_coding_mode == 0)
-                decode_line(s, w, sample[p], (p + 1)/2, 9);
+                decode_line(s, w, sample[p], NULL, (p + 1)/2, 9);
             else
-                decode_line(s, w, sample[p], (p + 1)/2, bits + (s->slice_coding_mode != 1));
+                decode_line(s, w, sample[p], NULL, (p + 1)/2, bits + (s->slice_coding_mode != 1));
         }
         for (x = 0; x < w; x++) {
             int g = sample[0][1][x];
@@ -361,6 +375,8 @@ static int decode_slice(AVCodecContext *c, void *arg)
     int width, height, x, y, ret;
     const int ps      = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step_minus1 + 1;
     AVFrame * const p = f->cur;
+    AVFrame * const last_picture = f->last_picture.f;
+    uint8_t *ref_data[] = {NULL,NULL,NULL,NULL};
     int i, si;
 
     for( si=0; fs != f->slice_context[si]; si ++)
@@ -434,14 +450,22 @@ static int decode_slice(AVCodecContext *c, void *arg)
         const int chroma_height = FF_CEIL_RSHIFT(height, f->chroma_v_shift);
         const int cx            = x >> f->chroma_h_shift;
         const int cy            = y >> f->chroma_v_shift;
-        decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0);
+
+        if (p->pict_type == AV_PICTURE_TYPE_P) { // reference = co-located slice area of the previous picture
+            ref_data[0] = last_picture->data[0] + ps*x + y*p->linesize[0]; // NOTE(review): assumes last_picture shares p's linesize
+            ref_data[1] = f->chroma_planes ? last_picture->data[1] + ps*cx+cy*p->linesize[1] : NULL;
+            ref_data[2] = f->chroma_planes ? last_picture->data[2] + ps*cx+cy*p->linesize[2] : NULL;
+            ref_data[3] = fs->transparency ? last_picture->data[3] + ps*x + y*p->linesize[3] : NULL;
+        }
+
+        decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], ref_data[0], width, height, p->linesize[0], 0);
 
         if (f->chroma_planes) {
-            decode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
-            decode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1);
+            decode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], ref_data[1], chroma_width, chroma_height, p->linesize[1], 1);
+            decode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], ref_data[2], chroma_width, chroma_height, p->linesize[2], 1);
         }
         if (fs->transparency)
-            decode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], 2);
+            decode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], ref_data[3], width, height, p->linesize[3], 2);
     } else {
         uint8_t *planes[3] = { p->data[0] + ps * x + y * p->linesize[0],
                                p->data[1] + ps * x + y * p->linesize[1],
@@ -879,13 +903,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     ff_init_range_decoder(c, buf, buf_size);
     ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);
 
-    p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. P
+    if (f->version < 4)
+        p->pict_type = AV_PICTURE_TYPE_I;
+
     if (get_rac(c, &keystate)) {
         p->key_frame    = 1;
         f->key_frame_ok = 0;
         if ((ret = read_header(f)) < 0)
             return ret;
         f->key_frame_ok = 1;
+        if (f->version >= 4)
+            p->pict_type = AV_PICTURE_TYPE_I;
     } else {
         if (!f->key_frame_ok) {
             av_log(avctx, AV_LOG_ERROR,
@@ -893,6 +921,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
             return AVERROR_INVALIDDATA;
         }
         p->key_frame = 0;
+        if (f->version >= 4)
+            p->pict_type = AV_PICTURE_TYPE_P;
     }
 
     if ((ret = ff_thread_get_buffer(avctx, &f->picture, AV_GET_BUFFER_FLAG_REF)) < 0)
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 5b0ade4..f806dc5 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -264,7 +264,7 @@ static inline void put_vlc_symbol(PutBitContext *pb, VlcState *const state,
 }
 
 static av_always_inline int encode_line(FFV1Context *s, int w,
-                                        int16_t *sample[3],
+                                        int16_t *sample[3], int16_t *ref_sample[3],
                                         int plane_index, int bits)
 {
     PlaneContext *const p = &s->plane[plane_index];
@@ -302,7 +302,11 @@ static av_always_inline int encode_line(FFV1Context *s, int w,
         int diff, context;
 
         context = get_context(p, sample[0] + x, sample[1] + x, sample[2] + x);
-        diff    = sample[0][x] - predict(sample[0] + x, sample[1] + x);
+        /* Temporal prediction if a reference line is given, spatial otherwise (RGB passes NULL). */
+        if (ref_sample && ref_sample[0])
+            diff    = sample[0][x] - ref_sample[0][x];
+        else
+            diff    = sample[0][x] - predict(sample[0] + x, sample[1] + x);
 
         if (context < 0) {
             context = -context;
@@ -365,27 +369,37 @@ static av_always_inline int encode_line(FFV1Context *s, int w,
     return 0;
 }
 
-static int encode_plane(FFV1Context *s, uint8_t *src, int w, int h,
+static int encode_plane(FFV1Context *s, uint8_t *src, uint8_t *ref, int w, int h,
                          int stride, int plane_index)
 {
     int x, y, i, ret;
     const int ring_size = s->avctx->context_model ? 3 : 2;
     int16_t *sample[3];
+    int16_t *ref_sample[3];
     s->run_index = 0;
 
     memset(s->sample_buffer, 0, ring_size * (w + 6) * sizeof(*s->sample_buffer));
+    memset(s->ref_sample_buffer, 0, ring_size * (w + 6) * sizeof(*s->ref_sample_buffer));
 
     for (y = 0; y < h; y++) {
-        for (i = 0; i < ring_size; i++)
+        for (i = 0; i < ring_size; i++) {
             sample[i] = s->sample_buffer + (w + 6) * ((h + i - y) % ring_size) + 3;
+            ref_sample[i] = s->ref_sample_buffer + (w + 6) * ((h + i - y) % ring_size) + 3;
+        }
+        /* No reference plane: a NULL ref_sample[0] makes encode_line() predict spatially, at any bit depth. */
+        if (!ref)
+            ref_sample[0] = NULL;
 
         sample[0][-1]= sample[1][0  ];
         sample[1][ w]= sample[1][w-1];
 // { START_TIMER
         if (s->bits_per_raw_sample <= 8) {
-            for (x = 0; x < w; x++)
+            for (x = 0; x < w; x++) {
+                if (ref)
+                    ref_sample[0][x] = ref[x + stride * y];
                 sample[0][x] = src[x + stride * y];
-            if((ret = encode_line(s, w, sample, plane_index, 8)) < 0)
+            }
+            if((ret = encode_line(s, w, sample, ref_sample, plane_index, 8)) < 0)
                 return ret;
         } else {
             if (s->packed_at_lsb) {
@@ -397,7 +411,7 @@ static int encode_plane(FFV1Context *s, uint8_t *src, int w, int h,
                     sample[0][x] = ((uint16_t*)(src + stride*y))[x] >> (16 - s->bits_per_raw_sample);
                 }
             }
-            if((ret = encode_line(s, w, sample, plane_index, s->bits_per_raw_sample)) < 0)
+            if((ret = encode_line(s, w, sample, ref_sample, plane_index, s->bits_per_raw_sample)) < 0)
                 return ret;
         }
 // STOP_TIMER("encode line") }
@@ -457,9 +471,9 @@ static int encode_rgb_frame(FFV1Context *s, const uint8_t *src[3],
             sample[p][0][-1] = sample[p][1][0  ];
             sample[p][1][ w] = sample[p][1][w-1];
             if (lbd && s->slice_coding_mode == 0)
-                ret = encode_line(s, w, sample[p], (p + 1) / 2, 9);
+                ret = encode_line(s, w, sample[p], NULL, (p + 1) / 2, 9);
             else
-                ret = encode_line(s, w, sample[p], (p + 1) / 2, bits + (s->slice_coding_mode != 1));
+                ret = encode_line(s, w, sample[p], NULL, (p + 1) / 2, bits + (s->slice_coding_mode != 1));
             if (ret < 0)
                 return ret;
         }
@@ -689,11 +703,13 @@ static av_cold int encode_init(AVCodecContext *avctx)
         s->ec = (s->version >= 3);
     }
 
-    if ((s->version == 2 || s->version>3) && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+    if ((s->version == 2 || s->version>4) && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
         av_log(avctx, AV_LOG_ERROR, "Version 2 needed for requested features but version 2 is experimental and not enabled\n");
         return AVERROR_INVALIDDATA;
     }
 
+    av_log(avctx, AV_LOG_DEBUG, "FFv1 version is %d\n", s->version);
+
     s->ac = avctx->coder_type > 0 ? 2 : 0;
 
     s->plane_count = 3;
@@ -793,6 +809,17 @@ static av_cold int encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "format not supported\n");
         return AVERROR(ENOSYS);
     }
+    // FIXME: version 4 is so far restricted to 8-bit YUV input; lift these limits
+    if (s->version == 4) {
+        if (s->bits_per_raw_sample > 8) {
+            av_log(avctx, AV_LOG_ERROR, "Version 4 only support 8 bit currently\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (s->colorspace != 0) {
+            av_log(avctx, AV_LOG_ERROR, "Version 4 only support yuv colorspace currently\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
     if (s->transparency) {
         av_log(avctx, AV_LOG_WARNING, "Storing alpha plane, this will require a recent FFV1 decoder to playback!\n");
     }
@@ -1112,12 +1139,14 @@ static int encode_slice(AVCodecContext *c, void *arg)
     int x            = fs->slice_x;
     int y            = fs->slice_y;
     const AVFrame *const p = f->picture.f;
+    const AVFrame *const last_picture = f->last_picture.f;
     const int ps     = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step_minus1 + 1;
     int ret;
     RangeCoder c_bak = fs->c;
     const uint8_t *planes[3] = {p->data[0] + ps*x + y*p->linesize[0],
                                 p->data[1] + ps*x + y*p->linesize[1],
                                 p->data[2] + ps*x + y*p->linesize[2]};
+    uint8_t *ref_data[] = {NULL,NULL,NULL,NULL};
 
     fs->slice_coding_mode = 0;
     if (f->version > 3) {
@@ -1148,14 +1177,21 @@ retry:
         const int cx            = x >> f->chroma_h_shift;
         const int cy            = y >> f->chroma_v_shift;
 
-        ret = encode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0);
+        if (c->coded_frame->pict_type == AV_PICTURE_TYPE_P) { // reference = co-located slice area of the previous picture
+            ref_data[0] = last_picture->data[0] + ps*x + y*p->linesize[0]; // NOTE(review): assumes last_picture shares p's linesize
+            ref_data[1] = f->chroma_planes ? last_picture->data[1] + ps*cx+cy*p->linesize[1] : NULL;
+            ref_data[2] = f->chroma_planes ? last_picture->data[2] + ps*cx+cy*p->linesize[2] : NULL;
+            ref_data[3] = fs->transparency ? last_picture->data[3] + ps*x + y*p->linesize[3] : NULL;
+        }
+
+        ret = encode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], ref_data[0], width, height, p->linesize[0], 0);
 
         if (f->chroma_planes) {
-            ret |= encode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
-            ret |= encode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1);
+            ret |= encode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], ref_data[1], chroma_width, chroma_height, p->linesize[1], 1);
+            ret |= encode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], ref_data[2], chroma_width, chroma_height, p->linesize[2], 1);
         }
         if (fs->transparency)
-            ret |= encode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], 2);
+            ret |= encode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], ref_data[3], width, height, p->linesize[3], 2);
     } else {
         ret = encode_rgb_frame(fs, planes, width, height, p->linesize);
     }
@@ -1181,7 +1217,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 {
     FFV1Context *f      = avctx->priv_data;
     RangeCoder *const c = &f->slice_context[0]->c;
-    AVFrame *const p    = f->picture.f;
+    AVFrame *p          = f->picture.f;
     int used_count      = 0;
     uint8_t keystate    = 128;
     uint8_t *buf_p;
@@ -1243,19 +1279,34 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     ff_init_range_encoder(c, pkt->data, pkt->size);
     ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);
 
+    if (f->last_picture.f)
+        ff_thread_release_buffer(avctx, &f->last_picture);
+    FFSWAP(ThreadFrame, f->picture, f->last_picture);
+
+    f->cur = p = f->picture.f;
+    /* NOTE(review): the buffer obtained here is dropped again by av_frame_unref(p) below. */
+    if ((ret = ff_thread_get_buffer(avctx, &f->picture, AV_GET_BUFFER_FLAG_REF)) < 0)
+        return ret;
+
     av_frame_unref(p);
     if ((ret = av_frame_ref(p, pict)) < 0)
         return ret;
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+
+    if (f->version < 4)
+        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 
     if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
         put_rac(c, &keystate, 1);
         avctx->coded_frame->key_frame = 1;
         f->gob_count++;
         write_header(f);
+        if (f->version >= 4)
+            avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     } else {
         put_rac(c, &keystate, 0);
         avctx->coded_frame->key_frame = 0;
+        if (f->version >= 4)
+            avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
     }
 
     if (f->ac > 1) {
@@ -1308,6 +1359,10 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     if (avctx->flags & CODEC_FLAG_PASS1)
         avctx->stats_out[0] = '\0';
 
+    if (f->last_picture.f)
+        ff_thread_release_buffer(avctx, &f->last_picture);
+    f->cur = NULL;
+
     f->picture_number++;
     pkt->size   = buf_p - pkt->data;
     pkt->pts    =
-- 
2.3.2 (Apple Git-55)