From f490153ba22f97dfbba5317ff9524b92a91164c7 Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Sun, 28 Jul 2019 22:27:34 +0200
Subject: [PATCH 2/3] avcodec/dsddec: add slice threading support

---
 libavcodec/dsd.c    |  6 +++---
 libavcodec/dsd.h    |  2 +-
 libavcodec/dsddec.c | 50 ++++++++++++++++++++++++++++++---------------
 libavcodec/dstdec.c |  7 ++++---
 4 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/libavcodec/dsd.c b/libavcodec/dsd.c
index 9104f38476..0cbcc0492f 100644
--- a/libavcodec/dsd.c
+++ b/libavcodec/dsd.c
@@ -57,7 +57,7 @@ void ff_dsd2pcm_translate(DSDContext* s, size_t samples, int lsbf,
                           float *dst, ptrdiff_t dst_stride)
 {
     unsigned pos, i;
-    unsigned char* p;
+    unsigned *p;
     double sum;
 
     pos = s->pos;
@@ -71,8 +71,8 @@ void ff_dsd2pcm_translate(DSDContext* s, size_t samples, int lsbf,
 
         sum = 0.0;
         for (i = 0; i < CTABLES; i++) {
-            unsigned char a = s->buf[(pos                   - i) & FIFOMASK];
-            unsigned char b = s->buf[(pos - (CTABLES*2 - 1) + i) & FIFOMASK];
+            unsigned a = s->buf[(pos                   - i) & FIFOMASK];
+            unsigned b = s->buf[(pos - (CTABLES*2 - 1) + i) & FIFOMASK];
             sum += ctables[i][a] + ctables[i][b];
         }
 
diff --git a/libavcodec/dsd.h b/libavcodec/dsd.h
index 5ca4574837..3d38438526 100644
--- a/libavcodec/dsd.h
+++ b/libavcodec/dsd.h
@@ -40,7 +40,7 @@
  * Per-channel buffer
  */
 typedef struct DSDContext {
-    unsigned char buf[FIFOSIZE];
+    DECLARE_ALIGNED(32, unsigned, buf)[FIFOSIZE];
     unsigned pos;
 } DSDContext;
 
diff --git a/libavcodec/dsddec.c b/libavcodec/dsddec.c
index 2c5c357acc..16bf1c38dc 100644
--- a/libavcodec/dsddec.c
+++ b/libavcodec/dsddec.c
@@ -53,7 +53,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
     silence = avctx->codec_id == AV_CODEC_ID_DSD_LSBF || avctx->codec_id == AV_CODEC_ID_DSD_LSBF_PLANAR ? ff_reverse[DSD_SILENCE] : DSD_SILENCE;
     for (i = 0; i < avctx->channels; i++) {
         s[i].pos = 0;
-        memset(s[i].buf, silence, sizeof(s[i].buf));
+        for (int j = 0; j < FF_ARRAY_ELEMS(s[i].buf); j++)
+            s[i].buf[j] = silence;
     }
 
     avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
@@ -61,17 +62,20 @@ static av_cold int decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static int decode_frame(AVCodecContext *avctx, void *data,
-                        int *got_frame_ptr, AVPacket *avpkt)
+typedef struct ThreadData { /* per-call arguments handed from decode_frame() to the dsd_channel() jobs via execute2() */
+    AVFrame *frame;   /* frame being decoded; job j reads nb_samples and uses extended_data[j] as its destination */
+    AVPacket *avpkt;  /* input packet; job j reads its source bytes starting at data + j * src_next */
+} ThreadData;
+
+static int dsd_channel(AVCodecContext *avctx, void *tdata, int j, int threadnr)
 {
-    DSDContext * s = avctx->priv_data;
-    AVFrame *frame = data;
-    int ret, i;
     int lsbf = avctx->codec_id == AV_CODEC_ID_DSD_LSBF || avctx->codec_id == AV_CODEC_ID_DSD_LSBF_PLANAR;
-    int src_next;
-    int src_stride;
-
-    frame->nb_samples = avpkt->size / avctx->channels;
+    DSDContext *s = avctx->priv_data;
+    ThreadData *td = tdata;
+    AVFrame *frame = td->frame;
+    AVPacket *avpkt = td->avpkt;
+    int src_next, src_stride;
+    float *dst = ((float **)frame->extended_data)[j];
 
     if (avctx->codec_id == AV_CODEC_ID_DSD_LSBF_PLANAR || avctx->codec_id == AV_CODEC_ID_DSD_MSBF_PLANAR) {
         src_next   = frame->nb_samples;
@@ -81,15 +85,28 @@ static int decode_frame(AVCodecContext *avctx, void *data,
         src_stride = avctx->channels;
     }
 
+    ff_dsd2pcm_translate(&s[j], frame->nb_samples, lsbf,
+        avpkt->data + j * src_next, src_stride,
+        dst, 1);
+
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data,
+                        int *got_frame_ptr, AVPacket *avpkt)
+{
+    AVFrame *frame = data;
+    ThreadData td;
+    int ret;
+
+    frame->nb_samples = avpkt->size / avctx->channels;
+
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
 
-    for (i = 0; i < avctx->channels; i++) {
-        float * dst = ((float **)frame->extended_data)[i];
-        ff_dsd2pcm_translate(&s[i], frame->nb_samples, lsbf,
-            avpkt->data + i * src_next, src_stride,
-            dst, 1);
-    }
+    td.frame = frame;
+    td.avpkt = avpkt;
+    avctx->execute2(avctx, dsd_channel, &td, NULL, avctx->channels);
 
     *got_frame_ptr = 1;
     return frame->nb_samples * avctx->channels;
@@ -103,6 +120,7 @@ AVCodec ff_##name_##_decoder = { \
     .id           = AV_CODEC_ID_##id_, \
     .init         = decode_init, \
     .decode       = decode_frame, \
+    .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS, \
     .sample_fmts  = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP, \
                                                    AV_SAMPLE_FMT_NONE }, \
 };
diff --git a/libavcodec/dstdec.c b/libavcodec/dstdec.c
index 0614c99c4b..1525165d09 100644
--- a/libavcodec/dstdec.c
+++ b/libavcodec/dstdec.c
@@ -78,7 +78,6 @@ typedef struct DSTContext {
 static av_cold int decode_init(AVCodecContext *avctx)
 {
     DSTContext *s = avctx->priv_data;
-    int i;
 
     if (avctx->channels > DST_MAX_CHANNELS) {
         avpriv_request_sample(avctx, "Channel count %d", avctx->channels);
@@ -87,8 +86,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
 
-    for (i = 0; i < avctx->channels; i++)
-        memset(s->dsdctx[i].buf, 0x69, sizeof(s->dsdctx[i].buf));
+    for (int i = 0; i < avctx->channels; i++) {
+        for (int j = 0; j < FF_ARRAY_ELEMS(s->dsdctx[i].buf); j++)
+            s->dsdctx[i].buf[j] = 0x69;
+    }
 
     ff_init_dsd_data();
 
-- 
2.22.0