From 90cbabbc70fbd74831ab9d106ac4cdfb035a00b5 Mon Sep 17 00:00:00 2001
From: Martin Vignali <martin.vignali@gmail.com>
Date: Sat, 21 Oct 2017 21:42:44 +0200
Subject: [PATCH 1/3] libavcodec/lossless_videodsp: add AVX2 version of add_bytes

---
 libavcodec/lossless_videodsp.h          | 2 +-
 libavcodec/x86/lossless_videodsp.asm    | 2 ++
 libavcodec/x86/lossless_videodsp_init.c | 4 ++++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/libavcodec/lossless_videodsp.h b/libavcodec/lossless_videodsp.h
index cecf0fe1e5..ccab39bac6 100644
--- a/libavcodec/lossless_videodsp.h
+++ b/libavcodec/lossless_videodsp.h
@@ -29,7 +29,7 @@
 #include "libavutil/cpu.h"
 
 typedef struct LLVidDSPContext {
-    void (*add_bytes)(uint8_t *dst /* align 16 */, uint8_t *src /* align 16 */,
+    void (*add_bytes)(uint8_t *dst /* align 32 */, uint8_t *src /* align 32 */,
                       ptrdiff_t w);
     void (*add_median_pred)(uint8_t *dst, const uint8_t *top,
                             const uint8_t *diff, ptrdiff_t w,
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index 443fe02951..0237f9f242 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -216,6 +216,8 @@ ADD_BYTES
 %endif
 INIT_XMM sse2
 ADD_BYTES
+INIT_YMM avx2
+ADD_BYTES
 
 %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
     add     wd, wd
diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
index 21bbd12bd2..4f20c1ce92 100644
--- a/libavcodec/x86/lossless_videodsp_init.c
+++ b/libavcodec/x86/lossless_videodsp_init.c
@@ -25,6 +25,7 @@
 
 void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t w);
 void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
+void ff_add_bytes_avx2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
 
 void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
                                const uint8_t *diff, ptrdiff_t w,
@@ -115,4 +116,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
     if (EXTERNAL_SSE4(cpu_flags)) {
         c->add_left_pred_int16 = ff_add_left_pred_int16_sse4;
     }
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        c->add_bytes       = ff_add_bytes_avx2;
+    }
 }
-- 
2.11.0 (Apple Git-81)

