From cd75407fa0e0a6e418a1f15a05785580f3d6309b Mon Sep 17 00:00:00 2001
From: Thomas Mundt <tmundt75@gmail.com>
Date: Wed, 30 Aug 2017 03:37:18 +0200
Subject: [PATCH] avfilter/interlace: prevent over-sharpening with the complex
 low-pass filter

The complex vertical low-pass filter slightly over-sharpens the picture. This becomes visible when several transcodings are cascaded and the error accumulates, e.g. over several generations of HD->SD and SD->HD conversion.
To prevent this behaviour, the destination pixel must not exceed the source pixel when the average of the pixels above and below is less than the source pixel, and it must not fall below the source pixel when that average is greater than the source pixel.

Signed-off-by: Thomas Mundt <tmundt75@gmail.com>
---
 libavfilter/vf_interlace.c              | 17 +++++++---
 libavfilter/vf_tinterlace.c             | 17 +++++++---
 libavfilter/x86/vf_interlace.asm        | 55 ++++++++++++++++++++-------------
 tests/ref/fate/filter-interlace-complex | 50 +++++++++++++++---------------
 4 files changed, 84 insertions(+), 55 deletions(-)

diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
index d72cb43..1a92780 100644
--- a/libavfilter/vf_interlace.c
+++ b/libavfilter/vf_interlace.c
@@ -83,14 +83,23 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
     const uint8_t *srcp_below = srcp + pref;
     const uint8_t *srcp_above2 = srcp + mref * 2;
     const uint8_t *srcp_below2 = srcp + pref * 2;
-    int i;
+    int i, srcp_x, srcp_ab;
     for (i = 0; i < linesize; i++) {
         // this calculation is an integer representation of
         // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
         // '4 +' is for rounding.
-        dstp[i] = av_clip_uint8((4 + (srcp[i] << 2)
-                  + ((srcp[i] + srcp_above[i] + srcp_below[i]) << 1)
-                  - srcp_above2[i] - srcp_below2[i]) >> 3);
+        srcp_x = srcp[i] << 1;
+        srcp_ab = srcp_above[i] + srcp_below[i];
+        dstp[i] = av_clip_uint8((4 + ((srcp[i] + srcp_x + srcp_ab) << 1)
+                                - srcp_above2[i] - srcp_below2[i]) >> 3);
+        // Prevent over-sharpening:
+        // dst must not exceed src when the average of above and below
+        // is less than src. And the other way around.
+        if (srcp_ab > srcp_x) {
+            if (dstp[i] < srcp[i])
+                dstp[i] = srcp[i];
+        } else if (dstp[i] > srcp[i])
+            dstp[i] = srcp[i];
     }
 }
 
diff --git a/libavfilter/vf_tinterlace.c b/libavfilter/vf_tinterlace.c
index 6599707..81d2d77 100644
--- a/libavfilter/vf_tinterlace.c
+++ b/libavfilter/vf_tinterlace.c
@@ -110,14 +110,23 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t
     const uint8_t *srcp_below = srcp + pref;
     const uint8_t *srcp_above2 = srcp + mref * 2;
     const uint8_t *srcp_below2 = srcp + pref * 2;
-    int i;
+    int i, srcp_x, srcp_ab;
     for (i = 0; i < width; i++) {
         // this calculation is an integer representation of
         // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
         // '4 +' is for rounding.
-        dstp[i] = av_clip_uint8((4 + (srcp[i] << 2)
-                  + ((srcp[i] + srcp_above[i] + srcp_below[i]) << 1)
-                  - srcp_above2[i] - srcp_below2[i]) >> 3);
+        srcp_x = srcp[i] << 1;
+        srcp_ab = srcp_above[i] + srcp_below[i];
+        dstp[i] = av_clip_uint8((4 + ((srcp[i] + srcp_x + srcp_ab) << 1)
+                                - srcp_above2[i] - srcp_below2[i]) >> 3);
+        // Prevent over-sharpening:
+        // dst must not exceed src when the average of above and below
+        // is less than src. And the other way around.
+        if (srcp_ab > srcp_x) {
+            if (dstp[i] < srcp[i])
+                dstp[i] = srcp[i];
+        } else if (dstp[i] > srcp[i])
+            dstp[i] = srcp[i];
     }
 }
 
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index c601fd7..d0fffd2 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -63,41 +63,46 @@ REP_RET
 %endmacro
 
 %macro LOWPASS_LINE_COMPLEX 0
-cglobal lowpass_line_complex, 5, 5, 7, dst, h, src, mref, pref
-    pxor m6, m6
+cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref
+    pxor m7, m7
 .loop:
     mova m0, [srcq+mrefq]
     mova m2, [srcq+prefq]
     mova m1, m0
     mova m3, m2
-    punpcklbw m0, m6
-    punpcklbw m2, m6
-    punpckhbw m1, m6
-    punpckhbw m3, m6
+    punpcklbw m0, m7
+    punpcklbw m2, m7
+    punpckhbw m1, m7
+    punpckhbw m3, m7
     paddw m0, m2
     paddw m1, m3
+    mova m6, m0
+    mova m5, m1
+    mova m2, [srcq]
+    mova m3, m2
+    punpcklbw m2, m7
+    punpckhbw m3, m7
+    paddw m0, m2
+    paddw m1, m3
+    psllw m2, 1
+    psllw m3, 1
+    paddw m0, m2
+    paddw m1, m3
+    psllw m0, 1
+    psllw m1, 1
+    pcmpgtw m6, m2
+    pcmpgtw m5, m3
+    packsswb m6, m5
     mova m2, [srcq+mrefq*2]
     mova m4, [srcq+prefq*2]
     mova m3, m2
     mova m5, m4
-    punpcklbw m2, m6
-    punpcklbw m4, m6
-    punpckhbw m3, m6
-    punpckhbw m5, m6
+    punpcklbw m2, m7
+    punpcklbw m4, m7
+    punpckhbw m3, m7
+    punpckhbw m5, m7
     paddw m2, m4
     paddw m3, m5
-    mova m4, [srcq]
-    mova m5, m4
-    punpcklbw m4, m6
-    punpckhbw m5, m6
-    paddw m0, m4
-    paddw m1, m5
-    psllw m0, 1
-    psllw m1, 1
-    psllw m4, 2
-    psllw m5, 2
-    paddw m0, m4
-    paddw m1, m5
     paddw m0, [pw_4]
     paddw m1, [pw_4]
     psubusw m0, m2
@@ -105,6 +110,12 @@ cglobal lowpass_line_complex, 5, 5, 7, dst, h, src, mref, pref
     psrlw m0, 3
     psrlw m1, 3
     packuswb m0, m1
+    mova m1, m0
+    pmaxub m0, [srcq]
+    pminub m1, [srcq]
+    pand m0, m6
+    pandn m6, m1
+    por m0, m6
     mova [dstq], m0
 
     add dstq, mmsize
diff --git a/tests/ref/fate/filter-interlace-complex b/tests/ref/fate/filter-interlace-complex
index 3b78125..e8db46a 100644
--- a/tests/ref/fate/filter-interlace-complex
+++ b/tests/ref/fate/filter-interlace-complex
@@ -3,28 +3,28 @@
 #codec_id 0: rawvideo
 #dimensions 0: 352x288
 #sar 0: 0/1
-0,          0,          0,        1,   152064, 0x91290ae6
-0,          1,          1,        1,   152064, 0x24f34baf
-0,          2,          2,        1,   152064, 0x799fc436
-0,          3,          3,        1,   152064, 0xfe42c0a9
-0,          4,          4,        1,   152064, 0xb496f879
-0,          5,          5,        1,   152064, 0xc43b36c9
-0,          6,          6,        1,   152064, 0xb65abbf4
-0,          7,          7,        1,   152064, 0xd1806312
-0,          8,          8,        1,   152064, 0x0faf56c1
-0,          9,          9,        1,   152064, 0x4de73b75
-0,         10,         10,        1,   152064, 0xf90f24ce
-0,         11,         11,        1,   152064, 0xc1efd6e0
-0,         12,         12,        1,   152064, 0xeb8e5894
-0,         13,         13,        1,   152064, 0xe8aacabc
-0,         14,         14,        1,   152064, 0x8bd2163c
-0,         15,         15,        1,   152064, 0xbfc72ac2
-0,         16,         16,        1,   152064, 0x1e8f6f56
-0,         17,         17,        1,   152064, 0xe3d19450
-0,         18,         18,        1,   152064, 0x3872af32
-0,         19,         19,        1,   152064, 0xf23be72a
-0,         20,         20,        1,   152064, 0x024f8f2b
-0,         21,         21,        1,   152064, 0xb49301ea
-0,         22,         22,        1,   152064, 0x84f5d056
-0,         23,         23,        1,   152064, 0xd2c09ca5
-0,         24,         24,        1,   152064, 0xe9b5ddfd
+0,          0,          0,        1,   152064, 0x778ab0c1
+0,          1,          1,        1,   152064, 0xdc30f7c3
+0,          2,          2,        1,   152064, 0xcb637467
+0,          3,          3,        1,   152064, 0xcbf778ce
+0,          4,          4,        1,   152064, 0x573d9f6d
+0,          5,          5,        1,   152064, 0xd794df2c
+0,          6,          6,        1,   152064, 0x3e885448
+0,          7,          7,        1,   152064, 0xccec1794
+0,          8,          8,        1,   152064, 0x6f32f51a
+0,          9,          9,        1,   152064, 0x0657f5ac
+0,         10,         10,        1,   152064, 0xfa82d600
+0,         11,         11,        1,   152064, 0x15ff7f32
+0,         12,         12,        1,   152064, 0x1cac0342
+0,         13,         13,        1,   152064, 0x6afb7c49
+0,         14,         14,        1,   152064, 0x6c47d554
+0,         15,         15,        1,   152064, 0xe0fbd132
+0,         16,         16,        1,   152064, 0x4f891e8d
+0,         17,         17,        1,   152064, 0x88554045
+0,         18,         18,        1,   152064, 0x0c8e6192
+0,         19,         19,        1,   152064, 0xf73c91c3
+0,         20,         20,        1,   152064, 0x49ac328d
+0,         21,         21,        1,   152064, 0xf18ebd82
+0,         22,         22,        1,   152064, 0x3359760d
+0,         23,         23,        1,   152064, 0x5c85601a
+0,         24,         24,        1,   152064, 0x28c1657b
-- 
2.7.4.windows.1

