On little-endian PPC, vec_ld + VEC_SPLAT16 selects the wrong coefficient
lane, so the chroma MC taps use incorrect weights and produce visible
artifacts.

Load the 16-bit coefficients as scalars and use vec_splats() so the
same value is broadcast regardless of endianness.

Signed-off-by: jfiusdq <[email protected]>
---
 libavcodec/ppc/h264chroma_template.c | 40 +++++++++++++---------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/libavcodec/ppc/h264chroma_template.c b/libavcodec/ppc/h264chroma_template.c
index c64856bb14..25c446a048 100644
--- a/libavcodec/ppc/h264chroma_template.c
+++ b/libavcodec/ppc/h264chroma_template.c
@@ -113,19 +113,17 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, const uint8_t * src,
                                            ptrdiff_t stride, int h,
                                            int x, int y)
 {
-    DECLARE_ALIGNED(16, signed int, ABCD)[4] =
-                        {((8 - x) * (8 - y)),
-                         ((    x) * (8 - y)),
-                         ((8 - x) * (    y)),
-                         ((    x) * (    y))};
+    const int a = (8 - x) * (8 - y);
+    const int b = x * (8 - y);
+    const int c = (8 - x) * y;
+    const int d = x * y;
     register int i;
     vec_u8 fperm;
     LOAD_ZERO;
-    const vec_s32 vABCD = vec_ld(0, ABCD);
-    const vec_s16 vA = VEC_SPLAT16(vABCD, 1);
-    const vec_s16 vB = VEC_SPLAT16(vABCD, 3);
-    const vec_s16 vC = VEC_SPLAT16(vABCD, 5);
-    const vec_s16 vD = VEC_SPLAT16(vABCD, 7);
+    const vec_s16 vA = vec_splats((signed short)a);
+    const vec_s16 vB = vec_splats((signed short)b);
+    const vec_s16 vC = vec_splats((signed short)c);
+    const vec_s16 vD = vec_splats((signed short)d);
     const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
     const vec_u16 v6us = vec_splat_u16(6);
 
@@ -159,14 +157,14 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, const uint8_t * src,
     vsrc0ssH = (vec_s16)VEC_MERGEH(zero_u8v,(vec_u8)vsrc0uc);
     vsrc1ssH = (vec_s16)VEC_MERGEH(zero_u8v,(vec_u8)vsrc1uc);
 
-    if (ABCD[3]) {
+    if (d) {
         for (i = 0 ; i < h ; i++) {
             GET_VSRC(vsrc2uc, vsrc3uc, stride, 16, vsrcperm0, vsrcperm1, src);
             CHROMA_MC8_ALTIVEC_CORE(v32ss, noop);
         }
     } else {
         const vec_s16 vE = vec_add(vB, vC);
-        if (ABCD[2]) { // x == 0 B == 0
+        if (c) { // x == 0 B == 0
             for (i = 0 ; i < h ; i++) {
                 GET_VSRC1(vsrc1uc, stride, 15, vsrcperm0, src);
                 CHROMA_MC8_ALTIVEC_CORE_SIMPLE;
@@ -188,19 +186,17 @@ static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t *dst, const uint8_t *src,
                                                  ptrdiff_t stride, int h,
                                                  int x, int y)
 {
-   DECLARE_ALIGNED(16, signed int, ABCD)[4] =
-                        {((8 - x) * (8 - y)),
-                         ((    x) * (8 - y)),
-                         ((8 - x) * (    y)),
-                         ((    x) * (    y))};
+    const int a = (8 - x) * (8 - y);
+    const int b = x * (8 - y);
+    const int c = (8 - x) * y;
+    const int d = x * y;
     register int i;
     vec_u8 fperm;
     LOAD_ZERO;
-    const vec_s32 vABCD = vec_ld(0, ABCD);
-    const vec_s16 vA = VEC_SPLAT16(vABCD, 1);
-    const vec_s16 vB = VEC_SPLAT16(vABCD, 3);
-    const vec_s16 vC = VEC_SPLAT16(vABCD, 5);
-    const vec_s16 vD = VEC_SPLAT16(vABCD, 7);
+    const vec_s16 vA = vec_splats((signed short)a);
+    const vec_s16 vB = vec_splats((signed short)b);
+    const vec_s16 vC = vec_splats((signed short)c);
+    const vec_s16 vD = vec_splats((signed short)d);
     const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
     const vec_u16 v6us  = vec_splat_u16(6);
 
-- 
2.47.3


_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to