PR #22368 opened by jfiusdq
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22368
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22368.patch

As asked in https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22318

Tested on both `ppc64le` and `ppc64be`, FATE tests don't have new failures.


>From bdcea490ebb7863dbf19f41869c4cf2f5af2e666 Mon Sep 17 00:00:00 2001
From: jfiusdq <[email protected]>
Date: Tue, 3 Mar 2026 18:38:40 +0100
Subject: [PATCH 1/4] avcodec/error_resilience: use pixelutils for SAD

error_resilience only needs a 16x16 SAD and currently gets it through
MECmpContext, including the MPVEncContext-shaped callback type.

Switch ER to av_pixelutils_get_sad_fn(4, 4, 0, ...) and store the
callback as av_pixelutils_sad_fn. Keep a local C 16x16 fallback so
builds without CONFIG_PIXELUTILS still work.

The is_intra_more_likely() updates are signature-only:
- old call shape: sad(NULL, blk1, blk2, stride, 16)
- new call shape: sad(blk1, stride, blk2, stride)

The heuristic itself is unchanged; it still compares
SAD(last, current) against SAD(last, spatially shifted last)
to bias intra/inter decision-making under damage.

This removes the ER dependency on me_cmp.
---
 libavcodec/error_resilience.c | 35 +++++++++++++++++++++++++++--------
 libavcodec/error_resilience.h |  6 ++----
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index 8cf5bc6a3c..b6a6d5605b 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -25,23 +25,39 @@
  * Error resilience / concealment.
  */
 
+#include "config.h"
+
 #include <limits.h>
 
 #include "libavutil/avassert.h"
 #include "libavutil/attributes.h"
 #include "libavutil/mem.h"
+#include "libavutil/pixelutils.h"
 #include "avcodec.h"
 #include "error_resilience.h"
 #include "mathops.h"
-#include "me_cmp.h"
 #include "mpegutils.h"
 #include "mpegvideo.h"
 #include "threadframe.h"
 #include "threadprogress.h"
 
+static int sad_16x16_c(const uint8_t *src1, ptrdiff_t stride1,
+                       const uint8_t *src2, ptrdiff_t stride2)
+{
+    int sum = 0;
+
+    for (int y = 0; y < 16; y++) {
+        for (int x = 0; x < 16; x++)
+            sum += FFABS(src1[x] - src2[x]);
+        src1 += stride1;
+        src2 += stride2;
+    }
+
+    return sum;
+}
+
 av_cold int ff_er_init(ERContext *const s)
 {
-    MECmpContext mecc;
     unsigned mb_array_size = s->mb_height * s->mb_stride;
 
     s->error_status_table = av_mallocz(mb_array_size);
@@ -51,8 +67,11 @@ av_cold int ff_er_init(ERContext *const s)
     if (!s->er_temp_buffer)
         return AVERROR(ENOMEM);
 
-    ff_me_cmp_init(&mecc, s->avctx);
-    s->sad = mecc.sad[0];
+#if CONFIG_PIXELUTILS
+    s->sad = av_pixelutils_get_sad_fn(4, 4, 0, s->avctx);
+#endif
+    if (!s->sad)
+        s->sad = sad_16x16_c;
 
     return 0;
 }
@@ -791,12 +810,12 @@ static int is_intra_more_likely(ERContext *s)
                 } else {
                     ff_thread_progress_await(s->last_pic.progress, mb_y);
                 }
-                is_intra_likely += s->sad(NULL, last_mb_ptr, mb_ptr,
-                                          linesize[0], 16);
+                is_intra_likely += s->sad(last_mb_ptr, linesize[0],
+                                          mb_ptr, linesize[0]);
                 // FIXME need await_progress() here
-                is_intra_likely -= s->sad(NULL, last_mb_ptr,
+                is_intra_likely -= s->sad(last_mb_ptr, linesize[0],
                                           last_mb_ptr + linesize[0] * 16,
-                                          linesize[0], 16);
+                                          linesize[0]);
             } else {
                 if (IS_INTRA(s->cur_pic.mb_type[mb_xy]))
                    is_intra_likely++;
diff --git a/libavcodec/error_resilience.h b/libavcodec/error_resilience.h
index 1beae5a6b0..0dfc805216 100644
--- a/libavcodec/error_resilience.h
+++ b/libavcodec/error_resilience.h
@@ -23,6 +23,7 @@
 #include <stdatomic.h>
 
 #include "avcodec.h"
+#include "libavutil/pixelutils.h"
 
 /// current MB is the first after a resync marker
 #define VP_START               1
@@ -36,8 +37,6 @@
 #define ER_MB_ERROR (ER_AC_ERROR|ER_DC_ERROR|ER_MV_ERROR)
 #define ER_MB_END   (ER_AC_END|ER_DC_END|ER_MV_END)
 
-typedef struct MPVEncContext MPVEncContext;
-
 typedef struct ERPicture {
     AVFrame *f;
     const struct ThreadFrame *tf;
@@ -54,8 +53,7 @@ typedef struct ERPicture {
 typedef struct ERContext {
     AVCodecContext *avctx;
 
-    int (*sad)(MPVEncContext *unused, const uint8_t *blk1,
-               const uint8_t *blk2, ptrdiff_t stride, int h);
+    av_pixelutils_sad_fn sad;
 
     int *mb_index2xy;
     int mb_num;
-- 
2.52.0


>From e07d41ea78bd031cdeb42f26375684f03d6f9ed3 Mon Sep 17 00:00:00 2001
From: jfiusdq <[email protected]>
Date: Tue, 3 Mar 2026 18:38:44 +0100
Subject: [PATCH 2/4] avutil/pixelutils: add PPC AltiVec/VSX SAD init

Add a PPC backend to pixelutils SAD dispatch and hook it into
av_pixelutils_get_sad_fn().

Implement 16x16 SAD with:
- an AltiVec baseline
- a POWER8+ VSX variant selected at runtime

Also add the ppc build wiring.

The load helper uses bounded vector loads via memcpy so unaligned
small-buffer cases stay sanitizer-clean on ppc64le and ppc64be.
---
 libavutil/pixelutils.c     |   4 ++
 libavutil/ppc/Makefile     |   2 +
 libavutil/ppc/pixelutils.c | 128 +++++++++++++++++++++++++++++++++++++
 libavutil/ppc/pixelutils.h |  26 ++++++++
 4 files changed, 160 insertions(+)
 create mode 100644 libavutil/ppc/pixelutils.c
 create mode 100644 libavutil/ppc/pixelutils.h

diff --git a/libavutil/pixelutils.c b/libavutil/pixelutils.c
index 8e91f0a2cc..cf7b7272c6 100644
--- a/libavutil/pixelutils.c
+++ b/libavutil/pixelutils.c
@@ -29,6 +29,7 @@
 #include "macros.h"
 
 #include "x86/pixelutils.h"
+#include "ppc/pixelutils.h"
 
 static av_always_inline int sad_wxh(const uint8_t *src1, ptrdiff_t stride1,
                                     const uint8_t *src2, ptrdiff_t stride2,
@@ -89,6 +90,9 @@ av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligned
 #if ARCH_X86 && HAVE_X86ASM
     ff_pixelutils_sad_init_x86(sad, aligned);
 #endif
+#if ARCH_PPC && HAVE_ALTIVEC
+    ff_pixelutils_sad_init_ppc(sad, aligned);
+#endif
 
     return sad[w_bits - 1];
 #endif
diff --git a/libavutil/ppc/Makefile b/libavutil/ppc/Makefile
index a0febf8d52..3188d4f446 100644
--- a/libavutil/ppc/Makefile
+++ b/libavutil/ppc/Makefile
@@ -3,4 +3,6 @@ OBJS += ppc/cpu.o                                                      \
 
 ALTIVEC-OBJS += ppc/float_dsp_altivec.o                                 \
 
+ALTIVEC-OBJS-$(CONFIG_PIXELUTILS) += ppc/pixelutils.o                  \
+
 VSX-OBJS     += ppc/float_dsp_vsx.o                                     \
diff --git a/libavutil/ppc/pixelutils.c b/libavutil/ppc/pixelutils.c
new file mode 100644
index 0000000000..f104889b7a
--- /dev/null
+++ b/libavutil/ppc/pixelutils.c
@@ -0,0 +1,128 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/util_altivec.h"
+
+#include "pixelutils.h"
+
+#if HAVE_ALTIVEC
+static av_always_inline vector unsigned char load_u8x16(const uint8_t *src)
+{
+    vector unsigned char v;
+    memcpy(&v, src, sizeof(v));
+    return v;
+}
+
+static av_always_inline vector unsigned char sad_diff_u8(vector unsigned char a,
+                                                         vector unsigned char b)
+{
+    return vec_sub(vec_max(a, b), vec_min(a, b));
+}
+
+static int pixelutils_sad_16x16_altivec(const uint8_t *src1, ptrdiff_t stride1,
+                                        const uint8_t *src2, ptrdiff_t stride2)
+{
+    int i;
+    int __attribute__((aligned(16))) s = 0;
+    const vector unsigned int zero =
+        (const vector unsigned int) vec_splat_u32(0);
+    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
+    vector signed int sumdiffs;
+
+    for (i = 0; i < 16; i++) {
+        vector unsigned char t1 = load_u8x16(src1);
+        vector unsigned char t2 = load_u8x16(src2);
+
+        sad = vec_sum4s(sad_diff_u8(t1, t2), sad);
+
+        src1 += stride1;
+        src2 += stride2;
+    }
+
+    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
+    sumdiffs = vec_splat(sumdiffs, 3);
+    vec_ste(sumdiffs, 0, &s);
+
+    return s;
+}
+
+#if HAVE_VSX
+/*
+ * Tuned for POWER9 pipelines (dual independent accumulators, 2 rows/iter),
+ * while only using VSX/ISA2.07 operations so it remains POWER8 compatible.
+ */
+static int pixelutils_sad_16x16_power8plus_vsx(const uint8_t *src1, ptrdiff_t stride1,
+                                               const uint8_t *src2, ptrdiff_t stride2)
+{
+    int i;
+    int __attribute__((aligned(16))) s = 0;
+    const vector unsigned int zero =
+        (const vector unsigned int) vec_splat_u32(0);
+    vector unsigned int sad0 = (vector unsigned int) vec_splat_u32(0);
+    vector unsigned int sad1 = (vector unsigned int) vec_splat_u32(0);
+    vector signed int sumdiffs;
+
+    for (i = 0; i < 16; i += 2) {
+        vector unsigned char t10 = load_u8x16(src1);
+        vector unsigned char t20 = load_u8x16(src2);
+        vector unsigned char t11 = load_u8x16(src1 + stride1);
+        vector unsigned char t21 = load_u8x16(src2 + stride2);
+
+        sad0 = vec_sum4s(sad_diff_u8(t10, t20), sad0);
+        sad1 = vec_sum4s(sad_diff_u8(t11, t21), sad1);
+
+        src1 += 2 * stride1;
+        src2 += 2 * stride2;
+    }
+
+    sad0 = vec_add(sad0, sad1);
+    sumdiffs = vec_sums((vector signed int) sad0, (vector signed int) zero);
+    sumdiffs = vec_splat(sumdiffs, 3);
+    vec_ste(sumdiffs, 0, &s);
+
+    return s;
+}
+#endif /* HAVE_VSX */
+#endif
+
+av_cold void ff_pixelutils_sad_init_ppc(av_pixelutils_sad_fn *sad, int aligned)
+{
+#if HAVE_ALTIVEC
+    int cpu_flags = av_get_cpu_flags();
+
+    (void)aligned;
+    if (!PPC_ALTIVEC(cpu_flags))
+        return;
+
+    sad[3] = pixelutils_sad_16x16_altivec;
+#if HAVE_VSX
+    if (PPC_POWER8(cpu_flags))
+        sad[3] = pixelutils_sad_16x16_power8plus_vsx;
+#endif
+#else
+    (void)sad;
+    (void)aligned;
+#endif
+}
diff --git a/libavutil/ppc/pixelutils.h b/libavutil/ppc/pixelutils.h
new file mode 100644
index 0000000000..c737a695d7
--- /dev/null
+++ b/libavutil/ppc/pixelutils.h
@@ -0,0 +1,26 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PPC_PIXELUTILS_H
+#define AVUTIL_PPC_PIXELUTILS_H
+
+#include "libavutil/pixelutils.h"
+
+void ff_pixelutils_sad_init_ppc(av_pixelutils_sad_fn *sad, int aligned);
+
+#endif /* AVUTIL_PPC_PIXELUTILS_H */
-- 
2.52.0


>From da0799f5505225782dd729ad9285be92c063a4a4 Mon Sep 17 00:00:00 2001
From: user <user@localhost>
Date: Tue, 3 Mar 2026 20:34:02 +0100
Subject: [PATCH 3/4] avcodec/ppc/hpeldsp_altivec: avoid UBSAN alignment abort
 on LE

---
 libavcodec/ppc/hpeldsp_altivec.c | 37 +++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/libavcodec/ppc/hpeldsp_altivec.c b/libavcodec/ppc/hpeldsp_altivec.c
index d8cf7518d5..110f04d856 100644
--- a/libavcodec/ppc/hpeldsp_altivec.c
+++ b/libavcodec/ppc/hpeldsp_altivec.c
@@ -32,6 +32,19 @@
 #include "hpeldsp_altivec.h"
 
 #if HAVE_ALTIVEC
+
+#if HAVE_BIGENDIAN
+#define vec_ld_unaligned(offset, src) VEC_LD(offset, src)
+#else
+#if defined(__has_attribute) && __has_attribute(no_sanitize)
+__attribute__((no_sanitize("alignment")))
+#endif
+static av_always_inline vec_u8 vec_ld_unaligned(int offset, const uint8_t *src)
+{
+    return vec_vsx_ld(offset, src);
+}
+#endif
+
 /* next one assumes that ((line_size % 16) == 0) */
 void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
@@ -73,7 +86,7 @@ void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t li
     int i;
     for (i = 0; i < h; i++) {
         blockv = vec_ld(0, block);
-        pixelsv = VEC_LD( 0, pixels);
+        pixelsv = vec_ld_unaligned(0, pixels);
         blockv = vec_avg(blockv,pixelsv);
         vec_st(blockv, 0, (unsigned char*)block);
         pixels+=line_size;
@@ -93,7 +106,7 @@ static void avg_pixels8_altivec(uint8_t * block, const 
uint8_t * pixels, ptrdiff
        int rightside = ((unsigned long)block & 0x0000000F);
 
        blockv = vec_ld(0, block);
-       pixelsv = VEC_LD( 0, pixels);
+       pixelsv = vec_ld_unaligned(0, pixels);
 
        if (rightside) {
            pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1));
@@ -120,8 +133,8 @@ static void put_pixels8_xy2_altivec(uint8_t *block, const 
uint8_t *pixels, ptrdi
     register const vector unsigned char vczero = (const vector unsigned 
char)vec_splat_u8(0);
     register const vector unsigned short vctwo = (const vector unsigned 
short)vec_splat_u16(2);
 
-    pixelsv1 = VEC_LD(0, pixels);
-    pixelsv2 = VEC_LD(1, pixels);
+    pixelsv1 = vec_ld_unaligned(0, pixels);
+    pixelsv2 = vec_ld_unaligned(1, pixels);
     pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
     pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
 
@@ -168,8 +181,8 @@ static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, 
const uint8_t *pixels
     register const vector unsigned short vcone = (const vector unsigned 
short)vec_splat_u16(1);
     register const vector unsigned short vctwo = (const vector unsigned 
short)vec_splat_u16(2);
 
-    pixelsv1 = VEC_LD(0, pixels);
-    pixelsv2 = VEC_LD(1, pixels);
+    pixelsv1 = vec_ld_unaligned(0, pixels);
+    pixelsv2 = vec_ld_unaligned(1, pixels);
     pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
     pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
     pixelssum1 = vec_add((vector unsigned short)pixelsv1,
@@ -215,8 +228,8 @@ static void put_pixels16_xy2_altivec(uint8_t * block, const 
uint8_t * pixels, pt
     register const vector unsigned char vczero = (const vector unsigned 
char)vec_splat_u8(0);
     register const vector unsigned short vctwo = (const vector unsigned 
short)vec_splat_u16(2);
 
-    pixelsv1 = VEC_LD(0, pixels);
-    pixelsv2 = VEC_LD(1, pixels);
+    pixelsv1 = vec_ld_unaligned(0, pixels);
+    pixelsv2 = vec_ld_unaligned(1, pixels);
     pixelsv3 = VEC_MERGEL(vczero, pixelsv1);
     pixelsv4 = VEC_MERGEL(vczero, pixelsv2);
     pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
@@ -271,8 +284,8 @@ static void put_no_rnd_pixels16_xy2_altivec(uint8_t * 
block, const uint8_t * pix
     register const vector unsigned short vcone = (const vector unsigned 
short)vec_splat_u16(1);
     register const vector unsigned short vctwo = (const vector unsigned 
short)vec_splat_u16(2);
 
-    pixelsv1 = VEC_LD(0, pixels);
-    pixelsv2 = VEC_LD(1, pixels);
+    pixelsv1 = vec_ld_unaligned(0, pixels);
+    pixelsv2 = vec_ld_unaligned(1, pixels);
     pixelsv3 = VEC_MERGEL(vczero, pixelsv1);
     pixelsv4 = VEC_MERGEL(vczero, pixelsv2);
     pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
@@ -326,8 +339,8 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const 
uint8_t *pixels, ptrdi
     register const vector unsigned short vctwo = (const vector unsigned short)
                                         vec_splat_u16(2);
 
-    pixelsv1 = VEC_LD(0, pixels);
-    pixelsv2 = VEC_LD(1, pixels);
+    pixelsv1 = vec_ld_unaligned(0, pixels);
+    pixelsv2 = vec_ld_unaligned(1, pixels);
     pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
     pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
     pixelssum1 = vec_add((vector unsigned short)pixelsv1,
-- 
2.52.0


>From 716b7c6909f81bda7337a9f892aaea17cda80145 Mon Sep 17 00:00:00 2001
From: user <user@localhost>
Date: Tue, 3 Mar 2026 20:36:15 +0100
Subject: [PATCH 4/4] avcodec/ppc/hpeldsp_altivec: use wrapped loads in all
 unaligned paths

---
 libavcodec/ppc/hpeldsp_altivec.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/libavcodec/ppc/hpeldsp_altivec.c b/libavcodec/ppc/hpeldsp_altivec.c
index 110f04d856..7ba43f94e6 100644
--- a/libavcodec/ppc/hpeldsp_altivec.c
+++ b/libavcodec/ppc/hpeldsp_altivec.c
@@ -64,10 +64,10 @@ void ff_put_pixels16_altivec(uint8_t *block, const uint8_t 
*pixels, ptrdiff_t li
 // -funroll-loops w/ this is bad - 74 cycles again.
 // all this is on a 7450, tuning for the 7450
     for (i = 0; i < h; i += 4) {
-        pixelsv1  = unaligned_load( 0, pixels);
-        pixelsv1B = unaligned_load(line_size, pixels);
-        pixelsv1C = unaligned_load(line_size_2, pixels);
-        pixelsv1D = unaligned_load(line_size_3, pixels);
+        pixelsv1  = vec_ld_unaligned( 0, pixels);
+        pixelsv1B = vec_ld_unaligned(line_size, pixels);
+        pixelsv1C = vec_ld_unaligned(line_size_2, pixels);
+        pixelsv1D = vec_ld_unaligned(line_size_3, pixels);
         VEC_ST(pixelsv1, 0, (unsigned char*)block);
         VEC_ST(pixelsv1B, line_size, (unsigned char*)block);
         VEC_ST(pixelsv1C, line_size_2, (unsigned char*)block);
@@ -146,8 +146,8 @@ static void put_pixels8_xy2_altivec(uint8_t *block, const 
uint8_t *pixels, ptrdi
         int rightside = ((unsigned long)block & 0x0000000F);
         blockv = vec_ld(0, block);
 
-        pixelsv1 = unaligned_load(line_size, pixels);
-        pixelsv2 = unaligned_load(line_size+1, pixels);
+        pixelsv1 = vec_ld_unaligned(line_size, pixels);
+        pixelsv2 = vec_ld_unaligned(line_size+1, pixels);
         pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
         pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
         pixelssum2 = vec_add((vector unsigned short)pixelsv1,
@@ -193,8 +193,8 @@ static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, 
const uint8_t *pixels
         int rightside = ((unsigned long)block & 0x0000000F);
         blockv = vec_ld(0, block);
 
-        pixelsv1 = unaligned_load(line_size, pixels);
-        pixelsv2 = unaligned_load(line_size+1, pixels);
+        pixelsv1 = vec_ld_unaligned(line_size, pixels);
+        pixelsv2 = vec_ld_unaligned(line_size+1, pixels);
         pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
         pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
         pixelssum2 = vec_add((vector unsigned short)pixelsv1,
@@ -244,8 +244,8 @@ static void put_pixels16_xy2_altivec(uint8_t * block, const 
uint8_t * pixels, pt
     for (i = 0; i < h ; i++) {
         blockv = vec_ld(0, block);
 
-        pixelsv1 = unaligned_load(line_size, pixels);
-        pixelsv2 = unaligned_load(line_size+1, pixels);
+        pixelsv1 = vec_ld_unaligned(line_size, pixels);
+        pixelsv2 = vec_ld_unaligned(line_size+1, pixels);
 
         pixelsv3 = VEC_MERGEL(vczero, pixelsv1);
         pixelsv4 = VEC_MERGEL(vczero, pixelsv2);
@@ -298,8 +298,8 @@ static void put_no_rnd_pixels16_xy2_altivec(uint8_t * 
block, const uint8_t * pix
     pixelssum1 = vec_add(pixelssum1, vcone);
 
     for (i = 0; i < h ; i++) {
-        pixelsv1 = unaligned_load(line_size, pixels);
-        pixelsv2 = unaligned_load(line_size+1, pixels);
+        pixelsv1 = vec_ld_unaligned(line_size, pixels);
+        pixelsv2 = vec_ld_unaligned(line_size+1, pixels);
 
         pixelsv3 = VEC_MERGEL(vczero, pixelsv1);
         pixelsv4 = VEC_MERGEL(vczero, pixelsv2);
@@ -351,8 +351,8 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const 
uint8_t *pixels, ptrdi
         int rightside = ((unsigned long)block & 0x0000000F);
         blockv = vec_ld(0, block);
 
-        pixelsv1 = unaligned_load(line_size, pixels);
-        pixelsv2 = unaligned_load(line_size+1, pixels);
+        pixelsv1 = vec_ld_unaligned(line_size, pixels);
+        pixelsv2 = vec_ld_unaligned(line_size+1, pixels);
 
         pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
         pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to