This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 1785542a806082cb8ff258da462c15b84b9b6552
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Thu Mar 26 22:41:12 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Mon Mar 30 13:51:53 2026 +0200

    avcodec/x86/vvc/of: Don't add to zero
    
    Instead rewrite the code to use assignment. Saves zeroing and
    additions.
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vvc/of.asm | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/libavcodec/x86/vvc/of.asm b/libavcodec/x86/vvc/of.asm
index d40ed73b3b..29d3ca0798 100644
--- a/libavcodec/x86/vvc/of.asm
+++ b/libavcodec/x86/vvc/of.asm
@@ -129,18 +129,26 @@ INIT_YMM avx2
     SAVE                        [dstq + ds3q], 6, %4
 %endmacro
 
-%macro SUM_MIN_BLOCK_W16 4 ; src/dst, shuffle, perm, tmp
+%macro SUM_MIN_BLOCK_W16 4-5 ; src/dst, shuffle, perm, tmp, [dst]
     pshufb  %4, %1, %2
     vpermd  %4, %3, %4
+%if %0 == 4
     paddw   %1, %4
+%else
+    paddw   %5, %1, %4
+%endif
 %endmacro
 
-%macro SUM_MIN_BLOCK_W8 3 ; src/dst, shuffle, tmp
+%macro SUM_MIN_BLOCK_W8 3-4 ; src/dst, shuffle, tmp, [dst]
     pshufb  %3, %1, %2
+%if %0 == 3
     paddw   %1, %3
+%else
+    paddw   %4, %1, %3
+%endif
 %endmacro
 
-%macro BDOF_PROF_GRAD 2 ; line_no, last_line
+%macro BDOF_PROF_GRAD 2-3 0 ; line_no, last_line, assign (instead of add) to dst regs
 %assign i0 (%1 + 0) % 3
 %assign j0 (%1 + 1) % 3
 %assign k0 (%1 + 2) % 3
@@ -201,7 +209,11 @@ INIT_YMM avx2
     SUM_MIN_BLOCK_W8            m7, t0, m11
     SUM_MIN_BLOCK_W8            m8, t0, m11
     SUM_MIN_BLOCK_W8            m9, t0, m11
+%if (%3)
+    SUM_MIN_BLOCK_W8           m10, t0, m11, m13
+%else
     SUM_MIN_BLOCK_W8           m10, t0, m11
+%endif
     jmp                     %%wend
 
 %%w16:
@@ -210,7 +222,11 @@ INIT_YMM avx2
     SUM_MIN_BLOCK_W16           m7, t0, t1, m11
     SUM_MIN_BLOCK_W16           m8, t0, t1, m11
     SUM_MIN_BLOCK_W16           m9, t0, t1, m11
+%if (%3)
+    SUM_MIN_BLOCK_W16          m10, t0, t1, m11, m13
+%else
     SUM_MIN_BLOCK_W16          m10, t0, t1, m11
+%endif
 
 %%wend:
     vpblendd                    m11, m8, m7, 10101010b
@@ -227,13 +243,17 @@ INIT_YMM avx2
     vpblendw                     m6, m8, m6, 01010101b
     pshuflw                      m6, m6, q2301
     pshufhw                      m6, m6, q2301
+%if (%3)
+    paddw                       m12, m6, m11                ; 4 x (4sgx2, 4sgy2, 4sgxdi, 4sgydi)
+%else
     paddw                        m8, m6, m11                ; 4 x (4sgx2, 4sgy2, 4sgxdi, 4sgydi)
+%endif
 
 %if (%1) == 0
     ; pad for top and directly output to m12, m13
     paddw                      m12, m8,  m8
     paddw                      m13, m10, m10
-%else
+%elifn (%3)
 %if (%2)
     ; pad for bottom
     paddw                       m8, m8
@@ -323,9 +343,7 @@ INIT_YMM avx2
     mova                   m14, m12
     mova                   m15, m13
 
-    pxor                   m12, m12
-    pxor                   m13, m13
-    BDOF_PROF_GRAD  %1 * 4 + 3, 0
+    BDOF_PROF_GRAD  %1 * 4 + 3, 0, 1
     BDOF_PROF_GRAD  %1 * 4 + 4, 0
     paddw                  m14, m12
     paddw                  m15, m13

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to