On 2/27/2021 11:05 AM, Paul B Mahol wrote:
Signed-off-by: Paul B Mahol <[email protected]> --- libavcodec/cfhdencdsp.c | 3 + libavcodec/x86/Makefile | 2 + libavcodec/x86/cfhdencdsp.asm | 435 +++++++++++++++++++++++++++++++ libavcodec/x86/cfhdencdsp_init.c | 48 ++++ 4 files changed, 488 insertions(+) create mode 100644 libavcodec/x86/cfhdencdsp.asm create mode 100644 libavcodec/x86/cfhdencdsp_init.cdiff --git a/libavcodec/cfhdencdsp.c b/libavcodec/cfhdencdsp.c index 0becb76d1d..b979e9e09a 100644 --- a/libavcodec/cfhdencdsp.c +++ b/libavcodec/cfhdencdsp.c @@ -73,4 +73,7 @@ av_cold void ff_cfhdencdsp_init(CFHDEncDSPContext *c) { c->horiz_filter = horiz_filter; c->vert_filter = vert_filter; + + if (ARCH_X86) + ff_cfhdencdsp_init_x86(c); } diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 884dc0c759..6361161180 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -51,6 +51,7 @@ OBJS-$(CONFIG_ALAC_DECODER) += x86/alacdsp_init.o OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp_init.o OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp.o OBJS-$(CONFIG_CFHD_DECODER) += x86/cfhddsp_init.o +OBJS-$(CONFIG_CFHD_ENCODER) += x86/cfhdencdsp_init.o OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp_init.o x86/synth_filter_init.o OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc_init.o OBJS-$(CONFIG_EXR_DECODER) += x86/exrdsp_init.o @@ -154,6 +155,7 @@ X86ASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp.o X86ASM-OBJS-$(CONFIG_ALAC_DECODER) += x86/alacdsp.o X86ASM-OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp.o X86ASM-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsidct.o +X86ASM-OBJS-$(CONFIG_CFHD_ENCODER) += x86/cfhdencdsp.o X86ASM-OBJS-$(CONFIG_CFHD_DECODER) += x86/cfhddsp.o X86ASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o x86/synth_filter.o X86ASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp.o \ diff --git a/libavcodec/x86/cfhdencdsp.asm b/libavcodec/x86/cfhdencdsp.asm new file mode 100644 index 0000000000..b0b094aa71 --- /dev/null +++ b/libavcodec/x86/cfhdencdsp.asm @@ -0,0 +1,435 @@ 
+;****************************************************************************** +;* x86-optimized functions for the CFHD encoder +;* Copyright (c) 2021 Paul B Mahol +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +pw_p1_n1: dw 1, -1, 1, -1, 1, -1, 1, -1 +pw_n1_p1: dw -1, 1, -1, 1, -1, 1, -1, 1 +pw_p5_n11: dw 5, -11, 5, -11, 5, -11, 5, -11 +pw_n5_p11: dw -5, 11, -5, 11, -5, 11, -5, 11 +pw_p11_n5: dw 11, -5, 11, -5, 11, -5, 11, -5 +pw_n11_p5: dw -11, 5, -11, 5, -11, 5, -11, 5 +pd_4: times 4 dd 4 +pw_n4: times 8 dw -4 +cextern pw_m1 +cextern pw_1 +cextern pw_4 + +SECTION .text
[...]
+ +%if ARCH_X86_64 +INIT_XMM sse2 +cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidth, width, height, x, y, pos + movsxdifnidn widthq, widthd + movsxdifnidn heightq, heightd
Why did you add this? Using the d suffix on the shl and sub below, as in the previous version, is enough to clear the upper bits.
+ + shl istrideq, 1 + + shl widthq, 1 + sub heightq, 2
Should be OK if tested and bit-exact.
_______________________________________________
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".
