PR #21184 opened by Zhao Zhili (quink) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21184 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21184.patch
This reverts commits 8f48a62, 9af8782, and bd3e71b. Commit 8f48a62 extends tx to 2M, resulting in the tx_float bss section reaching a size of 4M. This isn't an issue on devices with normal memory sizes and an OS that supports virtual memory. But it's a real issue for embedded devices with a real-time OS, which may not support virtual memory, e.g., NuttX. This 4M bss section maps directly to physical memory, which is a scarce resource on embedded devices. >From 07b26f98e9eb3a2429a439a72e90ac29f28c1658 Mon Sep 17 00:00:00 2001 From: Zhao Zhili <[email protected]> Date: Sat, 13 Dec 2025 12:53:58 +0800 Subject: [PATCH] Revert "avutil/tx_template: extend to 2M" This reverts commits 8f48a62, 9af8782, and bd3e71b. Commit 8f48a62 extends tx to 2M, resulting in the tx_float bss section reaching a size of 4M. This isn't an issue on devices with normal memory sizes and an OS that supports virtual memory. But it's a real issue for embedded devices with a real-time OS, which may not support virtual memory, e.g., NuttX. This 4M bss section maps directly to physical memory, which is a scarce resource on embedded devices. 
--- libavutil/tx_template.c | 12 ------------ libavutil/x86/tx_float.asm | 8 ++------ libavutil/x86/tx_float_init.c | 18 +++++++++--------- 3 files changed, 11 insertions(+), 27 deletions(-) diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c index ec630954b8..ecee572e40 100644 --- a/libavutil/tx_template.c +++ b/libavutil/tx_template.c @@ -45,10 +45,6 @@ SR_TABLE(32768) \ SR_TABLE(65536) \ SR_TABLE(131072) \ - SR_TABLE(262144) \ - SR_TABLE(524288) \ - SR_TABLE(1048576) \ - SR_TABLE(2097152) \ #define SR_TABLE(len) \ TABLE_DEF(len, len/4 + 1); @@ -724,10 +720,6 @@ DECL_SR_CODELET(16384,8192,4096) DECL_SR_CODELET(32768,16384,8192) DECL_SR_CODELET(65536,32768,16384) DECL_SR_CODELET(131072,65536,32768) -DECL_SR_CODELET(262144,131072,65536) -DECL_SR_CODELET(524288,262144,131072) -DECL_SR_CODELET(1048576,524288,262144) -DECL_SR_CODELET(2097152,1048576,524288) static av_cold int TX_NAME(ff_tx_fft_init)(AVTXContext *s, const FFTXCodelet *cd, @@ -2160,10 +2152,6 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = { &TX_NAME(ff_tx_fft32768_ns_def), &TX_NAME(ff_tx_fft65536_ns_def), &TX_NAME(ff_tx_fft131072_ns_def), - &TX_NAME(ff_tx_fft262144_ns_def), - &TX_NAME(ff_tx_fft524288_ns_def), - &TX_NAME(ff_tx_fft1048576_ns_def), - &TX_NAME(ff_tx_fft2097152_ns_def), /* Prime factor codelets */ &TX_NAME(ff_tx_fft3_ns_def), diff --git a/libavutil/x86/tx_float.asm b/libavutil/x86/tx_float.asm index c030147ce8..87be21c2d6 100644 --- a/libavutil/x86/tx_float.asm +++ b/libavutil/x86/tx_float.asm @@ -46,7 +46,7 @@ %endif %assign i 16 -%rep 18 +%rep 14 cextern tab_ %+ i %+ _float ; ff_tab_i_float... 
%assign i (i << 1) %endrep @@ -1385,11 +1385,7 @@ FFT_SPLIT_RADIX_DEF 8192, .16384pt FFT_SPLIT_RADIX_DEF 16384, .32768pt FFT_SPLIT_RADIX_DEF 32768, .65536pt FFT_SPLIT_RADIX_DEF 65536, .131072pt -FFT_SPLIT_RADIX_DEF 131072, .262144pt -FFT_SPLIT_RADIX_DEF 262144, .524288pt -FFT_SPLIT_RADIX_DEF 524288, .1048576pt -FFT_SPLIT_RADIX_DEF 1048576, .2097152pt -FFT_SPLIT_RADIX_DEF 2097152 +FFT_SPLIT_RADIX_DEF 131072 ;=============================================================================== ; Final synthesis + deinterleaving code diff --git a/libavutil/x86/tx_float_init.c b/libavutil/x86/tx_float_init.c index 3e99c21eac..f69a5a6d77 100644 --- a/libavutil/x86/tx_float_init.c +++ b/libavutil/x86/tx_float_init.c @@ -271,15 +271,15 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] = { AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW), TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, AV_CPU_FLAG_AVXSLOW), - TX_DEF(fft_sr, FFT, 64, 2097152, 2, 0, 256, b8_i2, avx, AVX, 0, AV_CPU_FLAG_AVXSLOW), - TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX, + TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 256, b8_i2, avx, AVX, 0, AV_CPU_FLAG_AVXSLOW), + TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW), - TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE, + TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE, AV_CPU_FLAG_AVXSLOW), - TX_DEF(fft_sr, FFT, 64, 2097152, 2, 0, 288, b8_i2, fma3, FMA3, 0, AV_CPU_FLAG_AVXSLOW), - TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3, FMA3, + TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 288, b8_i2, fma3, FMA3, 0, AV_CPU_FLAG_AVXSLOW), + TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW), - TX_DEF(fft_sr_ns, 
FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, + TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, AV_CPU_FLAG_AVXSLOW), TX_DEF(fft15, FFT, 15, 15, 15, 0, 320, factor_init, avx2, AVX2, @@ -287,11 +287,11 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] = { TX_DEF(fft15_ns, FFT, 15, 15, 15, 0, 384, factor_init, avx2, AVX2, AV_TX_INPLACE | FF_TX_PRESHUFFLE, AV_CPU_FLAG_AVXSLOW), - TX_DEF(fft_sr, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx2, AVX2, 0, + TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 320, b8_i2, avx2, AVX2, 0, AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER), - TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2, + TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2, AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER), - TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2, AV_TX_INPLACE | FF_TX_PRESHUFFLE, + TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2, AV_TX_INPLACE | FF_TX_PRESHUFFLE, AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER), TX_DEF(fft_pfa_15xM, FFT, 60, TX_LEN_UNLIMITED, 15, 2, 320, fft_pfa_init, avx2, AVX2, -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
