PR #21009 opened by george.zaguri URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21009 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21009.patch
All changes are made during development/testing of SME for ffmpeg (vvc). Tested on Apple M4. Additionally, fixed a bug in the Makefile vars to exclude subvars for SVE/SVE2; otherwise it leads to the inclusion of SVE/SVE2/SME-specific object files in each ffmpeg target library >From 652083378193c2a812fab9fa01d1ae0593768858 Mon Sep 17 00:00:00 2001 From: Georgii Zagoruiko <[email protected]> Date: Mon, 24 Nov 2025 21:01:03 +0000 Subject: [PATCH] configure: add detection of assembler support for SME All changes are made during development/testing of SME for ffmpeg (vvc). Tested on Apple M4. Additionally, fixed a bug in the Makefile vars to exclude subvars for SVE/SVE2; otherwise it leads to the inclusion of SVE/SVE2/SME-specific object files in each ffmpeg target library --- Makefile | 3 ++- configure | 11 +++++++++-- ffbuild/arch.mak | 1 + libavutil/aarch64/Makefile | 2 ++ libavutil/aarch64/asm.S | 9 +++++++++ libavutil/aarch64/cpu.c | 16 ++++++++++++++++ libavutil/aarch64/cpu.h | 5 +++++ libavutil/aarch64/cpu_sme.S | 31 +++++++++++++++++++++++++++++++ libavutil/cpu.c | 1 + libavutil/cpu.h | 1 + libavutil/tests/cpu.c | 5 +++++ tests/checkasm/checkasm.c | 1 + 12 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 libavutil/aarch64/cpu_sme.S diff --git a/Makefile b/Makefile index 2f78db02a5..7a972b8502 100644 --- a/Makefile +++ b/Makefile @@ -110,7 +110,8 @@ SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS \ ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS \ MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \ MMI-OBJS LSX-OBJS LASX-OBJS RV-OBJS RVV-OBJS RVVB-OBJS \ - OBJS SHLIBOBJS STLIBOBJS HOSTOBJS TESTOBJS SIMD128-OBJS + OBJS SHLIBOBJS STLIBOBJS HOSTOBJS TESTOBJS SIMD128-OBJS \ + SVE-OBJS SVE2-OBJS SME-OBJS define RESET $(1) := diff --git a/configure b/configure index 7d6061b55c..508268303f 100755 --- a/configure +++ b/configure @@ -478,6 +478,7 @@ Optimization options (experts only): --disable-i8mm disable I8MM optimizations --disable-sve disable SVE optimizations 
--disable-sve2 disable SVE2 optimizations + --disable-sme disable SME optimizations --disable-inline-asm disable use of inline assembly --disable-x86asm disable use of standalone x86 assembly --disable-mipsdsp disable MIPS DSP ASE R1 optimizations @@ -2224,6 +2225,7 @@ ARCH_EXT_LIST_ARM=" setend sve sve2 + sme " ARCH_EXT_LIST_MIPS=" @@ -2491,6 +2493,7 @@ TOOLCHAIN_FEATURES=" as_archext_i8mm_directive as_archext_sve_directive as_archext_sve2_directive + as_archext_sme_directive as_dn_directive as_fpu_directive as_func @@ -2823,6 +2826,7 @@ dotprod_deps="aarch64 neon" i8mm_deps="aarch64 neon" sve_deps="aarch64 neon" sve2_deps="aarch64 neon sve" +sme_deps="aarch64 neon sve sve2" map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM @@ -5526,6 +5530,7 @@ elif enabled arm; then elif check_arm_arch 7M 7_M; then echo armv7-m elif check_arm_arch 7EM 7E_M; then echo armv7-m elif check_arm_arch 8A 8_A; then echo armv8-a + elif check_arm_arch 9A 9_A; then echo armv9-a fi } @@ -6423,7 +6428,7 @@ if enabled aarch64; then # Clang before version 17 (Xcode versions up to and including 15.0) # didn't support controlling the dotprod/i8mm extensions via # .arch_extension; thus try to enable them via the .arch level as well. 
- for level in armv8.2-a armv8.4-a armv8.6-a; do + for level in armv8.2-a armv8.4-a armv8.6-a armv9-a; do check_arch_level $level done # Clang before version 17 (Xcode versions up to and including 15.0) @@ -6440,11 +6445,12 @@ if enabled aarch64; then # internal assembler in clang 3.3 does not support this instruction enabled neon && check_insn neon 'ext v0.8B, v0.8B, v1.8B, #1' - archext_list="dotprod i8mm sve sve2" + archext_list="dotprod i8mm sve sve2 sme" enabled dotprod && check_archext_insn dotprod 'udot v0.4s, v0.16b, v0.16b' enabled i8mm && check_archext_insn i8mm 'usdot v0.4s, v0.16b, v0.16b' enabled sve && check_archext_insn sve 'whilelt p0.s, x0, x1' enabled sve2 && check_archext_insn sve2 'sqrdmulh z0.s, z0.s, z0.s' + enabled sme && check_archext_insn sme 'smstop' # Disable the main feature (e.g. HAVE_NEON) if neither inline nor external # assembly support the feature out of the box. Skip this for the features @@ -8202,6 +8208,7 @@ if enabled aarch64; then echo "I8MM enabled ${i8mm-no}" echo "SVE enabled ${sve-no}" echo "SVE2 enabled ${sve2-no}" + echo "SME enabled ${sme-no}" fi if enabled arm; then echo "ARMv5TE enabled ${armv5te-no}" diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak index 197e30bb89..7dec692b15 100644 --- a/ffbuild/arch.mak +++ b/ffbuild/arch.mak @@ -5,6 +5,7 @@ OBJS-$(HAVE_VFP) += $(VFP-OBJS) $(VFP-OBJS-yes) OBJS-$(HAVE_NEON) += $(NEON-OBJS) $(NEON-OBJS-yes) OBJS-$(HAVE_SVE) += $(SVE-OBJS) $(SVE-OBJS-yes) OBJS-$(HAVE_SVE2) += $(SVE2-OBJS) $(SVE2-OBJS-yes) +OBJS-$(HAVE_SME) += $(SME-OBJS) $(SME-OBJS-yes) OBJS-$(HAVE_MIPSFPU) += $(MIPSFPU-OBJS) $(MIPSFPU-OBJS-yes) OBJS-$(HAVE_MIPSDSP) += $(MIPSDSP-OBJS) $(MIPSDSP-OBJS-yes) diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile index 992e95e4df..744c2c53d7 100644 --- a/libavutil/aarch64/Makefile +++ b/libavutil/aarch64/Makefile @@ -6,3 +6,5 @@ NEON-OBJS += aarch64/float_dsp_neon.o \ aarch64/tx_float_neon.o \ SVE-OBJS += aarch64/cpu_sve.o \ + +SME-OBJS += aarch64/cpu_sme.o \ 
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S index 2e4e451ec2..77cea57cfc 100644 --- a/libavutil/aarch64/asm.S +++ b/libavutil/aarch64/asm.S @@ -72,10 +72,19 @@ #define DISABLE_SVE2 #endif +#if HAVE_AS_ARCHEXT_SME_DIRECTIVE +#define ENABLE_SME .arch_extension sme +#define DISABLE_SME .arch_extension nosme +#else +#define ENABLE_SME +#define DISABLE_SME +#endif + DISABLE_DOTPROD DISABLE_I8MM DISABLE_SVE DISABLE_SVE2 +DISABLE_SME /* Support macros for diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c index e82c0f19ab..1776da3bfa 100644 --- a/libavutil/aarch64/cpu.c +++ b/libavutil/aarch64/cpu.c @@ -28,6 +28,7 @@ #define HWCAP_AARCH64_SVE (1 << 22) #define HWCAP2_AARCH64_SVE2 (1 << 1) #define HWCAP2_AARCH64_I8MM (1 << 13) +#define HWCAP2_AARCH64_SME (1 << 23) static int detect_flags(void) { @@ -44,6 +45,8 @@ static int detect_flags(void) flags |= AV_CPU_FLAG_SVE2; if (hwcap2 & HWCAP2_AARCH64_I8MM) flags |= AV_CPU_FLAG_I8MM; + if (hwcap2 & HWCAP2_AARCH64_SME) + flags |= AV_CPU_FLAG_SME; return flags; } @@ -67,6 +70,12 @@ static int detect_flags(void) flags |= AV_CPU_FLAG_DOTPROD; if (have_feature("hw.optional.arm.FEAT_I8MM")) flags |= AV_CPU_FLAG_I8MM; + if (have_feature("hw.optional.arm.FEAT_SVE")) + flags |= AV_CPU_FLAG_SVE; + if (have_feature("hw.optional.arm.FEAT_SVE2")) + flags |= AV_CPU_FLAG_SVE2; + if (have_feature("hw.optional.arm.FEAT_SME")) + flags |= AV_CPU_FLAG_SME; return flags; } @@ -133,6 +142,10 @@ static int detect_flags(void) #ifdef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)) flags |= AV_CPU_FLAG_SVE2; +#endif +#ifdef PF_ARM_SME_INSTRUCTIONS_AVAILABLE + if (IsProcessorFeaturePresent(PF_ARM_SME_INSTRUCTIONS_AVAILABLE)) + flags |= AV_CPU_FLAG_SME; #endif return flags; } @@ -162,6 +175,9 @@ int ff_get_cpu_flags_aarch64(void) #ifdef __ARM_FEATURE_SVE2 flags |= AV_CPU_FLAG_SVE2; #endif +#ifdef __ARM_FEATURE_SME + flags |= AV_CPU_FLAG_SME; +#endif flags |= detect_flags(); diff 
--git a/libavutil/aarch64/cpu.h b/libavutil/aarch64/cpu.h index a41b729659..62d5eb768f 100644 --- a/libavutil/aarch64/cpu.h +++ b/libavutil/aarch64/cpu.h @@ -29,9 +29,14 @@ #define have_i8mm(flags) CPUEXT(flags, I8MM) #define have_sve(flags) CPUEXT(flags, SVE) #define have_sve2(flags) CPUEXT(flags, SVE2) +#define have_sme(flags) CPUEXT(flags, SME) #if HAVE_SVE int ff_aarch64_sve_length(void); #endif +#if HAVE_SME +int ff_aarch64_sme_length(void); +#endif + #endif /* AVUTIL_AARCH64_CPU_H */ diff --git a/libavutil/aarch64/cpu_sme.S b/libavutil/aarch64/cpu_sme.S new file mode 100644 index 0000000000..ba79d483a1 --- /dev/null +++ b/libavutil/aarch64/cpu_sme.S @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2025 Georgii Zagoruiko + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "asm.S" + +ENABLE_SME + +function ff_aarch64_sme_length, export=1 + /* RDSVL reads the streaming vector length in bytes without entering + * streaming mode, so it does not clobber Z/V/P registers or ZA state. */ + rdsvl x0, #1 + ret +endfunc diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 8f9b785ebc..5aed2f39dc 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -186,6 +186,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) { "i8mm", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_I8MM }, .unit = "flags" }, { "sve", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE }, .unit = "flags" }, { "sve2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE2 }, .unit = "flags" }, + { "sme", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SME }, .unit = "flags" }, #elif ARCH_MIPS { "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" }, { "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" }, diff --git a/libavutil/cpu.h b/libavutil/cpu.h index a06fc08e56..87cecd0424 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -76,6 +76,7 @@ #define AV_CPU_FLAG_I8MM (1 << 9) #define AV_CPU_FLAG_SVE (1 <<10) #define AV_CPU_FLAG_SVE2 (1 <<11) +#define AV_CPU_FLAG_SME (1 <<12) #define AV_CPU_FLAG_SETEND (1 <<16) #define AV_CPU_FLAG_MMI (1 << 0) diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c index fd2e32901d..f59db0915c 100644 --- a/libavutil/tests/cpu.c +++ b/libavutil/tests/cpu.c @@ -48,6 +48,7 @@ static const struct { { AV_CPU_FLAG_I8MM, "i8mm" }, { AV_CPU_FLAG_SVE, "sve" }, { AV_CPU_FLAG_SVE2, "sve2" }, + { AV_CPU_FLAG_SME, "sme" }, #elif ARCH_ARM { AV_CPU_FLAG_ARMV5TE, "armv5te" }, { AV_CPU_FLAG_ARMV6, "armv6" }, @@ -174,6 +175,10 @@ int main(int argc, char **argv) #if ARCH_AARCH64 && HAVE_SVE if (cpu_flags_raw & AV_CPU_FLAG_SVE) printf("sve_vector_length = %d\n", 8 * 
ff_aarch64_sve_length()); +#endif +#if ARCH_AARCH64 && HAVE_SME + if (cpu_flags_raw & AV_CPU_FLAG_SME) + printf("sme_vector_length = %d\n", 8 * ff_aarch64_sme_length()); #elif ARCH_RISCV && HAVE_RVV if (cpu_flags_raw & AV_CPU_FLAG_RVV_I32) { size_t bytes = ff_get_rv_vlenb(); diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 20d8f19757..ae3756d5d0 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -351,6 +351,7 @@ static const struct { { "I8MM", "i8mm", AV_CPU_FLAG_I8MM }, { "SVE", "sve", AV_CPU_FLAG_SVE }, { "SVE2", "sve2", AV_CPU_FLAG_SVE2 }, + { "SME", "sme", AV_CPU_FLAG_SME }, #elif ARCH_ARM { "ARMV5TE", "armv5te", AV_CPU_FLAG_ARMV5TE }, { "ARMV6", "armv6", AV_CPU_FLAG_ARMV6 }, -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
