This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit b19bd0de6c3b554ddc0221c7ee3f2c23af4617be Author: Shreesh Adiga <[email protected]> AuthorDate: Thu Feb 5 18:22:17 2026 +0530 Commit: Martin Storsjö <[email protected]> CommitDate: Wed Mar 11 14:03:36 2026 +0000 avutil/cpu: add aarch64 CPU feature flag for PMULL and EOR3 --- configure | 14 +++++++++++++- libavutil/aarch64/asm.S | 18 ++++++++++++++++++ libavutil/aarch64/cpu.c | 28 ++++++++++++++++++++++++++++ libavutil/aarch64/cpu.h | 8 +++++--- libavutil/cpu.c | 3 ++- libavutil/cpu.h | 2 ++ libavutil/tests/cpu.c | 2 ++ tests/checkasm/checkasm.c | 1 + 8 files changed, 71 insertions(+), 5 deletions(-) diff --git a/configure b/configure index 1759694274..854bca6ffb 100755 --- a/configure +++ b/configure @@ -482,6 +482,8 @@ Optimization options (experts only): --disable-arm-crc disable ARM/AArch64 CRC optimizations --disable-dotprod disable DOTPROD optimizations --disable-i8mm disable I8MM optimizations + --disable-pmull disable PMULL optimizations + --disable-eor3 disable EOR3 optimizations --disable-sve disable SVE optimizations --disable-sve2 disable SVE2 optimizations --disable-sme disable SME optimizations @@ -2299,6 +2301,8 @@ ARCH_EXT_LIST_ARM=" arm_crc dotprod i8mm + pmull + eor3 neon vfp vfpv3 @@ -2575,6 +2579,8 @@ TOOLCHAIN_FEATURES=" as_archext_crc_directive as_archext_dotprod_directive as_archext_i8mm_directive + as_archext_sha3_directive + as_archext_aes_directive as_archext_sve_directive as_archext_sve2_directive as_archext_sme_directive @@ -2918,6 +2924,8 @@ setend_deps="arm" arm_crc_deps="aarch64" dotprod_deps="aarch64 neon" i8mm_deps="aarch64 neon" +pmull_deps="aarch64 neon" +eor3_deps="aarch64 neon" sve_deps="aarch64 neon" sve2_deps="aarch64 neon sve" sme_deps="aarch64 neon sve sve2" @@ -6561,8 +6569,10 @@ if enabled aarch64; then # internal assembler in clang 3.3 does not support this instruction enabled neon && check_insn neon 'ext v0.8B, v0.8B, v1.8B, #1' - archext_list="arm_crc dotprod i8mm sve sve2 sme sme_i16i64 sme2" + archext_list="arm_crc dotprod i8mm pmull eor3 sve sve2 sme sme_i16i64 sme2" enabled arm_crc && check_archext_name_insn arm_crc crc 'crc32x w0, w0, x0' + enabled pmull && check_archext_name_insn pmull aes 'pmull v0.1q, v0.1d, v0.1d' + enabled eor3 && check_archext_name_insn eor3 sha3 'eor3 v0.16b, v1.16b, v2.16b, v3.16b' enabled dotprod && check_archext_insn dotprod 'udot v0.4s, v0.16b, v0.16b' enabled i8mm && check_archext_insn i8mm 'usdot v0.4s, v0.16b, v0.16b' enabled sve && check_archext_insn sve 'whilelt p0.s, x0, x1' @@ -8400,6 +8410,8 @@ if enabled aarch64; then echo "NEON enabled ${neon-no}" echo "DOTPROD enabled ${dotprod-no}" echo "I8MM enabled ${i8mm-no}" + echo "PMULL enabled ${pmull-no}" + echo "EOR3 enabled ${eor3-no}" echo "SVE enabled ${sve-no}" echo "SVE2 enabled ${sve2-no}" echo "SME enabled ${sme-no}" diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S index 04fe6286a8..d9698a3203 100644 --- a/libavutil/aarch64/asm.S +++ b/libavutil/aarch64/asm.S @@ -64,6 +64,22 @@ #define DISABLE_I8MM #endif +#if HAVE_AS_ARCHEXT_AES_DIRECTIVE +#define ENABLE_PMULL .arch_extension aes +#define DISABLE_PMULL .arch_extension noaes +#else +#define ENABLE_PMULL +#define DISABLE_PMULL +#endif + +#if HAVE_AS_ARCHEXT_SHA3_DIRECTIVE +#define ENABLE_EOR3 .arch_extension sha3 +#define DISABLE_EOR3 .arch_extension nosha3 +#else +#define ENABLE_EOR3 +#define DISABLE_EOR3 +#endif + #if HAVE_AS_ARCHEXT_SVE_DIRECTIVE #define ENABLE_SVE .arch_extension sve #define DISABLE_SVE .arch_extension nosve @@ -105,6 +121,8 @@ #endif DISABLE_ARM_CRC +DISABLE_PMULL +DISABLE_EOR3 DISABLE_DOTPROD DISABLE_I8MM DISABLE_SVE diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c index 8dd8245358..b8d2bd3280 100644 --- a/libavutil/aarch64/cpu.c +++ b/libavutil/aarch64/cpu.c @@ -24,7 +24,9 @@ #include <stdint.h> #include <sys/auxv.h> +#define HWCAP_AARCH64_PMULL (1 << 4) #define HWCAP_AARCH64_CRC32 (1 << 7) +#define HWCAP_AARCH64_SHA3 (1 << 17) #define HWCAP_AARCH64_ASIMDDP (1 << 20) #define HWCAP_AARCH64_SVE (1 << 22) #define HWCAP2_AARCH64_SVE2 (1 << 1) @@ -40,6 +42,10 @@ static int detect_flags(void) unsigned long hwcap = ff_getauxval(AT_HWCAP); unsigned long hwcap2 = ff_getauxval(AT_HWCAP2); + if (hwcap & HWCAP_AARCH64_PMULL) + flags |= AV_CPU_FLAG_PMULL; + if (hwcap & HWCAP_AARCH64_SHA3) + flags |= AV_CPU_FLAG_EOR3; if (hwcap & HWCAP_AARCH64_CRC32) flags |= AV_CPU_FLAG_ARM_CRC; if (hwcap & HWCAP_AARCH64_ASIMDDP) @@ -85,6 +91,10 @@ static int detect_flags(void) flags |= AV_CPU_FLAG_SME_I16I64; if (have_feature("hw.optional.armv8_crc32")) flags |= AV_CPU_FLAG_ARM_CRC; + if (have_feature("hw.optional.arm.FEAT_PMULL")) + flags |= AV_CPU_FLAG_PMULL; + if (have_feature("hw.optional.armv8_2_sha3")) + flags |= AV_CPU_FLAG_EOR3; if (have_feature("hw.optional.arm.FEAT_SME2")) flags |= AV_CPU_FLAG_SME2; @@ -115,6 +125,10 @@ static int detect_flags(void) flags |= AV_CPU_FLAG_DOTPROD; if (ID_AA64ISAR0_CRC32(isar0) >= ID_AA64ISAR0_CRC32_BASE) flags |= AV_CPU_FLAG_ARM_CRC; + if (ID_AA64ISAR0_AES(isar0) >= ID_AA64ISAR0_AES_PMULL) + flags |= AV_CPU_FLAG_PMULL; + if (ID_AA64ISAR0_SHA3(isar0) >= ID_AA64ISAR0_SHA3_IMPL) + flags |= AV_CPU_FLAG_EOR3; } mib[0] = CTL_MACHDEP; @@ -141,6 +155,14 @@ static int detect_flags(void) if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) flags |= AV_CPU_FLAG_ARM_CRC; #endif +#ifdef PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE + if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) + flags |= AV_CPU_FLAG_PMULL; +#endif +#ifdef PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE + if (IsProcessorFeaturePresent(PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE)) + flags |= AV_CPU_FLAG_EOR3; +#endif #ifdef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) flags |= AV_CPU_FLAG_DOTPROD; @@ -203,6 +225,12 @@ int ff_get_cpu_flags_aarch64(void) #ifdef __ARM_FEATURE_CRC32 flags |= AV_CPU_FLAG_ARM_CRC; #endif +#ifdef __ARM_FEATURE_AES + flags |= AV_CPU_FLAG_PMULL; +#endif +#ifdef __ARM_FEATURE_SHA3 + flags |= AV_CPU_FLAG_EOR3; +#endif #ifdef __ARM_FEATURE_SME_I16I64 flags |= AV_CPU_FLAG_SME_I16I64; #endif diff --git a/libavutil/aarch64/cpu.h b/libavutil/aarch64/cpu.h index f1dff3f647..433ba60bca 100644 --- a/libavutil/aarch64/cpu.h +++ b/libavutil/aarch64/cpu.h @@ -22,10 +22,12 @@ #include "libavutil/cpu.h" #include "libavutil/cpu_internal.h" -#define have_armv8(flags) CPUEXT(flags, ARMV8) -#define have_neon(flags) CPUEXT(flags, NEON) -#define have_vfp(flags) CPUEXT(flags, VFP) +#define have_armv8(flags) CPUEXT(flags, ARMV8) +#define have_neon(flags) CPUEXT(flags, NEON) +#define have_vfp(flags) CPUEXT(flags, VFP) #define have_arm_crc(flags) CPUEXT(flags, ARM_CRC) +#define have_pmull(flags) CPUEXT(flags, PMULL) +#define have_eor3(flags) CPUEXT(flags, EOR3) #define have_dotprod(flags) CPUEXT(flags, DOTPROD) #define have_i8mm(flags) CPUEXT(flags, I8MM) #define have_sve(flags) CPUEXT(flags, SVE) diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 07b90aa90a..00029f81ca 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -190,8 +190,9 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) { "sme", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SME }, .unit = "flags" }, { "crc", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARM_CRC }, .unit = "flags" }, { "sme_i16i64", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SME_I16I64 }, .unit = "flags" }, - { "sme2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SME2 }, .unit = "flags" }, + { "pmull", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_PMULL }, .unit = "flags" }, + { "eor3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_EOR3 }, .unit = "flags" }, #elif ARCH_MIPS { "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" }, { "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" }, diff --git a/libavutil/cpu.h b/libavutil/cpu.h index 464d4cd5df..07076dafb8 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -82,6 +82,8 @@ #define AV_CPU_FLAG_SME2 (1 <<14) #define AV_CPU_FLAG_SME_I16I64 (1 <<15) #define AV_CPU_FLAG_SETEND (1 <<16) +#define AV_CPU_FLAG_PMULL (1 <<17) +#define AV_CPU_FLAG_EOR3 (1 <<18) #define AV_CPU_FLAG_MMI (1 << 0) #define AV_CPU_FLAG_MSA (1 << 1) diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c index 3e468fecc8..f74d02270e 100644 --- a/libavutil/tests/cpu.c +++ b/libavutil/tests/cpu.c @@ -52,6 +52,8 @@ static const struct { { AV_CPU_FLAG_SME_I16I64, "sme_i16i64" }, { AV_CPU_FLAG_ARM_CRC, "crc" }, { AV_CPU_FLAG_SME2, "sme2" }, + { AV_CPU_FLAG_PMULL, "pmull" }, + { AV_CPU_FLAG_EOR3, "eor3" }, #elif ARCH_ARM { AV_CPU_FLAG_ARMV5TE, "armv5te" }, { AV_CPU_FLAG_ARMV6, "armv6" }, diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 9ab448685b..8b2f648b3d 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -383,6 +383,7 @@ static const struct { { "SME-I16I64", "sme_i16i64", AV_CPU_FLAG_SME_I16I64 }, { "CRC", "crc", AV_CPU_FLAG_ARM_CRC }, { "SME2", "sme2", AV_CPU_FLAG_SME2 }, + { "PMULL", "pmull_eor3", AV_CPU_FLAG_PMULL|AV_CPU_FLAG_EOR3 }, #elif ARCH_ARM { "ARMV5TE", "armv5te", AV_CPU_FLAG_ARMV5TE }, { "ARMV6", "armv6", AV_CPU_FLAG_ARMV6 }, _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
