On Sun, Dec 01, 2024 at 07:18:17PM -0500, Brad Smith wrote:
> On Sun, Dec 01, 2024 at 02:48:38AM -0500, Brad Smith wrote:
> > Here is an update to x265 4.1.
> >
> > Tested on older amd64. Needs testing on modern amd64 for IBT.
> >
> > aarch64 currently crashes in the NEON code. Could someone take a
> > look at this? Is this xonly or something else?
>
> An updated diff with Mark's xonly fixes merged in.
Also added a comment above my newly added patches. Index: Makefile =================================================================== RCS file: /cvs/ports/multimedia/x265/Makefile,v retrieving revision 1.59 diff -u -p -u -p -r1.59 Makefile --- Makefile 7 May 2024 15:01:27 -0000 1.59 +++ Makefile 2 Dec 2024 02:57:14 -0000 @@ -1,12 +1,12 @@ COMMENT= free H.265/HEVC encoder -VER= 3.6 +VER= 4.1 DISTNAME= x265_${VER} PKGNAME= x265-${VER} CATEGORIES= multimedia SITES= https://bitbucket.org/multicoreware/x265_git/downloads/ -SHARED_LIBS= x265 24.0 +SHARED_LIBS= x265 25.0 HOMEPAGE= https://x265.org/ @@ -30,10 +30,8 @@ BUILD_DEPENDS+= devel/nasm CONFIGURE_ARGS+=-DCMAKE_ASM_YASM_FLAGS_DEBUG="-g dwarf2" \ -DENABLE_PIC=On \ - -DENABLE_TESTS=On - -CONFIGURE_ARGS+=-DX265_VERSION=${VER} \ - -DX265_LATEST_TAG=${VER} + -DENABLE_TESTS=On \ + -DGIT_ARCHETYPE=1 .if ${MACHINE_ARCH} == "arm" || ${MACHINE_ARCH} == "i386" CONFIGURE_ARGS+=-DENABLE_ASSEMBLY=Off Index: distinfo =================================================================== RCS file: /cvs/ports/multimedia/x265/distinfo,v retrieving revision 1.27 diff -u -p -u -p -r1.27 distinfo --- distinfo 7 May 2024 15:01:27 -0000 1.27 +++ distinfo 2 Dec 2024 02:57:14 -0000 @@ -1,2 +1,2 @@ -SHA256 (x265_3.6.tar.gz) = ZjUx80HFOJ9GDXMOYuEKT8yjQoyiyhCWk4Z7xf4uKAc= -SIZE (x265_3.6.tar.gz) = 1655889 +SHA256 (x265_4.1.tar.gz) = oxaZxqiYBrdLAVHl5qffZd5LSQUEgv5ev4pDedevjyk= +SIZE (x265_4.1.tar.gz) = 1725279 Index: patches/patch-source_CMakeLists_txt =================================================================== RCS file: /cvs/ports/multimedia/x265/patches/patch-source_CMakeLists_txt,v retrieving revision 1.8 diff -u -p -u -p -r1.8 patch-source_CMakeLists_txt --- patches/patch-source_CMakeLists_txt 7 May 2024 15:01:27 -0000 1.8 +++ patches/patch-source_CMakeLists_txt 2 Dec 2024 02:57:14 -0000 @@ -1,13 +1,31 @@ +- Add run-time CPU feature detection for FreeBSD / OpenBSD + Index: source/CMakeLists.txt --- source/CMakeLists.txt.orig +++ 
source/CMakeLists.txt -@@ -523,7 +523,8 @@ if(POWER) +@@ -88,7 +88,7 @@ elseif(ARM64MATCH GREATER "-1") + option(AARCH64_WARNINGS_AS_ERRORS "Build with -Werror for AArch64 Intrinsics files" OFF) + + option(AARCH64_RUNTIME_CPU_DETECT "Enable AArch64 run-time CPU feature detection" ON) +- if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin|Windows") ++ if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|FreeBSD|OpenBSD|Darwin|Windows") + set(AARCH64_RUNTIME_CPU_DETECT OFF CACHE BOOL "" FORCE) + message(STATUS "Run-time CPU feature detection unsupported on this platform") endif() - endif() +@@ -522,6 +522,16 @@ endif() --include(Version) # determine X265_VERSION and X265_LATEST_TAG -+set(X265_VERSION "unknown" CACHE STRING "") -+set(X265_LATEST_TAG "0.0" CACHE STRING "") - include_directories(. common encoder "${PROJECT_BINARY_DIR}") + if(ENABLE_ASSEMBLY) + add_definitions(-DENABLE_ASSEMBLY) ++endif() ++ ++check_symbol_exists(getauxval sys/auxv.h HAVE_GETAUXVAL) ++if(HAVE_GETAUXVAL) ++ add_definitions(-DHAVE_GETAUXVAL=1) ++endif() ++ ++check_symbol_exists(elf_aux_info sys/auxv.h HAVE_ELF_AUX_INFO) ++if(HAVE_ELF_AUX_INFO) ++ add_definitions(-DHAVE_ELF_AUX_INFO=1) + endif() - option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF) + option(CHECKED_BUILD "Enable run-time sanity checks (debugging)" OFF) Index: patches/patch-source_common_aarch64_asm_S =================================================================== RCS file: /cvs/ports/multimedia/x265/patches/patch-source_common_aarch64_asm_S,v retrieving revision 1.4 diff -u -p -u -p -r1.4 patch-source_common_aarch64_asm_S --- patches/patch-source_common_aarch64_asm_S 7 May 2024 15:01:27 -0000 1.4 +++ patches/patch-source_common_aarch64_asm_S 2 Dec 2024 02:57:14 -0000 @@ -1,7 +1,7 @@ Index: source/common/aarch64/asm.S --- source/common/aarch64/asm.S.orig +++ source/common/aarch64/asm.S -@@ -97,6 +97,7 @@ ELF .hidden EXTERN_ASM\name +@@ -107,6 +107,7 @@ ELF .hidden EXTERN_ASM\name ELF .type EXTERN_ASM\name, %function FUNC .func 
EXTERN_ASM\name EXTERN_ASM\name: Index: patches/patch-source_common_aarch64_cpu_h =================================================================== RCS file: patches/patch-source_common_aarch64_cpu_h diff -N patches/patch-source_common_aarch64_cpu_h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-source_common_aarch64_cpu_h 2 Dec 2024 02:57:14 -0000 @@ -0,0 +1,27 @@ +- Add run-time CPU feature detection for FreeBSD / OpenBSD + +Index: source/common/aarch64/cpu.h +--- source/common/aarch64/cpu.h.orig ++++ source/common/aarch64/cpu.h +@@ -119,7 +119,7 @@ static inline int aarch64_get_cpu_flags() + return flags; + } + +-#elif defined(__linux__) ++#elif HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO + + #include <sys/auxv.h> + +@@ -133,10 +133,10 @@ static inline int aarch64_get_cpu_flags() + int flags = 0; + + #if HAVE_NEON_DOTPROD || HAVE_SVE +- unsigned long hwcap = getauxval(AT_HWCAP); ++ unsigned long hwcap = x265_getauxval(AT_HWCAP); + #endif + #if HAVE_NEON_I8MM || HAVE_SVE2 +- unsigned long hwcap2 = getauxval(AT_HWCAP2); ++ unsigned long hwcap2 = x265_getauxval(AT_HWCAP2); + #endif + + #if HAVE_NEON Index: patches/patch-source_common_aarch64_dct_S =================================================================== RCS file: patches/patch-source_common_aarch64_dct_S diff -N patches/patch-source_common_aarch64_dct_S --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-source_common_aarch64_dct_S 2 Dec 2024 02:57:14 -0000 @@ -0,0 +1,44 @@ +Fix for aarch64 xonly. 
+ +Index: source/common/aarch64/dct.S +--- source/common/aarch64/dct.S.orig ++++ source/common/aarch64/dct.S +@@ -32,9 +32,6 @@ + .section .rodata + #endif + +-.align 4 +- +-.text + .set idct16_shift_1, 7 + .set idct16_shift_2, 12-(BIT_DEPTH-8) + +@@ -99,7 +96,10 @@ tbl_const_dct_0: + .word 64, 83, 36, 89, 75, 50, 18, 0 // v0, v1 + .word 90, 87, 80, 70, 57, 43, 25, 9 // v2, v3 + ++.text + ++.align 4 ++ + // ***** idct 16x16 ***** + // void idct16(const int16_t* src, int16_t* dst, intptr_t dstStride) + function PFX(idct16_neon) +@@ -112,7 +112,7 @@ function PFX(idct16_neon) + stp d8, d9, [sp,#-16]! + sub sp, sp, #(16*16*2) + +- adr x8, tbl_const_idct_0 ++ movrel x8, tbl_const_idct_0 + ldp q0, q1, [x8] + + mov x5, sp +@@ -513,7 +513,7 @@ function PFX(dct16_neon) + stp d12, d13, [sp,#-16]! + stp d14, d15, [sp,#-16]! + +- adr x6, tbl_const_dct_0 ++ movrel x6, tbl_const_dct_0 + ld4r {v16.2d, v17.2d, v18.2d, v19.2d}, [x6], #32 + ld4r {v20.2d, v21.2d, v22.2d, v23.2d}, [x6], #32 + ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x6], #64 Index: patches/patch-source_common_aarch64_intrapred_S =================================================================== RCS file: patches/patch-source_common_aarch64_intrapred_S diff -N patches/patch-source_common_aarch64_intrapred_S --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-source_common_aarch64_intrapred_S 2 Dec 2024 02:57:14 -0000 @@ -0,0 +1,44 @@ +Fix for aarch64 xonly. 
+ +Index: source/common/aarch64/intrapred.S +--- source/common/aarch64/intrapred.S.orig ++++ source/common/aarch64/intrapred.S +@@ -33,16 +33,16 @@ + #endif + + .align 4 +- +-.text +- +-.align 4 + tbl_const_1to8_7to0: + .byte 1, 2, 3, 4, 5, 6, 7, 8 + .byte 7, 6, 5, 4, 3, 2, 1, 0 + .byte 9, 10, 11, 12, 13, 14, 15, 16 + .byte 15, 14, 13, 12, 11, 10, 9, 8 + ++.text ++ ++.align 4 ++ + // ***** planar_pred ***** + // void planar_pred(pixel* dst, intptr_t dstStride, const pixel* srcPix, int /*dirMode*/, int /*bFilter*/) + function PFX(intra_pred_planar8_neon) +@@ -77,7 +77,7 @@ function PFX(intra_pred_planar8_neon) + ldr x3, [x2, #(2*8+1)] // x3 = left[x]_b + ldr d0, [x2, #1] // v0 = above[x]_b + +- adr x4, tbl_const_1to8_7to0 ++ movrel x4, tbl_const_1to8_7to0 + ldr d4, [x4] // v4 = const_b[8 7 6 5 4 3 2 1] + ldr d5, [x4, #8] // v5 = const_b[7 6 5 4 3 2 1 0] + +@@ -135,7 +135,7 @@ function PFX(intra_pred_planar16_neon) + dup v2.16b, w3 // v2 = topRight_b + dup v3.8h, w4 // v3 = bottomLeft_h + +- adr x4, tbl_const_1to8_7to0 ++ movrel x4, tbl_const_1to8_7to0 + ld2 {v4.2d, v5.2d}, [x4] // v4 = const_b[16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1] + ext v5.16b, v5.16b, v5.16b, #8 // v5 = const_b[15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0] + Index: patches/patch-source_common_cpu_cpp =================================================================== RCS file: patches/patch-source_common_cpu_cpp diff -N patches/patch-source_common_cpu_cpp --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-source_common_cpu_cpp 2 Dec 2024 02:57:14 -0000 @@ -0,0 +1,76 @@ +- Add run-time CPU feature detection for FreeBSD / OpenBSD + +Index: source/common/cpu.cpp +--- source/common/cpu.cpp.orig ++++ source/common/cpu.cpp +@@ -31,6 +31,9 @@ + #include "cpu.h" + #include "common.h" + ++#if HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO ++#include <sys/auxv.h> ++#endif + #if MACOS || SYS_FREEBSD + #include <sys/types.h> + #include <sys/sysctl.h> +@@ -41,7 +44,7 @@ + #include <machine/cpu.h> + #endif + +-#if 
X265_ARCH_ARM && !defined(HAVE_NEON) ++#if X265_ARCH_ARM && !defined(HAVE_NEON) && !(HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO) + #include <signal.h> + #include <setjmp.h> + static sigjmp_buf jmpbuf; +@@ -128,6 +131,22 @@ const cpu_name_t cpu_names[] = + { "", 0 }, + }; + ++unsigned long x265_getauxval(unsigned long type) ++{ ++#if HAVE_GETAUXVAL ++ return getauxval(type); ++#elif HAVE_ELF_AUX_INFO ++ unsigned long aux = 0; ++ int ret = elf_aux_info(type, &aux, sizeof(aux)); ++ if (ret != 0) ++ errno = ret; ++ return aux; ++#else ++ errno = ENOSYS; ++ return 0; ++#endif ++} ++ + #if X265_ARCH_X86 + + extern "C" { +@@ -348,6 +367,8 @@ void PFX(cpu_neon_test)(void); + int PFX(cpu_fast_neon_mrc_test)(void); + } + ++#define X265_ARM_HWCAP_NEON (1U << 12) ++ + uint32_t cpu_detect(bool benableavx512) + { + int flags = 0; +@@ -355,6 +376,11 @@ uint32_t cpu_detect(bool benableavx512) + #if HAVE_ARMV6 + flags |= X265_CPU_ARMV6; + ++#if HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO ++ unsigned long hwcap = x265_getauxval(AT_HWCAP); ++ ++ if (hwcap & X265_ARM_HWCAP_NEON) flags |= X265_CPU_NEON; ++#else + // don't do this hack if compiled with -mfpu=neon + #if !HAVE_NEON + static void (* oldsig)(int); +@@ -372,6 +398,7 @@ uint32_t cpu_detect(bool benableavx512) + #endif // if !HAVE_NEON + + flags |= X265_CPU_NEON; ++#endif + + // fast neon -> arm (Cortex-A9) detection relies on user access to the + // cycle counter; this assumes ARMv7 performance counters. 
Index: patches/patch-source_common_cpu_h =================================================================== RCS file: patches/patch-source_common_cpu_h diff -N patches/patch-source_common_cpu_h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-source_common_cpu_h 2 Dec 2024 02:57:14 -0000 @@ -0,0 +1,14 @@ +- Add run-time CPU feature detection for FreeBSD / OpenBSD + +Index: source/common/cpu.h +--- source/common/cpu.h.orig ++++ source/common/cpu.h +@@ -48,6 +48,8 @@ extern "C" void PFX(safe_intel_cpu_indicator_init)(voi + #define x265_emms() PFX(cpu_emms)() + #endif + ++unsigned long x265_getauxval(unsigned long); ++ + namespace X265_NS { + uint32_t cpu_detect(bool); + bool detect512();