On Sun, Dec 01, 2024 at 07:18:17PM -0500, Brad Smith wrote:
> On Sun, Dec 01, 2024 at 02:48:38AM -0500, Brad Smith wrote:
> > Here is an update to x265 4.1.
> > 
> > Tested on older amd64. Needs testing on modern amd64 for IBT.
> > 
> > aarch64 currently crashes in the NEON code. Could someone take a
> > look at this? Is this xonly or something else?
> 
> An updated diff with Mark's xonly fixes merged in.

Also added a comment above my newly added patches.

 
Index: Makefile
===================================================================
RCS file: /cvs/ports/multimedia/x265/Makefile,v
retrieving revision 1.59
diff -u -p -u -p -r1.59 Makefile
--- Makefile    7 May 2024 15:01:27 -0000       1.59
+++ Makefile    2 Dec 2024 02:57:14 -0000
@@ -1,12 +1,12 @@
 COMMENT=       free H.265/HEVC encoder
 
-VER=           3.6
+VER=           4.1
 DISTNAME=      x265_${VER}
 PKGNAME=       x265-${VER}
 CATEGORIES=    multimedia
 SITES=         https://bitbucket.org/multicoreware/x265_git/downloads/
 
-SHARED_LIBS=   x265    24.0
+SHARED_LIBS=   x265    25.0
 
 HOMEPAGE=      https://x265.org/
 
@@ -30,10 +30,8 @@ BUILD_DEPENDS+=      devel/nasm
 
 CONFIGURE_ARGS+=-DCMAKE_ASM_YASM_FLAGS_DEBUG="-g dwarf2" \
                -DENABLE_PIC=On \
-               -DENABLE_TESTS=On
-
-CONFIGURE_ARGS+=-DX265_VERSION=${VER} \
-               -DX265_LATEST_TAG=${VER}
+               -DENABLE_TESTS=On \
+               -DGIT_ARCHETYPE=1
 
 .if ${MACHINE_ARCH} == "arm" || ${MACHINE_ARCH} == "i386"
 CONFIGURE_ARGS+=-DENABLE_ASSEMBLY=Off
Index: distinfo
===================================================================
RCS file: /cvs/ports/multimedia/x265/distinfo,v
retrieving revision 1.27
diff -u -p -u -p -r1.27 distinfo
--- distinfo    7 May 2024 15:01:27 -0000       1.27
+++ distinfo    2 Dec 2024 02:57:14 -0000
@@ -1,2 +1,2 @@
-SHA256 (x265_3.6.tar.gz) = ZjUx80HFOJ9GDXMOYuEKT8yjQoyiyhCWk4Z7xf4uKAc=
-SIZE (x265_3.6.tar.gz) = 1655889
+SHA256 (x265_4.1.tar.gz) = oxaZxqiYBrdLAVHl5qffZd5LSQUEgv5ev4pDedevjyk=
+SIZE (x265_4.1.tar.gz) = 1725279
Index: patches/patch-source_CMakeLists_txt
===================================================================
RCS file: /cvs/ports/multimedia/x265/patches/patch-source_CMakeLists_txt,v
retrieving revision 1.8
diff -u -p -u -p -r1.8 patch-source_CMakeLists_txt
--- patches/patch-source_CMakeLists_txt 7 May 2024 15:01:27 -0000       1.8
+++ patches/patch-source_CMakeLists_txt 2 Dec 2024 02:57:14 -0000
@@ -1,13 +1,31 @@
+- Add run-time CPU feature detection for FreeBSD / OpenBSD
+
 Index: source/CMakeLists.txt
 --- source/CMakeLists.txt.orig
 +++ source/CMakeLists.txt
-@@ -523,7 +523,8 @@ if(POWER)
+@@ -88,7 +88,7 @@ elseif(ARM64MATCH GREATER "-1")
+     option(AARCH64_WARNINGS_AS_ERRORS "Build with -Werror for AArch64 Intrinsics files" OFF)
+ 
+     option(AARCH64_RUNTIME_CPU_DETECT "Enable AArch64 run-time CPU feature detection" ON)
+-    if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin|Windows")
++    if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|FreeBSD|OpenBSD|Darwin|Windows")
+         set(AARCH64_RUNTIME_CPU_DETECT OFF CACHE BOOL "" FORCE)
+         message(STATUS "Run-time CPU feature detection unsupported on this platform")
      endif()
- endif()
+@@ -522,6 +522,16 @@ endif()
  
--include(Version) # determine X265_VERSION and X265_LATEST_TAG
-+set(X265_VERSION "unknown" CACHE STRING "")
-+set(X265_LATEST_TAG "0.0" CACHE STRING "")
- include_directories(. common encoder "${PROJECT_BINARY_DIR}")
+ if(ENABLE_ASSEMBLY)
+    add_definitions(-DENABLE_ASSEMBLY)
++endif()
++
++check_symbol_exists(getauxval sys/auxv.h HAVE_GETAUXVAL)
++if(HAVE_GETAUXVAL)
++    add_definitions(-DHAVE_GETAUXVAL=1)
++endif()
++
++check_symbol_exists(elf_aux_info sys/auxv.h HAVE_ELF_AUX_INFO)
++if(HAVE_ELF_AUX_INFO)
++    add_definitions(-DHAVE_ELF_AUX_INFO=1)
+ endif()
  
- option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF)
+ option(CHECKED_BUILD "Enable run-time sanity checks (debugging)" OFF)
Index: patches/patch-source_common_aarch64_asm_S
===================================================================
RCS file: /cvs/ports/multimedia/x265/patches/patch-source_common_aarch64_asm_S,v
retrieving revision 1.4
diff -u -p -u -p -r1.4 patch-source_common_aarch64_asm_S
--- patches/patch-source_common_aarch64_asm_S   7 May 2024 15:01:27 -0000       1.4
+++ patches/patch-source_common_aarch64_asm_S   2 Dec 2024 02:57:14 -0000
@@ -1,7 +1,7 @@
 Index: source/common/aarch64/asm.S
 --- source/common/aarch64/asm.S.orig
 +++ source/common/aarch64/asm.S
-@@ -97,6 +97,7 @@ ELF     .hidden EXTERN_ASM\name
+@@ -107,6 +107,7 @@ ELF     .hidden EXTERN_ASM\name
  ELF     .type   EXTERN_ASM\name, %function
  FUNC    .func   EXTERN_ASM\name
  EXTERN_ASM\name:
Index: patches/patch-source_common_aarch64_cpu_h
===================================================================
RCS file: patches/patch-source_common_aarch64_cpu_h
diff -N patches/patch-source_common_aarch64_cpu_h
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-source_common_aarch64_cpu_h   2 Dec 2024 02:57:14 -0000
@@ -0,0 +1,27 @@
+- Add run-time CPU feature detection for FreeBSD / OpenBSD
+
+Index: source/common/aarch64/cpu.h
+--- source/common/aarch64/cpu.h.orig
++++ source/common/aarch64/cpu.h
+@@ -119,7 +119,7 @@ static inline int aarch64_get_cpu_flags()
+     return flags;
+ }
+ 
+-#elif defined(__linux__)
++#elif HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO
+ 
+ #include <sys/auxv.h>
+ 
+@@ -133,10 +133,10 @@ static inline int aarch64_get_cpu_flags()
+     int flags = 0;
+ 
+ #if HAVE_NEON_DOTPROD || HAVE_SVE
+-    unsigned long hwcap = getauxval(AT_HWCAP);
++    unsigned long hwcap = x265_getauxval(AT_HWCAP);
+ #endif
+ #if HAVE_NEON_I8MM || HAVE_SVE2
+-    unsigned long hwcap2 = getauxval(AT_HWCAP2);
++    unsigned long hwcap2 = x265_getauxval(AT_HWCAP2);
+ #endif
+ 
+ #if HAVE_NEON
Index: patches/patch-source_common_aarch64_dct_S
===================================================================
RCS file: patches/patch-source_common_aarch64_dct_S
diff -N patches/patch-source_common_aarch64_dct_S
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-source_common_aarch64_dct_S   2 Dec 2024 02:57:14 -0000
@@ -0,0 +1,44 @@
+Fix for aarch64 xonly.
+
+Index: source/common/aarch64/dct.S
+--- source/common/aarch64/dct.S.orig
++++ source/common/aarch64/dct.S
+@@ -32,9 +32,6 @@
+ .section .rodata
+ #endif
+ 
+-.align 4
+-
+-.text
+ .set idct16_shift_1, 7
+ .set idct16_shift_2, 12-(BIT_DEPTH-8)
+ 
+@@ -99,7 +96,10 @@ tbl_const_dct_0:
+     .word 64, 83, 36, 89, 75, 50, 18,  0    // v0, v1
+     .word 90, 87, 80, 70, 57, 43, 25,  9    // v2, v3
+ 
++.text
+ 
++.align 4
++
+ // ***** idct 16x16 *****
+ // void idct16(const int16_t* src, int16_t* dst, intptr_t dstStride)
+ function PFX(idct16_neon)
+@@ -112,7 +112,7 @@ function PFX(idct16_neon)
+     stp             d8, d9, [sp,#-16]!
+     sub             sp, sp, #(16*16*2)
+ 
+-    adr             x8, tbl_const_idct_0
++    movrel          x8, tbl_const_idct_0
+     ldp             q0, q1, [x8]
+ 
+     mov             x5, sp
+@@ -513,7 +513,7 @@ function PFX(dct16_neon)
+     stp             d12, d13, [sp,#-16]!
+     stp             d14, d15, [sp,#-16]!
+ 
+-    adr             x6, tbl_const_dct_0
++    movrel          x6, tbl_const_dct_0
+     ld4r            {v16.2d, v17.2d, v18.2d, v19.2d}, [x6], #32
+     ld4r            {v20.2d, v21.2d, v22.2d, v23.2d}, [x6], #32
+     ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x6], #64
Index: patches/patch-source_common_aarch64_intrapred_S
===================================================================
RCS file: patches/patch-source_common_aarch64_intrapred_S
diff -N patches/patch-source_common_aarch64_intrapred_S
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-source_common_aarch64_intrapred_S     2 Dec 2024 02:57:14 -0000
@@ -0,0 +1,44 @@
+Fix for aarch64 xonly.
+
+Index: source/common/aarch64/intrapred.S
+--- source/common/aarch64/intrapred.S.orig
++++ source/common/aarch64/intrapred.S
+@@ -33,16 +33,16 @@
+ #endif
+ 
+ .align 4
+-
+-.text
+-
+-.align 4
+ tbl_const_1to8_7to0:
+     .byte 1, 2, 3, 4, 5, 6, 7, 8
+     .byte 7, 6, 5, 4, 3, 2, 1, 0
+     .byte 9, 10, 11, 12, 13, 14, 15, 16
+     .byte 15, 14, 13, 12, 11, 10, 9, 8
+ 
++.text
++
++.align 4
++
+ // ***** planar_pred *****
+// void planar_pred(pixel* dst, intptr_t dstStride, const pixel* srcPix, int /*dirMode*/, int /*bFilter*/)
+ function PFX(intra_pred_planar8_neon)
+@@ -77,7 +77,7 @@ function PFX(intra_pred_planar8_neon)
+     ldr             x3, [x2, #(2*8+1)]              // x3 = left[x]_b
+     ldr             d0, [x2, #1]                    // v0 = above[x]_b
+ 
+-    adr             x4, tbl_const_1to8_7to0
++    movrel          x4, tbl_const_1to8_7to0
+     ldr             d4, [x4]                        // v4 = const_b[8 7 6 5 4 3 2 1]
+     ldr             d5, [x4, #8]                    // v5 = const_b[7 6 5 4 3 2 1 0]
+ 
+@@ -135,7 +135,7 @@ function PFX(intra_pred_planar16_neon)
+     dup             v2.16b, w3                      // v2 = topRight_b
+     dup             v3.8h, w4                       // v3 = bottomLeft_h
+ 
+-    adr             x4, tbl_const_1to8_7to0
++    movrel          x4, tbl_const_1to8_7to0
+     ld2             {v4.2d, v5.2d}, [x4]            // v4 = const_b[16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1]
+     ext             v5.16b, v5.16b, v5.16b, #8      // v5 = const_b[15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
+ 
Index: patches/patch-source_common_cpu_cpp
===================================================================
RCS file: patches/patch-source_common_cpu_cpp
diff -N patches/patch-source_common_cpu_cpp
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-source_common_cpu_cpp 2 Dec 2024 02:57:14 -0000
@@ -0,0 +1,76 @@
+- Add run-time CPU feature detection for FreeBSD / OpenBSD
+
+Index: source/common/cpu.cpp
+--- source/common/cpu.cpp.orig
++++ source/common/cpu.cpp
+@@ -31,6 +31,9 @@
+ #include "cpu.h"
+ #include "common.h"
+ 
++#if HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO
++#include <sys/auxv.h>
++#endif
+ #if MACOS || SYS_FREEBSD
+ #include <sys/types.h>
+ #include <sys/sysctl.h>
+@@ -41,7 +44,7 @@
+ #include <machine/cpu.h>
+ #endif
+ 
+-#if X265_ARCH_ARM && !defined(HAVE_NEON)
++#if X265_ARCH_ARM && !defined(HAVE_NEON) && !(HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO)
+ #include <signal.h>
+ #include <setjmp.h>
+ static sigjmp_buf jmpbuf;
+@@ -128,6 +131,22 @@ const cpu_name_t cpu_names[] =
+     { "", 0 },
+ };
+ 
++unsigned long x265_getauxval(unsigned long type)
++{
++#if HAVE_GETAUXVAL
++    return getauxval(type);
++#elif HAVE_ELF_AUX_INFO
++    unsigned long aux = 0;
++    int ret = elf_aux_info(type, &aux, sizeof(aux));
++    if (ret != 0)
++        errno = ret;
++    return aux;
++#else
++    errno = ENOSYS;
++    return 0;
++#endif
++}
++
+ #if X265_ARCH_X86
+ 
+ extern "C" {
+@@ -348,6 +367,8 @@ void PFX(cpu_neon_test)(void);
+ int PFX(cpu_fast_neon_mrc_test)(void);
+ }
+ 
++#define X265_ARM_HWCAP_NEON (1U << 12)
++
+ uint32_t cpu_detect(bool benableavx512)
+ {
+     int flags = 0;
+@@ -355,6 +376,11 @@ uint32_t cpu_detect(bool benableavx512)
+ #if HAVE_ARMV6
+     flags |= X265_CPU_ARMV6;
+ 
++#if HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO
++    unsigned long hwcap = x265_getauxval(AT_HWCAP);
++
++    if (hwcap & X265_ARM_HWCAP_NEON) flags |= X265_CPU_NEON;
++#else
+     // don't do this hack if compiled with -mfpu=neon
+ #if !HAVE_NEON
+     static void (* oldsig)(int);
+@@ -372,6 +398,7 @@ uint32_t cpu_detect(bool benableavx512)
+ #endif // if !HAVE_NEON
+ 
+     flags |= X265_CPU_NEON;
++#endif
+ 
+     // fast neon -> arm (Cortex-A9) detection relies on user access to the
+     // cycle counter; this assumes ARMv7 performance counters.
Index: patches/patch-source_common_cpu_h
===================================================================
RCS file: patches/patch-source_common_cpu_h
diff -N patches/patch-source_common_cpu_h
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-source_common_cpu_h   2 Dec 2024 02:57:14 -0000
@@ -0,0 +1,14 @@
+- Add run-time CPU feature detection for FreeBSD / OpenBSD
+
+Index: source/common/cpu.h
+--- source/common/cpu.h.orig
++++ source/common/cpu.h
+@@ -48,6 +48,8 @@ extern "C" void PFX(safe_intel_cpu_indicator_init)(voi
+ #define x265_emms() PFX(cpu_emms)()
+ #endif
+ 
++unsigned long x265_getauxval(unsigned long);
++
+ namespace X265_NS {
+ uint32_t cpu_detect(bool);
+ bool detect512();

Reply via email to