Hi,

We have marked graphics/ffmpeg (and some ports using it) with USE_NOBTCFI=Yes
because the library doesn't have proper function annotations for IBT on amd64.

But the list of ports using ffmpeg as a library is huge:

        $ show-reverse-deps -v graphics/ffmpeg | grep LIB_DEPENDS | wc -l
             217

The following patch makes the ffmpeg functions start with the `endbr64'
instruction; with it, the ffmpeg testsuite passes on an IBT machine.

I added a _CET_ENDBR define (the name follows the <cet.h> convention) because
some code is shared between amd64 and i386, so only amd64 should get the
`endbr64' instruction (I haven't tested it on i386; I only checked that the
%else branch would compile).

Most of the ffmpeg functions are declared through a macro (cglobal), so I just
added _CET_ENDBR at the beginning of the function it declares.

The testsuite showed that libavcodec/x86/fft.asm has several functions that do
not use `cglobal', plus a jump table used to choose between fft implementations
(depending on cpuflags). I added _CET_ENDBR there too.

This should make it possible to avoid marking a large portion of the ports tree
with USE_NOBTCFI=Yes.

Comments or OK?
-- 
Sebastien Marie

Index: Makefile
===================================================================
RCS file: /cvs/ports/graphics/ffmpeg/Makefile,v
retrieving revision 1.233
diff -u -p -r1.233 Makefile
--- Makefile    12 Jun 2023 22:58:00 -0000      1.233
+++ Makefile    13 Jun 2023 07:58:36 -0000
@@ -3,12 +3,10 @@ COMMENT=      audio/video converter and strea
 V=             4.4.4
 DISTNAME=      ffmpeg-${V}
 EPOCH=         1
-REVISION=      0
+REVISION=      1
 CATEGORIES=    graphics multimedia
 MASTER_SITES=  https://ffmpeg.org/releases/
 EXTRACT_SUFX=  .tar.xz
-
-USE_NOBTCFI=   Yes
 
 SHARED_LIBS=   avcodec         25.0 \
                avdevice        13.0 \
Index: patches/patch-libavcodec_x86_fft_asm
===================================================================
RCS file: /cvs/ports/graphics/ffmpeg/patches/patch-libavcodec_x86_fft_asm,v
retrieving revision 1.1
diff -u -p -r1.1 patch-libavcodec_x86_fft_asm
--- patches/patch-libavcodec_x86_fft_asm        24 Jan 2023 14:13:12 -0000      1.1
+++ patches/patch-libavcodec_x86_fft_asm        13 Jun 2023 07:58:36 -0000
@@ -1,7 +1,90 @@
+- place a table in rodata
+- use _CET_ENDBR in functions header
+
 Index: libavcodec/x86/fft.asm
 --- libavcodec/x86/fft.asm.orig
 +++ libavcodec/x86/fft.asm
-@@ -548,10 +548,6 @@ DEFINE_ARGS zc, w, n, o1, o3
+@@ -325,6 +325,7 @@ INIT_YMM avx
+ %if HAVE_AVX_EXTERNAL
+ align 16
+ fft8_avx:
++    _CET_ENDBR
+     mova      m0, Z(0)
+     mova      m1, Z(1)
+     T8_AVX    m0, m1, m2, m3, m4
+@@ -335,6 +336,7 @@ fft8_avx:
+ 
+ align 16
+ fft16_avx:
++    _CET_ENDBR
+     mova       m2, Z(2)
+     mova       m3, Z(3)
+     T4_SSE     m2, m3, m7
+@@ -372,6 +374,7 @@ fft16_avx:
+ 
+ align 16
+ fft32_avx:
++    _CET_ENDBR
+     call fft16_avx
+ 
+     mova m0, Z(4)
+@@ -396,6 +399,7 @@ fft32_avx:
+     ret
+ 
+ fft32_interleave_avx:
++    _CET_ENDBR
+     call fft32_avx
+     mov r2d, 32
+ .deint_loop:
+@@ -419,6 +423,7 @@ INIT_XMM sse
+ align 16
+ fft4_avx:
+ fft4_sse:
++    _CET_ENDBR
+     mova     m0, Z(0)
+     mova     m1, Z(1)
+     T4_SSE   m0, m1, m2
+@@ -428,6 +433,7 @@ fft4_sse:
+ 
+ align 16
+ fft8_sse:
++    _CET_ENDBR
+     mova     m0, Z(0)
+     mova     m1, Z(1)
+     T4_SSE   m0, m1, m2
+@@ -442,6 +448,7 @@ fft8_sse:
+ 
+ align 16
+ fft16_sse:
++    _CET_ENDBR
+     mova     m0, Z(0)
+     mova     m1, Z(1)
+     T4_SSE   m0, m1, m2
+@@ -465,6 +472,7 @@ fft16_sse:
+ %macro FFT48_3DNOW 0
+ align 16
+ fft4 %+ SUFFIX:
++    _CET_ENDBR
+     T2_3DNOW m0, m1, Z(0), Z(1)
+     mova     m2, Z(2)
+     mova     m3, Z(3)
+@@ -479,6 +487,7 @@ fft4 %+ SUFFIX:
+ 
+ align 16
+ fft8 %+ SUFFIX:
++    _CET_ENDBR
+     T2_3DNOW m0, m1, Z(0), Z(1)
+     mova     m2, Z(2)
+     mova     m3, Z(3)
+@@ -532,6 +541,7 @@ FFT48_3DNOW
+ %macro DECL_PASS 2+ ; name, payload
+ align 16
+ %1:
++    _CET_ENDBR
+ DEFINE_ARGS zc, w, n, o1, o3
+     lea o3q, [nq*3]
+     lea o1q, [nq*8]
+@@ -548,10 +558,6 @@ DEFINE_ARGS zc, w, n, o1, o3
  %macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
      lea r2, [dispatch_tab%1]
      mov r2, [r2 + (%2q-2)*gprsize]
@@ -12,7 +95,7 @@ Index: libavcodec/x86/fft.asm
      call r2
  %endmacro ; FFT_DISPATCH
  
-@@ -731,11 +727,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
+@@ -731,11 +737,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
  %define pass_interleave_3dnowext pass_interleave_3dnow
  %endif
  
@@ -24,7 +107,15 @@ Index: libavcodec/x86/fft.asm
  
  %macro DECL_FFT 1-2 ; nbits, suffix
  %ifidn %0, 1
-@@ -773,8 +765,10 @@ fft %+ n %+ fullsuffix:
+@@ -759,6 +761,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
+ 
+ align 16
+ fft %+ n %+ fullsuffix:
++    _CET_ENDBR
+     call fft %+ n2 %+ SUFFIX
+     add r0, n*4 - (n&(-2<<%1))
+     call fft %+ n4 %+ SUFFIX
+@@ -773,8 +776,10 @@ fft %+ n %+ fullsuffix:
  %endrep
  %undef n
  
Index: patches/patch-libavutil_x86_x86inc_asm
===================================================================
RCS file: patches/patch-libavutil_x86_x86inc_asm
diff -N patches/patch-libavutil_x86_x86inc_asm
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-libavutil_x86_x86inc_asm      13 Jun 2023 07:58:36 -0000
@@ -0,0 +1,27 @@
+- define _CET_ENDBR depending X86_64 / X86
+- use it in cglobal macro
+
+Index: libavutil/x86/x86inc.asm
+--- libavutil/x86/x86inc.asm.orig
++++ libavutil/x86/x86inc.asm
+@@ -53,6 +53,12 @@
+     %endif
+ %endif
+ 
++%if ARCH_X86_64
++    %define _CET_ENDBR endbr64
++%else
++    %define _CET_ENDBR
++%endif
++
+ %define WIN64  0
+ %define UNIX64 0
+ %if ARCH_X86_64
+@@ -746,6 +752,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg,
+     %endif
+     align function_align
+     %2:
++    _CET_ENDBR
+     RESET_MM_PERMUTATION        ; needed for x86-64, also makes disassembly somewhat nicer
+     %xdefine rstk rsp           ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
+     %assign stack_offset 0      ; stack pointer offset relative to the return address
Index: Makefile
===================================================================
RCS file: /cvs/ports/graphics/ffmpeg/Makefile,v
retrieving revision 1.233
diff -u -p -r1.233 Makefile
--- Makefile    12 Jun 2023 22:58:00 -0000      1.233
+++ Makefile    13 Jun 2023 07:58:36 -0000
@@ -3,12 +3,10 @@ COMMENT=      audio/video converter and strea
 V=             4.4.4
 DISTNAME=      ffmpeg-${V}
 EPOCH=         1
-REVISION=      0
+REVISION=      1
 CATEGORIES=    graphics multimedia
 MASTER_SITES=  https://ffmpeg.org/releases/
 EXTRACT_SUFX=  .tar.xz
-
-USE_NOBTCFI=   Yes
 
 SHARED_LIBS=   avcodec         25.0 \
                avdevice        13.0 \
Index: patches/patch-libavcodec_x86_fft_asm
===================================================================
RCS file: /cvs/ports/graphics/ffmpeg/patches/patch-libavcodec_x86_fft_asm,v
retrieving revision 1.1
diff -u -p -r1.1 patch-libavcodec_x86_fft_asm
--- patches/patch-libavcodec_x86_fft_asm        24 Jan 2023 14:13:12 -0000      1.1
+++ patches/patch-libavcodec_x86_fft_asm        13 Jun 2023 07:58:36 -0000
@@ -1,7 +1,90 @@
+- place a table in rodata
+- use _CET_ENDBR in functions header
+
 Index: libavcodec/x86/fft.asm
 --- libavcodec/x86/fft.asm.orig
 +++ libavcodec/x86/fft.asm
-@@ -548,10 +548,6 @@ DEFINE_ARGS zc, w, n, o1, o3
+@@ -325,6 +325,7 @@ INIT_YMM avx
+ %if HAVE_AVX_EXTERNAL
+ align 16
+ fft8_avx:
++    _CET_ENDBR
+     mova      m0, Z(0)
+     mova      m1, Z(1)
+     T8_AVX    m0, m1, m2, m3, m4
+@@ -335,6 +336,7 @@ fft8_avx:
+ 
+ align 16
+ fft16_avx:
++    _CET_ENDBR
+     mova       m2, Z(2)
+     mova       m3, Z(3)
+     T4_SSE     m2, m3, m7
+@@ -372,6 +374,7 @@ fft16_avx:
+ 
+ align 16
+ fft32_avx:
++    _CET_ENDBR
+     call fft16_avx
+ 
+     mova m0, Z(4)
+@@ -396,6 +399,7 @@ fft32_avx:
+     ret
+ 
+ fft32_interleave_avx:
++    _CET_ENDBR
+     call fft32_avx
+     mov r2d, 32
+ .deint_loop:
+@@ -419,6 +423,7 @@ INIT_XMM sse
+ align 16
+ fft4_avx:
+ fft4_sse:
++    _CET_ENDBR
+     mova     m0, Z(0)
+     mova     m1, Z(1)
+     T4_SSE   m0, m1, m2
+@@ -428,6 +433,7 @@ fft4_sse:
+ 
+ align 16
+ fft8_sse:
++    _CET_ENDBR
+     mova     m0, Z(0)
+     mova     m1, Z(1)
+     T4_SSE   m0, m1, m2
+@@ -442,6 +448,7 @@ fft8_sse:
+ 
+ align 16
+ fft16_sse:
++    _CET_ENDBR
+     mova     m0, Z(0)
+     mova     m1, Z(1)
+     T4_SSE   m0, m1, m2
+@@ -465,6 +472,7 @@ fft16_sse:
+ %macro FFT48_3DNOW 0
+ align 16
+ fft4 %+ SUFFIX:
++    _CET_ENDBR
+     T2_3DNOW m0, m1, Z(0), Z(1)
+     mova     m2, Z(2)
+     mova     m3, Z(3)
+@@ -479,6 +487,7 @@ fft4 %+ SUFFIX:
+ 
+ align 16
+ fft8 %+ SUFFIX:
++    _CET_ENDBR
+     T2_3DNOW m0, m1, Z(0), Z(1)
+     mova     m2, Z(2)
+     mova     m3, Z(3)
+@@ -532,6 +541,7 @@ FFT48_3DNOW
+ %macro DECL_PASS 2+ ; name, payload
+ align 16
+ %1:
++    _CET_ENDBR
+ DEFINE_ARGS zc, w, n, o1, o3
+     lea o3q, [nq*3]
+     lea o1q, [nq*8]
+@@ -548,10 +558,6 @@ DEFINE_ARGS zc, w, n, o1, o3
  %macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
      lea r2, [dispatch_tab%1]
      mov r2, [r2 + (%2q-2)*gprsize]
@@ -12,7 +95,7 @@ Index: libavcodec/x86/fft.asm
      call r2
  %endmacro ; FFT_DISPATCH
  
-@@ -731,11 +727,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
+@@ -731,11 +737,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
  %define pass_interleave_3dnowext pass_interleave_3dnow
  %endif
  
@@ -24,7 +107,15 @@ Index: libavcodec/x86/fft.asm
  
  %macro DECL_FFT 1-2 ; nbits, suffix
  %ifidn %0, 1
-@@ -773,8 +765,10 @@ fft %+ n %+ fullsuffix:
+@@ -759,6 +761,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
+ 
+ align 16
+ fft %+ n %+ fullsuffix:
++    _CET_ENDBR
+     call fft %+ n2 %+ SUFFIX
+     add r0, n*4 - (n&(-2<<%1))
+     call fft %+ n4 %+ SUFFIX
+@@ -773,8 +776,10 @@ fft %+ n %+ fullsuffix:
  %endrep
  %undef n
  
Index: patches/patch-libavutil_x86_x86inc_asm
===================================================================
RCS file: patches/patch-libavutil_x86_x86inc_asm
diff -N patches/patch-libavutil_x86_x86inc_asm
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-libavutil_x86_x86inc_asm      13 Jun 2023 07:58:36 -0000
@@ -0,0 +1,27 @@
+- define _CET_ENDBR depending X86_64 / X86
+- use it in cglobal macro
+
+Index: libavutil/x86/x86inc.asm
+--- libavutil/x86/x86inc.asm.orig
++++ libavutil/x86/x86inc.asm
+@@ -53,6 +53,12 @@
+     %endif
+ %endif
+ 
++%if ARCH_X86_64
++    %define _CET_ENDBR endbr64
++%else
++    %define _CET_ENDBR
++%endif
++
+ %define WIN64  0
+ %define UNIX64 0
+ %if ARCH_X86_64
+@@ -746,6 +752,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg,
+     %endif
+     align function_align
+     %2:
++    _CET_ENDBR
+     RESET_MM_PERMUTATION        ; needed for x86-64, also makes disassembly somewhat nicer
+     %xdefine rstk rsp           ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
+     %assign stack_offset 0      ; stack pointer offset relative to the return address

Reply via email to