Hi, We have marked graphics/ffmpeg (and some ports using it) with USE_NOBTCFI=Yes, as the library's functions lack proper IBT annotations on amd64.
But the list of ports using ffmpeg as a library is huge: $ show-reverse-deps -v graphics/ffmpeg | grep LIB_DEPENDS | wc -l 217 The following patch makes the ffmpeg functions begin with the `endbr64' instruction, and the testsuite of ffmpeg passes on an IBT machine. I added a _CET_ENDBR define (name following the <cet.h> convention) as some code is shared between amd64 and i386, so that only amd64 gets the `endbr64' instruction (I have not tested it on i386, only checked that the %else branch would compile). Most of the ffmpeg functions use a macro for their declaration (cglobal), so I just added _CET_ENDBR at the beginning of it. The testsuite showed that libavcodec/x86/fft.asm has several functions not using `cglobal' and a jumptable to properly choose between fft implementations (depending on cpuflags). I added _CET_ENDBR there too. This should let us avoid marking a large portion of the ports tree with USE_NOBTCFI=Yes. Comments or OK? -- Sebastien Marie Index: Makefile =================================================================== RCS file: /cvs/ports/graphics/ffmpeg/Makefile,v retrieving revision 1.233 diff -u -p -r1.233 Makefile --- Makefile 12 Jun 2023 22:58:00 -0000 1.233 +++ Makefile 13 Jun 2023 07:58:36 -0000 @@ -3,12 +3,10 @@ COMMENT= audio/video converter and strea V= 4.4.4 DISTNAME= ffmpeg-${V} EPOCH= 1 -REVISION= 0 +REVISION= 1 CATEGORIES= graphics multimedia MASTER_SITES= https://ffmpeg.org/releases/ EXTRACT_SUFX= .tar.xz - -USE_NOBTCFI= Yes SHARED_LIBS= avcodec 25.0 \ avdevice 13.0 \ Index: patches/patch-libavcodec_x86_fft_asm =================================================================== RCS file: /cvs/ports/graphics/ffmpeg/patches/patch-libavcodec_x86_fft_asm,v retrieving revision 1.1 diff -u -p -r1.1 patch-libavcodec_x86_fft_asm --- patches/patch-libavcodec_x86_fft_asm 24 Jan 2023 14:13:12 -0000 1.1 +++ patches/patch-libavcodec_x86_fft_asm 13 Jun 2023 07:58:36 -0000 @@ -1,7 +1,90 @@ +- place a table in rodata +- use _CET_ENDBR in functions header + Index: 
libavcodec/x86/fft.asm --- libavcodec/x86/fft.asm.orig +++ libavcodec/x86/fft.asm -@@ -548,10 +548,6 @@ DEFINE_ARGS zc, w, n, o1, o3 +@@ -325,6 +325,7 @@ INIT_YMM avx + %if HAVE_AVX_EXTERNAL + align 16 + fft8_avx: ++ _CET_ENDBR + mova m0, Z(0) + mova m1, Z(1) + T8_AVX m0, m1, m2, m3, m4 +@@ -335,6 +336,7 @@ fft8_avx: + + align 16 + fft16_avx: ++ _CET_ENDBR + mova m2, Z(2) + mova m3, Z(3) + T4_SSE m2, m3, m7 +@@ -372,6 +374,7 @@ fft16_avx: + + align 16 + fft32_avx: ++ _CET_ENDBR + call fft16_avx + + mova m0, Z(4) +@@ -396,6 +399,7 @@ fft32_avx: + ret + + fft32_interleave_avx: ++ _CET_ENDBR + call fft32_avx + mov r2d, 32 + .deint_loop: +@@ -419,6 +423,7 @@ INIT_XMM sse + align 16 + fft4_avx: + fft4_sse: ++ _CET_ENDBR + mova m0, Z(0) + mova m1, Z(1) + T4_SSE m0, m1, m2 +@@ -428,6 +433,7 @@ fft4_sse: + + align 16 + fft8_sse: ++ _CET_ENDBR + mova m0, Z(0) + mova m1, Z(1) + T4_SSE m0, m1, m2 +@@ -442,6 +448,7 @@ fft8_sse: + + align 16 + fft16_sse: ++ _CET_ENDBR + mova m0, Z(0) + mova m1, Z(1) + T4_SSE m0, m1, m2 +@@ -465,6 +472,7 @@ fft16_sse: + %macro FFT48_3DNOW 0 + align 16 + fft4 %+ SUFFIX: ++ _CET_ENDBR + T2_3DNOW m0, m1, Z(0), Z(1) + mova m2, Z(2) + mova m3, Z(3) +@@ -479,6 +487,7 @@ fft4 %+ SUFFIX: + + align 16 + fft8 %+ SUFFIX: ++ _CET_ENDBR + T2_3DNOW m0, m1, Z(0), Z(1) + mova m2, Z(2) + mova m3, Z(3) +@@ -532,6 +541,7 @@ FFT48_3DNOW + %macro DECL_PASS 2+ ; name, payload + align 16 + %1: ++ _CET_ENDBR + DEFINE_ARGS zc, w, n, o1, o3 + lea o3q, [nq*3] + lea o1q, [nq*8] +@@ -548,10 +558,6 @@ DEFINE_ARGS zc, w, n, o1, o3 %macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs lea r2, [dispatch_tab%1] mov r2, [r2 + (%2q-2)*gprsize] @@ -12,7 +95,7 @@ Index: libavcodec/x86/fft.asm call r2 %endmacro ; FFT_DISPATCH -@@ -731,11 +727,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0 +@@ -731,11 +737,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0 %define pass_interleave_3dnowext pass_interleave_3dnow %endif @@ -24,7 +107,15 @@ Index: libavcodec/x86/fft.asm %macro DECL_FFT 1-2 ; 
nbits, suffix %ifidn %0, 1 -@@ -773,8 +765,10 @@ fft %+ n %+ fullsuffix: +@@ -759,6 +761,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0 + + align 16 + fft %+ n %+ fullsuffix: ++ _CET_ENDBR + call fft %+ n2 %+ SUFFIX + add r0, n*4 - (n&(-2<<%1)) + call fft %+ n4 %+ SUFFIX +@@ -773,8 +776,10 @@ fft %+ n %+ fullsuffix: %endrep %undef n Index: patches/patch-libavutil_x86_x86inc_asm =================================================================== RCS file: patches/patch-libavutil_x86_x86inc_asm diff -N patches/patch-libavutil_x86_x86inc_asm --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-libavutil_x86_x86inc_asm 13 Jun 2023 07:58:36 -0000 @@ -0,0 +1,27 @@ +- define _CET_ENDBR depending X86_64 / X86 +- use it in cglobal macro + +Index: libavutil/x86/x86inc.asm +--- libavutil/x86/x86inc.asm.orig ++++ libavutil/x86/x86inc.asm +@@ -53,6 +53,12 @@ + %endif + %endif + ++%if ARCH_X86_64 ++ %define _CET_ENDBR endbr64 ++%else ++ %define _CET_ENDBR ++%endif ++ + %define WIN64 0 + %define UNIX64 0 + %if ARCH_X86_64 +@@ -746,6 +752,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, + %endif + align function_align + %2: ++ _CET_ENDBR + RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer + %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required + %assign stack_offset 0 ; stack pointer offset relative to the return address
Index: Makefile =================================================================== RCS file: /cvs/ports/graphics/ffmpeg/Makefile,v retrieving revision 1.233 diff -u -p -r1.233 Makefile --- Makefile 12 Jun 2023 22:58:00 -0000 1.233 +++ Makefile 13 Jun 2023 07:58:36 -0000 @@ -3,12 +3,10 @@ COMMENT= audio/video converter and strea V= 4.4.4 DISTNAME= ffmpeg-${V} EPOCH= 1 -REVISION= 0 +REVISION= 1 CATEGORIES= graphics multimedia MASTER_SITES= https://ffmpeg.org/releases/ EXTRACT_SUFX= .tar.xz - -USE_NOBTCFI= Yes SHARED_LIBS= avcodec 25.0 \ avdevice 13.0 \ Index: patches/patch-libavcodec_x86_fft_asm =================================================================== RCS file: /cvs/ports/graphics/ffmpeg/patches/patch-libavcodec_x86_fft_asm,v retrieving revision 1.1 diff -u -p -r1.1 patch-libavcodec_x86_fft_asm --- patches/patch-libavcodec_x86_fft_asm 24 Jan 2023 14:13:12 -0000 1.1 +++ patches/patch-libavcodec_x86_fft_asm 13 Jun 2023 07:58:36 -0000 @@ -1,7 +1,90 @@ +- place a table in rodata +- use _CET_ENDBR in functions header + Index: libavcodec/x86/fft.asm --- libavcodec/x86/fft.asm.orig +++ libavcodec/x86/fft.asm -@@ -548,10 +548,6 @@ DEFINE_ARGS zc, w, n, o1, o3 +@@ -325,6 +325,7 @@ INIT_YMM avx + %if HAVE_AVX_EXTERNAL + align 16 + fft8_avx: ++ _CET_ENDBR + mova m0, Z(0) + mova m1, Z(1) + T8_AVX m0, m1, m2, m3, m4 +@@ -335,6 +336,7 @@ fft8_avx: + + align 16 + fft16_avx: ++ _CET_ENDBR + mova m2, Z(2) + mova m3, Z(3) + T4_SSE m2, m3, m7 +@@ -372,6 +374,7 @@ fft16_avx: + + align 16 + fft32_avx: ++ _CET_ENDBR + call fft16_avx + + mova m0, Z(4) +@@ -396,6 +399,7 @@ fft32_avx: + ret + + fft32_interleave_avx: ++ _CET_ENDBR + call fft32_avx + mov r2d, 32 + .deint_loop: +@@ -419,6 +423,7 @@ INIT_XMM sse + align 16 + fft4_avx: + fft4_sse: ++ _CET_ENDBR + mova m0, Z(0) + mova m1, Z(1) + T4_SSE m0, m1, m2 +@@ -428,6 +433,7 @@ fft4_sse: + + align 16 + fft8_sse: ++ _CET_ENDBR + mova m0, Z(0) + mova m1, Z(1) + T4_SSE m0, m1, m2 +@@ -442,6 +448,7 @@ fft8_sse: + + align 16 + 
fft16_sse: ++ _CET_ENDBR + mova m0, Z(0) + mova m1, Z(1) + T4_SSE m0, m1, m2 +@@ -465,6 +472,7 @@ fft16_sse: + %macro FFT48_3DNOW 0 + align 16 + fft4 %+ SUFFIX: ++ _CET_ENDBR + T2_3DNOW m0, m1, Z(0), Z(1) + mova m2, Z(2) + mova m3, Z(3) +@@ -479,6 +487,7 @@ fft4 %+ SUFFIX: + + align 16 + fft8 %+ SUFFIX: ++ _CET_ENDBR + T2_3DNOW m0, m1, Z(0), Z(1) + mova m2, Z(2) + mova m3, Z(3) +@@ -532,6 +541,7 @@ FFT48_3DNOW + %macro DECL_PASS 2+ ; name, payload + align 16 + %1: ++ _CET_ENDBR + DEFINE_ARGS zc, w, n, o1, o3 + lea o3q, [nq*3] + lea o1q, [nq*8] +@@ -548,10 +558,6 @@ DEFINE_ARGS zc, w, n, o1, o3 %macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs lea r2, [dispatch_tab%1] mov r2, [r2 + (%2q-2)*gprsize] @@ -12,7 +95,7 @@ Index: libavcodec/x86/fft.asm call r2 %endmacro ; FFT_DISPATCH -@@ -731,11 +727,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0 +@@ -731,11 +737,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0 %define pass_interleave_3dnowext pass_interleave_3dnow %endif @@ -24,7 +107,15 @@ Index: libavcodec/x86/fft.asm %macro DECL_FFT 1-2 ; nbits, suffix %ifidn %0, 1 -@@ -773,8 +765,10 @@ fft %+ n %+ fullsuffix: +@@ -759,6 +761,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0 + + align 16 + fft %+ n %+ fullsuffix: ++ _CET_ENDBR + call fft %+ n2 %+ SUFFIX + add r0, n*4 - (n&(-2<<%1)) + call fft %+ n4 %+ SUFFIX +@@ -773,8 +776,10 @@ fft %+ n %+ fullsuffix: %endrep %undef n Index: patches/patch-libavutil_x86_x86inc_asm =================================================================== RCS file: patches/patch-libavutil_x86_x86inc_asm diff -N patches/patch-libavutil_x86_x86inc_asm --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-libavutil_x86_x86inc_asm 13 Jun 2023 07:58:36 -0000 @@ -0,0 +1,27 @@ +- define _CET_ENDBR depending X86_64 / X86 +- use it in cglobal macro + +Index: libavutil/x86/x86inc.asm +--- libavutil/x86/x86inc.asm.orig ++++ libavutil/x86/x86inc.asm +@@ -53,6 +53,12 @@ + %endif + %endif + ++%if ARCH_X86_64 ++ %define _CET_ENDBR endbr64 ++%else ++ 
%define _CET_ENDBR ++%endif ++ + %define WIN64 0 + %define UNIX64 0 + %if ARCH_X86_64 +@@ -746,6 +752,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, + %endif + align function_align + %2: ++ _CET_ENDBR + RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer + %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required + %assign stack_offset 0 ; stack pointer offset relative to the return address