On 2017-08-31 12:10:56 +0300, Martin Storsjö wrote:
> In binutils 2.29, the behavior of the ADR instruction changed so that 1 is
> added to the address of a Thumb function (previously nothing was added). This
> allows the loaded address to be passed to a BLX instruction and the correct
> mode change will occur.
>
> See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458
>
> By using adr with a label that isn't annotated as a thumb function,
> we avoid the new behaviour in binutils 2.29 and get the same behaviour
> as in prior releases, and as in other assemblers (ms armasm.exe,
> clang's built in assembler).
> ---
> libavcodec/arm/h264idct_neon.S | 20 ++++++++++++--------
> 1 file changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
> index f588f3e..b078cf2 100644
> --- a/libavcodec/arm/h264idct_neon.S
> +++ b/libavcodec/arm/h264idct_neon.S
> @@ -21,6 +21,7 @@
> #include "libavutil/arm/asm.S"
>
> function ff_h264_idct_add_neon, export=1
> +h264_idct_add_neon_nothumb:
I thought of adding the symbol to the function macro, but given that it's
just two functions in a single file, adding the labels manually might be
better. I don't expect it to be needed in many more places, if any.
> vld1.64 {d0-d3}, [r1,:128]
> vmov.i16 q15, #0
>
> @@ -73,6 +74,7 @@ function ff_h264_idct_add_neon, export=1
> endfunc
>
> function ff_h264_idct_dc_add_neon, export=1
> +h264_idct_dc_add_neon_nothumb:
> mov r3, #0
> vld1.16 {d2[],d3[]}, [r1,:16]
> strh r3, [r1]
> @@ -113,8 +115,8 @@ function ff_h264_idct_add16_neon, export=1
> movne lr, #0
> cmp lr, #0
> ite ne
> - adrne lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
> - adreq lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB
> + adrne lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
> + adreq lr, h264_idct_add_neon_nothumb + CONFIG_THUMB
> blx lr
> 2: subs ip, ip, #1
> add r1, r1, #32
> @@ -138,8 +140,8 @@ function ff_h264_idct_add16intra_neon, export=1
> cmp r8, #0
> ldrsh r8, [r1]
> iteet ne
> - adrne lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB
> - adreq lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
> + adrne lr, h264_idct_add_neon_nothumb + CONFIG_THUMB
> + adreq lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
> cmpeq r8, #0
> blxne lr
> subs ip, ip, #1
> @@ -166,8 +168,8 @@ function ff_h264_idct_add8_neon, export=1
> cmp r8, #0
> ldrsh r8, [r1]
> iteet ne
> - adrne lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB
> - adreq lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
> + adrne lr, h264_idct_add_neon_nothumb + CONFIG_THUMB
> + adreq lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
> cmpeq r8, #0
> blxne lr
> add r12, r12, #1
> @@ -267,6 +269,7 @@ endfunc
> .endm
>
> function ff_h264_idct8_add_neon, export=1
> +h264_idct8_add_neon_nothumb:
> vmov.i16 q3, #0
> vld1.16 {q8-q9}, [r1,:128]
> vst1.16 {q3}, [r1,:128]!
> @@ -328,6 +331,7 @@ function ff_h264_idct8_add_neon, export=1
> endfunc
>
> function ff_h264_idct8_dc_add_neon, export=1
> +h264_idct8_dc_add_neon_nothumb:
> mov r3, #0
> vld1.16 {d30[],d31[]},[r1,:16]
> strh r3, [r1]
> @@ -388,8 +392,8 @@ function ff_h264_idct8_add4_neon, export=1
> movne lr, #0
> cmp lr, #0
> ite ne
> - adrne lr, X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB
> - adreq lr, X(ff_h264_idct8_add_neon) + CONFIG_THUMB
> + adrne lr, h264_idct8_dc_add_neon_nothumb + CONFIG_THUMB
> + adreq lr, h264_idct8_add_neon_nothumb + CONFIG_THUMB
> blx lr
> 2: subs r12, r12, #4
> add r1, r1, #128
patch ok
Janne
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel