[FFmpeg-devel] [PATCH] arm: Consistently use proper interworking function returns

Martin Storsjö Fri, 04 Oct 2024 01:51:13 -0700

Use "bx lr", or "pop {pc}", which do proper mode switching
between thumb and arm modes. A plain "mov pc, lr" executed in thumb
mode does not switch to arm mode (while in arm mode, it does switch
mode for a thumb caller).


This is normally not an issue, as CONFIG_THUMB is only enabled if
the C compiler defaults to thumb; but stick to patterns that can
do mode switching if needed, for consistency.
---
 libswresample/arm/resample.S  | 8 ++++----
 libswscale/arm/hscale.S       | 3 +--
 libswscale/arm/output.S       | 3 +--
 libswscale/arm/yuv2rgb_neon.S | 3 +--
 4 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/libswresample/arm/resample.S b/libswresample/arm/resample.S
index 3ce7623246..791f4cc016 100644
--- a/libswresample/arm/resample.S
+++ b/libswresample/arm/resample.S
@@ -30,7 +30,7 @@ function ff_resample_common_apply_filter_x4_float_neon, 
export=1
     vpadd.f32           d0, d0, d1                                     @ pair 
adding of the 4x32-bit accumulated values
     vpadd.f32           d0, d0, d0                                     @ pair 
adding of the 4x32-bit accumulator values
     vst1.32             {d0[0]}, [r0]                                  @ write 
accumulator
-    mov pc, lr
+    bx                  lr
 endfunc
 
 function ff_resample_common_apply_filter_x8_float_neon, export=1
@@ -46,7 +46,7 @@ function ff_resample_common_apply_filter_x8_float_neon, 
export=1
     vpadd.f32           d0, d0, d1                                     @ pair 
adding of the 4x32-bit accumulated values
     vpadd.f32           d0, d0, d0                                     @ pair 
adding of the 4x32-bit accumulator values
     vst1.32             {d0[0]}, [r0]                                  @ write 
accumulator
-    mov pc, lr
+    bx                  lr
 endfunc
 
 function ff_resample_common_apply_filter_x4_s16_neon, export=1
@@ -59,7 +59,7 @@ function ff_resample_common_apply_filter_x4_s16_neon, export=1
     vpadd.s32           d0, d0, d1                                     @ pair 
adding of the 4x32-bit accumulated values
     vpadd.s32           d0, d0, d0                                     @ pair 
adding of the 4x32-bit accumulator values
     vst1.32             {d0[0]}, [r0]                                  @ write 
accumulator
-    mov pc, lr
+    bx                  lr
 endfunc
 
 function ff_resample_common_apply_filter_x8_s16_neon, export=1
@@ -73,5 +73,5 @@ function ff_resample_common_apply_filter_x8_s16_neon, export=1
     vpadd.s32           d0, d0, d1                                     @ pair 
adding of the 4x32-bit accumulated values
     vpadd.s32           d0, d0, d0                                     @ pair 
adding of the 4x32-bit accumulator values
     vst1.32             {d0[0]}, [r0]                                  @ write 
accumulator
-    mov pc, lr
+    bx                  lr
 endfunc
diff --git a/libswscale/arm/hscale.S b/libswscale/arm/hscale.S
index dd4d453957..5c3551a0f1 100644
--- a/libswscale/arm/hscale.S
+++ b/libswscale/arm/hscale.S
@@ -65,6 +65,5 @@ function ff_hscale_8_to_15_neon, export=1
     subs                r2, #2                                         @ dstW 
-= 2
     bgt                 1b                                             @ loop 
until end of line
     vpop                {q4-q7}
-    pop                 {r4-r12, lr}
-    mov pc, lr
+    pop                 {r4-r12, pc}
 endfunc
diff --git a/libswscale/arm/output.S b/libswscale/arm/output.S
index 70846dee1f..5f10585f81 100644
--- a/libswscale/arm/output.S
+++ b/libswscale/arm/output.S
@@ -73,6 +73,5 @@ function ff_yuv2planeX_8_neon, export=1
     subs                r4, r4, #8                                     @ dstW 
-= 8
     bgt                 2b                                             @ loop 
until width is consumed
     vpop                {q4-q7}
-    pop                 {r4-r12, lr}
-    mov                 pc, lr
+    pop                 {r4-r12, pc}
 endfunc
diff --git a/libswscale/arm/yuv2rgb_neon.S b/libswscale/arm/yuv2rgb_neon.S
index 474465427d..6777d625f9 100644
--- a/libswscale/arm/yuv2rgb_neon.S
+++ b/libswscale/arm/yuv2rgb_neon.S
@@ -262,8 +262,7 @@ function ff_\ifmt\()_to_\ofmt\()_neon, export=1
     increment_and_test_\ifmt
     bgt                 1b
     vpop                {q4-q7}
-    pop                 {r4-r12, lr}
-    mov                 pc, lr
+    pop                 {r4-r12, pc}
 endfunc
 .endm
 
-- 
2.39.5 (Apple Git-154)

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] arm: Consistently use proper interworking function returns

Reply via email to