This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit a1bfaa0e78c95ab71cd232cf7f43ffca0fa0a055 Author: Ramiro Polla <[email protected]> AuthorDate: Tue Mar 10 15:40:37 2026 +0100 Commit: Ramiro Polla <[email protected]> CommitDate: Mon Mar 30 11:38:35 2026 +0000 swscale/aarch64: introduce tool to enumerate sws_ops for NEON backend The NEON sws_ops backend will use a build-time code generator for the various operation functions it needs to implement. This build-time code generator (ops_asmgen) will need a list of the operations that must be implemented. This commit adds a tool (sws_ops_aarch64) that generates such a list (ops_entries.c). The list is generated by iterating over all possible conversion combinations and collecting the parameters for each NEON assembly function that has to be implemented, defined by a unique set of parameters derived from SwsOp. Whenever swscale evolves, with improved optimization passes, new pixel formats, or improvements to the backend itself, this file (ops_entries.c) should be regenerated by running: $ make sws_ops_entries_aarch64 Sponsored-by: Sovereign Tech Fund Signed-off-by: Ramiro Polla <[email protected]> --- libswscale/Makefile | 7 + libswscale/aarch64/ops_entries.c | 390 +++++++++++++++++++++++++++++++++++++ libswscale/aarch64/ops_impl.c | 257 ++++++++++++++++++++++ libswscale/aarch64/ops_impl.h | 142 ++++++++++++++ libswscale/aarch64/ops_impl_conv.c | 236 ++++++++++++++++++++++ libswscale/tests/sws_ops_aarch64.c | 208 ++++++++++++++++++++ tests/ref/fate/source | 1 + 7 files changed, 1241 insertions(+) diff --git a/libswscale/Makefile b/libswscale/Makefile index f33754ce67..fa9231aff1 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -50,3 +50,10 @@ TESTPROGS = colorspace \ pixdesc_query \ swscale \ sws_ops \ + sws_ops_aarch64 \ + +sws_ops_entries_aarch64: TAG = GEN +sws_ops_entries_aarch64: $(SUBDIR)tests/sws_ops_aarch64$(EXESUF) + $(M)$< > $(SRC_PATH)/libswscale/aarch64/ops_entries.c.tmp + $(CP) $(SRC_PATH)/libswscale/aarch64/ops_entries.c.tmp 
$(SRC_PATH)/libswscale/aarch64/ops_entries.c + $(RM) $(SRC_PATH)/libswscale/aarch64/ops_entries.c.tmp diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c new file mode 100644 index 0000000000..61ff8bf760 --- /dev/null +++ b/libswscale/aarch64/ops_entries.c @@ -0,0 +1,390 @@ +/* + * This file is automatically generated. Do not edit manually. + * To regenerate, run: make sws_ops_entries_aarch64 + */ + +{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_BIT, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_BIT, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_NIBBLE, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_NIBBLE, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, 
.type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_BIT, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_BIT, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_NIBBLE, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_NIBBLE, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type 
= AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0001, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0001, .block_size = 16, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0132, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0132, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0132, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0231, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = 
AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1023, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1023, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1203, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 16, .type 
= AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = 
AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0121, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0121, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0233, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0233, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0332, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0332, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0444, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0444, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0555, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0555, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0565, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0565, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x2aaa, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0010 }, +{ .op = 
AARCH64_SWS_OP_UNPACK, .pack = 0x2aaa, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0xaaa2, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0100 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0xaaa2, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0121, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0121, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0233, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0233, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0332, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0332, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0444, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0444, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0555, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0555, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0565, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0565, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x2aaa, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0xaaa2, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 1, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 1, .block_size = 
16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 1, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 2, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 2, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 2, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 2, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 2, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 3, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 5, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 6, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 6, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 6, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 6, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LSHIFT, 
.shift = 6, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 6, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 7, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 8, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 8, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 4, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 6, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 6, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 6, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1011 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1100 }, +{ .op = 
AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1100 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1011 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = 
AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = 
AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = 
AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0100 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1100 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0100 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = 
AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_EXPAND, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_EXPAND, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_EXPAND, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_EXPAND, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_EXPAND, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_F32, 
.mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 0, 
.block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000373dcc7ULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000373dcc7ULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x0003f3fccfULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x0003f3fccfULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10cc0dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10cc0dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcfcULL, 
.linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcfcULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcffULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcffULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc000000000ULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc000000000ULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc00000000fULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc00000000fULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc0000000fcULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc0000000fcULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc003f3fccfULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc003f3fccfULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc00c00c00cULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc00c00c00cULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 
0xc00ff3fcffULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc00ff3fcffULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x0325, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x032f, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x2305, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x230f, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x3000, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x302f, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x3ff0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5023, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5032, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5230, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5ff0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5fff, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1000 }, +{ .op = 
AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf023, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf032, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf203, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf230, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf2f0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf2ff, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0100 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf302, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xff30, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xff3f, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xfff0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, diff --git a/libswscale/aarch64/ops_impl.c b/libswscale/aarch64/ops_impl.c new file mode 100644 index 0000000000..1e2f42ef14 --- /dev/null +++ b/libswscale/aarch64/ops_impl.c @@ -0,0 +1,257 @@ +/* + * Copyright (C) 2026 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <inttypes.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> + +#include "libavutil/attributes.h" + +#include "libavutil/avassert.h" + +#include "ops_impl.h" + +/*********************************************************************/ +static const char pixel_types[AARCH64_PIXEL_TYPE_NB][32] = { + [AARCH64_PIXEL_U8 ] = "AARCH64_PIXEL_U8", + [AARCH64_PIXEL_U16] = "AARCH64_PIXEL_U16", + [AARCH64_PIXEL_U32] = "AARCH64_PIXEL_U32", + [AARCH64_PIXEL_F32] = "AARCH64_PIXEL_F32", +}; + +static const char *aarch64_pixel_type(SwsAArch64PixelType fmt) +{ + if (fmt >= AARCH64_PIXEL_TYPE_NB) { + av_assert0(!"Invalid pixel type!"); + return NULL; + } + return pixel_types[fmt]; +} + +/*********************************************************************/ +static const char op_types[AARCH64_SWS_OP_TYPE_NB][32] = { + [AARCH64_SWS_OP_NONE ] = "AARCH64_SWS_OP_NONE", + [AARCH64_SWS_OP_PROCESS ] = "AARCH64_SWS_OP_PROCESS", + [AARCH64_SWS_OP_PROCESS_RETURN] = "AARCH64_SWS_OP_PROCESS_RETURN", + [AARCH64_SWS_OP_READ_BIT ] = "AARCH64_SWS_OP_READ_BIT", + [AARCH64_SWS_OP_READ_NIBBLE ] = "AARCH64_SWS_OP_READ_NIBBLE", + [AARCH64_SWS_OP_READ_PACKED ] = "AARCH64_SWS_OP_READ_PACKED", + [AARCH64_SWS_OP_READ_PLANAR ] = "AARCH64_SWS_OP_READ_PLANAR", + [AARCH64_SWS_OP_WRITE_BIT ] = "AARCH64_SWS_OP_WRITE_BIT", + [AARCH64_SWS_OP_WRITE_NIBBLE ] = "AARCH64_SWS_OP_WRITE_NIBBLE", + [AARCH64_SWS_OP_WRITE_PACKED ] = "AARCH64_SWS_OP_WRITE_PACKED", + [AARCH64_SWS_OP_WRITE_PLANAR ] = "AARCH64_SWS_OP_WRITE_PLANAR", + 
[AARCH64_SWS_OP_SWAP_BYTES ] = "AARCH64_SWS_OP_SWAP_BYTES", + [AARCH64_SWS_OP_SWIZZLE ] = "AARCH64_SWS_OP_SWIZZLE", + [AARCH64_SWS_OP_UNPACK ] = "AARCH64_SWS_OP_UNPACK", + [AARCH64_SWS_OP_PACK ] = "AARCH64_SWS_OP_PACK", + [AARCH64_SWS_OP_LSHIFT ] = "AARCH64_SWS_OP_LSHIFT", + [AARCH64_SWS_OP_RSHIFT ] = "AARCH64_SWS_OP_RSHIFT", + [AARCH64_SWS_OP_CLEAR ] = "AARCH64_SWS_OP_CLEAR", + [AARCH64_SWS_OP_CONVERT ] = "AARCH64_SWS_OP_CONVERT", + [AARCH64_SWS_OP_EXPAND ] = "AARCH64_SWS_OP_EXPAND", + [AARCH64_SWS_OP_MIN ] = "AARCH64_SWS_OP_MIN", + [AARCH64_SWS_OP_MAX ] = "AARCH64_SWS_OP_MAX", + [AARCH64_SWS_OP_SCALE ] = "AARCH64_SWS_OP_SCALE", + [AARCH64_SWS_OP_LINEAR ] = "AARCH64_SWS_OP_LINEAR", + [AARCH64_SWS_OP_DITHER ] = "AARCH64_SWS_OP_DITHER", +}; + +static const char *aarch64_op_type(SwsAArch64OpType op) +{ + if (op == AARCH64_SWS_OP_NONE || op >= AARCH64_SWS_OP_TYPE_NB) { + av_assert0(!"Invalid op type!"); + return NULL; + } + return op_types[op]; +} + +/*********************************************************************/ +/* + * Helper string concatenation function that does not depend on the + * FFmpeg libraries, so it may be used standalone. + */ +av_printf_format(3, 4) +static void buf_appendf(char **pbuf, size_t *prem, const char *fmt, ...) +{ + char *buf = *pbuf; + size_t rem = *prem; + if (!rem) + return; + + va_list ap; + va_start(ap, fmt); + int n = vsnprintf(buf, rem, fmt, ap); + va_end(ap); + + if (n > 0) { + if (n < rem) { + buf += n; + rem -= n; + } else { + buf += rem - 1; + rem = 0; + } + *pbuf = buf; + *prem = rem; + } +} + +/*********************************************************************/ +/** + * The following structure is used to describe one field from + * SwsAArch64OpImplParams. This will be used to serialize the parameter + * structure, generate function names and lookup strings, and compare + * two sets of parameters. 
+ */ + +typedef struct ParamField { + const char *name; + size_t offset; + size_t size; + void (*print_val)(char **pbuf, size_t *prem, void *p); + int (*cmp_val)(void *pa, void *pb); +} ParamField; + +#define PARAM_FIELD(name) #name, offsetof(SwsAArch64OpImplParams, name), sizeof(((SwsAArch64OpImplParams *) 0)->name) + +static void print_op_val(char **pbuf, size_t *prem, void *p) +{ + SwsAArch64OpType op = *(SwsAArch64OpType *) p; + buf_appendf(pbuf, prem, "%s", aarch64_op_type(op)); +} + +static int cmp_op(void *pa, void *pb) +{ + int64_t ia = (int64_t) *((SwsAArch64OpType *) pa); + int64_t ib = (int64_t) *((SwsAArch64OpType *) pb); + int64_t diff = ia - ib; + if (diff) + return diff < 0 ? -1 : 1; + return 0; +} + +static void print_pixel_val(char **pbuf, size_t *prem, void *p) +{ + SwsAArch64PixelType type = *(SwsAArch64PixelType *) p; + buf_appendf(pbuf, prem, "%s", aarch64_pixel_type(type)); +} + +static int cmp_pixel(void *pa, void *pb) +{ + int64_t ia = (int64_t) *((SwsAArch64PixelType *) pa); + int64_t ib = (int64_t) *((SwsAArch64PixelType *) pb); + int64_t diff = ia - ib; + if (diff) + return diff < 0 ? -1 : 1; + return 0; +} + +static void print_u8_val(char **pbuf, size_t *prem, void *p) +{ + uint8_t val = *(uint8_t *) p; + buf_appendf(pbuf, prem, "%u", val); +} + +static int cmp_u8(void *pa, void *pb) +{ + int64_t ia = (int64_t) *((uint8_t *) pa); + int64_t ib = (int64_t) *((uint8_t *) pb); + int64_t diff = ia - ib; + if (diff) + return diff < 0 ? -1 : 1; + return 0; +} + +static void print_u16_val(char **pbuf, size_t *prem, void *p) +{ + uint16_t val = *(uint16_t *) p; + buf_appendf(pbuf, prem, "0x%04x", val); +} + +static int cmp_u16(void *pa, void *pb) +{ + int64_t ia = (int64_t) *((uint16_t *) pa); + int64_t ib = (int64_t) *((uint16_t *) pb); + int64_t diff = ia - ib; + if (diff) + return diff < 0 ? 
-1 : 1; + return 0; +} + +static void print_u40_val(char **pbuf, size_t *prem, void *p) +{ + uint64_t val = *(uint64_t *) p; + buf_appendf(pbuf, prem, "0x%010" PRIx64 "ULL", val); +} + +static int cmp_u40(void *pa, void *pb) +{ + int64_t ia = (int64_t) *((uint64_t *) pa); + int64_t ib = (int64_t) *((uint64_t *) pb); + int64_t diff = ia - ib; + if (diff) + return diff < 0 ? -1 : 1; + return 0; +} + +/*********************************************************************/ +static const ParamField field_op = { PARAM_FIELD(op), print_op_val, cmp_op }; +static const ParamField field_mask = { PARAM_FIELD(mask), print_u16_val, cmp_u16 }; +static const ParamField field_type = { PARAM_FIELD(type), print_pixel_val, cmp_pixel }; +static const ParamField field_block_size = { PARAM_FIELD(block_size), print_u8_val, cmp_u8 }; +static const ParamField field_shift = { PARAM_FIELD(shift), print_u8_val, cmp_u8 }; +static const ParamField field_swizzle = { PARAM_FIELD(swizzle), print_u16_val, cmp_u16 }; +static const ParamField field_pack = { PARAM_FIELD(pack), print_u16_val, cmp_u16 }; +static const ParamField field_to_type = { PARAM_FIELD(to_type), print_pixel_val, cmp_pixel }; +static const ParamField field_linear_mask = { PARAM_FIELD(linear.mask), print_u40_val, cmp_u40 }; +static const ParamField field_linear_fmla = { PARAM_FIELD(linear.fmla), print_u8_val, cmp_u8 }; +static const ParamField field_dither_y_offset = { PARAM_FIELD(dither.y_offset), print_u16_val, cmp_u16 }; +static const ParamField field_dither_size_log2 = { PARAM_FIELD(dither.size_log2), print_u8_val, cmp_u8 }; + +/* Fields needed to uniquely identify each SwsAArch64OpType. 
*/ +#define MAX_LEVELS 8 +static const ParamField *op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS] = { + [AARCH64_SWS_OP_PROCESS ] = { &field_op, &field_mask }, + [AARCH64_SWS_OP_PROCESS_RETURN] = { &field_op, &field_mask }, + [AARCH64_SWS_OP_READ_BIT ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_READ_NIBBLE ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_READ_PACKED ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_READ_PLANAR ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_WRITE_BIT ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_WRITE_NIBBLE ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_WRITE_PACKED ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_WRITE_PLANAR ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_SWAP_BYTES ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_SWIZZLE ] = { &field_op, &field_swizzle, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_UNPACK ] = { &field_op, &field_pack, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_PACK ] = { &field_op, &field_pack, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_LSHIFT ] = { &field_op, &field_shift, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_RSHIFT ] = { &field_op, &field_shift, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_CLEAR ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_CONVERT ] = { &field_op, &field_to_type, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_EXPAND ] = { &field_op, &field_to_type, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_MIN ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_MAX ] = { 
&field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_SCALE ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_LINEAR ] = { &field_op, &field_linear_mask, &field_linear_fmla, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_DITHER ] = { &field_op, &field_dither_y_offset, &field_dither_size_log2, &field_block_size, &field_type, &field_mask }, +}; diff --git a/libswscale/aarch64/ops_impl.h b/libswscale/aarch64/ops_impl.h new file mode 100644 index 0000000000..7bd23dd8e8 --- /dev/null +++ b/libswscale/aarch64/ops_impl.h @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2026 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef SWSCALE_AARCH64_OPS_IMPL_H +#define SWSCALE_AARCH64_OPS_IMPL_H + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> + +/* Similar to SwsPixelType */ +typedef enum SwsAArch64PixelType { + AARCH64_PIXEL_U8, + AARCH64_PIXEL_U16, + AARCH64_PIXEL_U32, + AARCH64_PIXEL_F32, + AARCH64_PIXEL_TYPE_NB, +} SwsAArch64PixelType; + +/* Similar to SwsOpType */ +typedef enum SwsAArch64OpType { + AARCH64_SWS_OP_NONE = 0, + AARCH64_SWS_OP_PROCESS, + AARCH64_SWS_OP_PROCESS_RETURN, + AARCH64_SWS_OP_READ_BIT, + AARCH64_SWS_OP_READ_NIBBLE, + AARCH64_SWS_OP_READ_PACKED, + AARCH64_SWS_OP_READ_PLANAR, + AARCH64_SWS_OP_WRITE_BIT, + AARCH64_SWS_OP_WRITE_NIBBLE, + AARCH64_SWS_OP_WRITE_PACKED, + AARCH64_SWS_OP_WRITE_PLANAR, + AARCH64_SWS_OP_SWAP_BYTES, + AARCH64_SWS_OP_SWIZZLE, + AARCH64_SWS_OP_UNPACK, + AARCH64_SWS_OP_PACK, + AARCH64_SWS_OP_LSHIFT, + AARCH64_SWS_OP_RSHIFT, + AARCH64_SWS_OP_CLEAR, + AARCH64_SWS_OP_CONVERT, + AARCH64_SWS_OP_EXPAND, + AARCH64_SWS_OP_MIN, + AARCH64_SWS_OP_MAX, + AARCH64_SWS_OP_SCALE, + AARCH64_SWS_OP_LINEAR, + AARCH64_SWS_OP_DITHER, + AARCH64_SWS_OP_TYPE_NB, +} SwsAArch64OpType; + +/* Each nibble in the mask corresponds to one component. */ +typedef uint16_t SwsAArch64OpMask; + +/** + * Affine coefficient mask for linear op. 
Packs a 4x5 matrix in execution + * order, where the offset is the first element, with 2 bits per element: + * 00: m[i][j] == 0 + * 01: m[i][j] == 1 + * 11: m[i][j] is any other coefficient + */ +typedef uint64_t SwsAArch64LinearOpMask; + +typedef struct SwsAArch64LinearOp { + SwsAArch64LinearOpMask mask; + uint8_t fmla; +} SwsAArch64LinearOp; + +typedef struct SwsAArch64DitherOp { + uint16_t y_offset; + uint8_t size_log2; +} SwsAArch64DitherOp; + +/** + * SwsAArch64OpImplParams describes the parameters for an SwsAArch64OpType + * operation. It consists of simplified parameters from the SwsOp structure, + * with the purpose of being straight-forward to implement and execute. + */ +typedef struct SwsAArch64OpImplParams { + SwsAArch64OpType op; + SwsAArch64OpMask mask; + SwsAArch64PixelType type; + uint8_t block_size; + union { + uint8_t shift; + SwsAArch64OpMask swizzle; + SwsAArch64OpMask pack; + SwsAArch64PixelType to_type; + SwsAArch64LinearOp linear; + SwsAArch64DitherOp dither; + }; +} SwsAArch64OpImplParams; + +/* SwsAArch64OpMask-related helpers. */ + +#define MASK_SET(mask, idx, val) do { (mask) |= (((val) & 0xf) << ((idx) << 2)); } while (0) + +#define LINEAR_MASK_SET(mask, idx, jdx, val) do { \ + (mask) |= ((((SwsAArch64LinearOpMask) (val)) & 3) << (2 * ((5 * (idx) + (jdx))))); \ +} while (0) +#define LINEAR_MASK_0 0 +#define LINEAR_MASK_1 1 +#define LINEAR_MASK_X 3 + +/** + * These values will be used by ops_asmgen to access fields inside of + * SwsOpExec and SwsOpImpl. The sizes are checked below when compiling + * for AArch64 to make sure there is no mismatch. 
+ */ +#define offsetof_exec_in 0 +#define offsetof_exec_out 32 +#define offsetof_exec_in_bump 128 +#define offsetof_exec_out_bump 160 +#define offsetof_impl_cont 0 +#define offsetof_impl_priv 16 +#define sizeof_impl 32 + +#if ARCH_AARCH64 && HAVE_NEON +static_assert(offsetof_exec_in == offsetof(SwsOpExec, in), "SwsOpExec layout mismatch"); +static_assert(offsetof_exec_out == offsetof(SwsOpExec, out), "SwsOpExec layout mismatch"); +static_assert(offsetof_exec_in_bump == offsetof(SwsOpExec, in_bump), "SwsOpExec layout mismatch"); +static_assert(offsetof_exec_out_bump == offsetof(SwsOpExec, out_bump), "SwsOpExec layout mismatch"); +static_assert(offsetof_impl_cont == offsetof(SwsOpImpl, cont), "SwsOpImpl layout mismatch"); +static_assert(offsetof_impl_priv == offsetof(SwsOpImpl, priv), "SwsOpImpl layout mismatch"); +#endif + +#endif /* SWSCALE_AARCH64_OPS_IMPL_H */ diff --git a/libswscale/aarch64/ops_impl_conv.c b/libswscale/aarch64/ops_impl_conv.c new file mode 100644 index 0000000000..fdc398392f --- /dev/null +++ b/libswscale/aarch64/ops_impl_conv.c @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2026 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * NOTE: This file is #include'd directly by both the NEON backend and + * the sws_ops_aarch64 tool. + */ + +#include "libavutil/error.h" +#include "libavutil/rational.h" +#include "libswscale/ops.h" + +#include "ops_impl.h" + +static uint8_t sws_pixel_to_aarch64(SwsPixelType type) +{ + switch (type) { + case SWS_PIXEL_U8: return AARCH64_PIXEL_U8; + case SWS_PIXEL_U16: return AARCH64_PIXEL_U16; + case SWS_PIXEL_U32: return AARCH64_PIXEL_U32; + case SWS_PIXEL_F32: return AARCH64_PIXEL_F32; + } + return 0; +} + +/** + * The column index order for SwsLinearOp.mask follows the affine transform + * order, where the offset is the last element. SwsAArch64LinearOpMask, on + * the other hand, follows execution order, where the offset is the first + * element. + */ +static int linear_index_from_sws_op(int idx) +{ + const int reorder_col[5] = { 1, 2, 3, 4, 0 }; + return reorder_col[idx]; +} + +/** + * Convert SwsOp to a SwsAArch64OpImplParams. Read the comments regarding + * SwsAArch64OpImplParams in ops_impl.h for more information. + */ +static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n, + int block_size, SwsAArch64OpImplParams *out) +{ + const SwsOp *op = &ops->ops[n]; + const SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : op; + + out->block_size = block_size; + + /** + * Most SwsOp work on fields described by next->comps.unused. + * The few that don't will override this field later. 
+ */ + out->mask = 0; + for (int i = 0; i < 4; i++) { + if (!next->comps.unused[i]) + MASK_SET(out->mask, i, 1); + } + + out->type = sws_pixel_to_aarch64(op->type); + + /* Map SwsOpType to SwsAArch64OpType */ + switch (op->op) { + case SWS_OP_READ: + if (op->rw.filter) + return AVERROR(ENOTSUP); + /** + * The different types of read operations have been split into + * their own SwsAArch64OpType to simplify the implementation. + */ + if (op->rw.frac == 1) + out->op = AARCH64_SWS_OP_READ_NIBBLE; + else if (op->rw.frac == 3) + out->op = AARCH64_SWS_OP_READ_BIT; + else if (op->rw.packed && op->rw.elems != 1) + out->op = AARCH64_SWS_OP_READ_PACKED; + else + out->op = AARCH64_SWS_OP_READ_PLANAR; + break; + case SWS_OP_WRITE: + if (op->rw.filter) + return AVERROR(ENOTSUP); + /** + * The different types of write operations have been split into + * their own SwsAArch64OpType to simplify the implementation. + */ + if (op->rw.frac == 1) + out->op = AARCH64_SWS_OP_WRITE_NIBBLE; + else if (op->rw.frac == 3) + out->op = AARCH64_SWS_OP_WRITE_BIT; + else if (op->rw.packed && op->rw.elems != 1) + out->op = AARCH64_SWS_OP_WRITE_PACKED; + else + out->op = AARCH64_SWS_OP_WRITE_PLANAR; + break; + case SWS_OP_SWAP_BYTES: out->op = AARCH64_SWS_OP_SWAP_BYTES; break; + case SWS_OP_SWIZZLE: out->op = AARCH64_SWS_OP_SWIZZLE; break; + case SWS_OP_UNPACK: out->op = AARCH64_SWS_OP_UNPACK; break; + case SWS_OP_PACK: out->op = AARCH64_SWS_OP_PACK; break; + case SWS_OP_LSHIFT: out->op = AARCH64_SWS_OP_LSHIFT; break; + case SWS_OP_RSHIFT: out->op = AARCH64_SWS_OP_RSHIFT; break; + case SWS_OP_CLEAR: out->op = AARCH64_SWS_OP_CLEAR; break; + case SWS_OP_CONVERT: + out->op = op->convert.expand ? 
AARCH64_SWS_OP_EXPAND : AARCH64_SWS_OP_CONVERT; + break; + case SWS_OP_MIN: out->op = AARCH64_SWS_OP_MIN; break; + case SWS_OP_MAX: out->op = AARCH64_SWS_OP_MAX; break; + case SWS_OP_SCALE: out->op = AARCH64_SWS_OP_SCALE; break; + case SWS_OP_LINEAR: out->op = AARCH64_SWS_OP_LINEAR; break; + case SWS_OP_DITHER: out->op = AARCH64_SWS_OP_DITHER; break; + } + + switch (out->op) { + case AARCH64_SWS_OP_READ_BIT: + case AARCH64_SWS_OP_READ_NIBBLE: + case AARCH64_SWS_OP_READ_PACKED: + case AARCH64_SWS_OP_READ_PLANAR: + case AARCH64_SWS_OP_WRITE_BIT: + case AARCH64_SWS_OP_WRITE_NIBBLE: + case AARCH64_SWS_OP_WRITE_PACKED: + case AARCH64_SWS_OP_WRITE_PLANAR: + switch (op->rw.elems) { + case 1: out->mask = 0x0001; break; + case 2: out->mask = 0x0011; break; + case 3: out->mask = 0x0111; break; + case 4: out->mask = 0x1111; break; + }; + break; + case AARCH64_SWS_OP_SWAP_BYTES: + /* Only the element size matters, not the type. */ + if (out->type == AARCH64_PIXEL_F32) + out->type = AARCH64_PIXEL_U32; + break; + case AARCH64_SWS_OP_SWIZZLE: + out->mask = 0; + MASK_SET(out->mask, 0, op->swizzle.in[0] != 0); + MASK_SET(out->mask, 1, op->swizzle.in[1] != 1); + MASK_SET(out->mask, 2, op->swizzle.in[2] != 2); + MASK_SET(out->mask, 3, op->swizzle.in[3] != 3); + MASK_SET(out->swizzle, 0, op->swizzle.in[0]); + MASK_SET(out->swizzle, 1, op->swizzle.in[1]); + MASK_SET(out->swizzle, 2, op->swizzle.in[2]); + MASK_SET(out->swizzle, 3, op->swizzle.in[3]); + /* The element size and type don't matter. 
*/ + out->block_size = block_size * ff_sws_pixel_type_size(op->type); + out->type = AARCH64_PIXEL_U8; + break; + case AARCH64_SWS_OP_UNPACK: + MASK_SET(out->pack, 0, op->pack.pattern[0]); + MASK_SET(out->pack, 1, op->pack.pattern[1]); + MASK_SET(out->pack, 2, op->pack.pattern[2]); + MASK_SET(out->pack, 3, op->pack.pattern[3]); + break; + case AARCH64_SWS_OP_PACK: + out->mask = 0; + MASK_SET(out->mask, 0, !op->comps.unused[0]); + MASK_SET(out->mask, 1, !op->comps.unused[1]); + MASK_SET(out->mask, 2, !op->comps.unused[2]); + MASK_SET(out->mask, 3, !op->comps.unused[3]); + MASK_SET(out->pack, 0, op->pack.pattern[0]); + MASK_SET(out->pack, 1, op->pack.pattern[1]); + MASK_SET(out->pack, 2, op->pack.pattern[2]); + MASK_SET(out->pack, 3, op->pack.pattern[3]); + break; + case AARCH64_SWS_OP_LSHIFT: + case AARCH64_SWS_OP_RSHIFT: + out->shift = op->c.u; + break; + case AARCH64_SWS_OP_CLEAR: + out->mask = 0; + MASK_SET(out->mask, 0, !!op->c.q4[0].den); + MASK_SET(out->mask, 1, !!op->c.q4[1].den); + MASK_SET(out->mask, 2, !!op->c.q4[2].den); + MASK_SET(out->mask, 3, !!op->c.q4[3].den); + break; + case AARCH64_SWS_OP_EXPAND: + case AARCH64_SWS_OP_CONVERT: + out->to_type = sws_pixel_to_aarch64(op->convert.to); + break; + case AARCH64_SWS_OP_LINEAR: + /** + * The out->linear.mask field packs the 4x5 matrix from SwsLinearOp as + * 2 bits per element: + * 00: m[i][j] == 0 + * 01: m[i][j] == 1 + * 11: m[i][j] is any other coefficient + */ + out->mask = 0; + for (int i = 0; i < 4; i++) { + /* Skip unused or identity rows */ + if (op->comps.unused[i] || !(op->lin.mask & SWS_MASK_ROW(i))) + continue; + MASK_SET(out->mask, i, 1); + for (int j = 0; j < 5; j++) { + int jj = linear_index_from_sws_op(j); + if (!av_cmp_q(op->lin.m[i][j], av_make_q(1, 1))) + LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_1); + else if (av_cmp_q(op->lin.m[i][j], av_make_q(0, 1))) + LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_X); + } + } + out->linear.fmla = !(ctx->flags & SWS_BITEXACT); + break; 
+ case AARCH64_SWS_OP_DITHER: + out->mask = 0; + MASK_SET(out->mask, 0, op->dither.y_offset[0] >= 0); + MASK_SET(out->mask, 1, op->dither.y_offset[1] >= 0); + MASK_SET(out->mask, 2, op->dither.y_offset[2] >= 0); + MASK_SET(out->mask, 3, op->dither.y_offset[3] >= 0); + MASK_SET(out->dither.y_offset, 0, op->dither.y_offset[0]); + MASK_SET(out->dither.y_offset, 1, op->dither.y_offset[1]); + MASK_SET(out->dither.y_offset, 2, op->dither.y_offset[2]); + MASK_SET(out->dither.y_offset, 3, op->dither.y_offset[3]); + out->dither.size_log2 = op->dither.size_log2; + break; + } + + return 0; +} diff --git a/libswscale/tests/sws_ops_aarch64.c b/libswscale/tests/sws_ops_aarch64.c new file mode 100644 index 0000000000..21948ca71b --- /dev/null +++ b/libswscale/tests/sws_ops_aarch64.c @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2026 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdio.h> + +#include "libavutil/mem.h" +#include "libavutil/tree.h" +#include "libswscale/ops.h" +#include "libswscale/ops_chain.h" + +#include "libswscale/aarch64/ops_impl.c" +#include "libswscale/aarch64/ops_impl_conv.c" + +#ifdef _WIN32 +#include <io.h> +#include <fcntl.h> +#endif + +/*********************************************************************/ +static int aarch64_op_impl_cmp(const void *a, const void *b) +{ + const SwsAArch64OpImplParams *pa = (const SwsAArch64OpImplParams *) a; + const SwsAArch64OpImplParams *pb = (const SwsAArch64OpImplParams *) b; + + const ParamField **fields = op_fields[pa->op]; + for (int i = 0; fields[i]; i++) { + const ParamField *field = fields[i]; + int diff = field->cmp_val((void *) (((uintptr_t) pa) + field->offset), + (void *) (((uintptr_t) pb) + field->offset)); + if (diff) + return diff; + } + return 0; +} + +/*********************************************************************/ +/* Insert the SwsAArch64OpImplParams structure into the AVTreeNode. */ +static int aarch64_collect_op(const SwsAArch64OpImplParams *params, struct AVTreeNode **root) +{ + int ret = 0; + + struct AVTreeNode *node = av_tree_node_alloc(); + SwsAArch64OpImplParams *copy = av_memdup(params, sizeof(*params)); + if (!node || !copy) { + ret = AVERROR(ENOMEM); + goto error; + } + av_tree_insert(root, copy, aarch64_op_impl_cmp, &node); + if (!node) + copy = NULL; + +error: + av_free(node); + av_free(copy); + return ret; +} + +/* Collect the parameters for the process/process_return functions. */ +static int aarch64_collect_process(const SwsOpList *ops, struct AVTreeNode **root) +{ + const SwsOp *read = ff_sws_op_list_input(ops); + const SwsOp *write = ff_sws_op_list_output(ops); + const int read_planes = read ? 
(read->rw.packed ? 1 : read->rw.elems) : 0; + const int write_planes = write->rw.packed ? 1 : write->rw.elems; + int ret; + + SwsAArch64OpMask mask = 0; + for (int i = 0; i < FFMAX(read_planes, write_planes); i++) + MASK_SET(mask, i, 1); + SwsAArch64OpImplParams params = { + .op = AARCH64_SWS_OP_PROCESS, + .mask = mask, + }; + + ret = aarch64_collect_op(¶ms, root); + if (ret < 0) + return ret; + + params.op = AARCH64_SWS_OP_PROCESS_RETURN; + ret = aarch64_collect_op(¶ms, root); + if (ret < 0) + return ret; + + return 0; +} + +static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops) +{ + struct AVTreeNode **root = (struct AVTreeNode **) opaque; + int ret; + + /* Make on-stack copy of `ops` to iterate over */ + SwsOpList rest = *ops; + /* Use at most two full vregs during the widest precision section */ + int block_size = (ff_sws_op_list_max_size(ops) == 4) ? 8 : 16; + + ret = aarch64_collect_process(&rest, root); + if (ret < 0) + return ret; + + for (int i = 0; i < rest.num_ops; i++) { + SwsAArch64OpImplParams params = { 0 }; + ret = convert_to_aarch64_impl(ctx, &rest, i, block_size, ¶ms); + if (ret < 0) + goto end; + ret = aarch64_collect_op(¶ms, root); + if (ret < 0) + goto end; + if (params.op == AARCH64_SWS_OP_LINEAR) { + /** + * Generate both sets of linear op functions that do use + * and do not use fmla (selected by SWS_BITEXACT). 
+ */ + params.linear.fmla = !params.linear.fmla; + ret = aarch64_collect_op(¶ms, root); + if (ret < 0) + goto end; + } + } + + ret = 0; + +end: + return ret; +} + +/*********************************************************************/ +static void serialize_op(char *buf, size_t size, const SwsAArch64OpImplParams *params) +{ + buf_appendf(&buf, &size, "{"); + const ParamField **fields = op_fields[params->op]; + for (int i = 0; fields[i]; i++) { + const ParamField *field = fields[i]; + void *p = (void *) (((uintptr_t) params) + field->offset); + if (i) + buf_appendf(&buf, &size, ","); + buf_appendf(&buf, &size, " .%s = ", field->name); + field->print_val(&buf, &size, p); + } + buf_appendf(&buf, &size, " }"); + av_assert0(size && "string buffer exhausted"); +} + +/* Serialize SwsAArch64OpImplParams for one function. */ +static int print_op(void *opaque, void *elem) +{ + SwsAArch64OpImplParams *params = (SwsAArch64OpImplParams *) elem; + FILE *fp = (FILE *) opaque; + + char buf[256]; + serialize_op(buf, sizeof(buf), params); + fprintf(fp, "%s,\n", buf); + + av_free(params); + + return 0; +} + +/*********************************************************************/ +int main(int argc, char *argv[]) +{ + struct AVTreeNode *root = NULL; + int ret = 1; + +#ifdef _WIN32 + _setmode(_fileno(stdout), _O_BINARY); +#endif + + SwsContext *ctx = sws_alloc_context(); + if (!ctx) + goto fail; + + ret = ff_sws_enum_op_lists(ctx, &root, AV_PIX_FMT_NONE, AV_PIX_FMT_NONE, + register_op); + + /** + * Generate a C file with all the unique function parameter entries + * collected by aarch64_enum_ops(). + */ + printf("/*\n"); + printf(" * This file is automatically generated. 
Do not edit manually.\n"); + printf(" * To regenerate, run: make sws_ops_entries_aarch64\n"); + printf(" */\n"); + printf("\n"); + av_tree_enumerate(root, stdout, NULL, print_op); + +fail: + av_tree_destroy(root); + sws_free_context(&ctx); + return ret; +} diff --git a/tests/ref/fate/source b/tests/ref/fate/source index 78d3a2e0fa..cbbd347ef2 100644 --- a/tests/ref/fate/source +++ b/tests/ref/fate/source @@ -16,6 +16,7 @@ libavformat/log2_tab.c libavformat/rangecoder_dec.c libavformat/riscv/cpu_common.c libswresample/log2_tab.c +libswscale/aarch64/ops_entries.c libswscale/log2_tab.c libswscale/riscv/cpu_common.c tools/uncoded_frame.c _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
