zstd compression of tarballs and distribution packages is becoming more common. BusyBox doesn't include zstd code, so use the external zstd and unzstd programs to support it. Because it requires external utilities, the feature isn't enabled by default.
With this change BusyBox tar, rpm and dpkg applets can handle zstd compressed data. When the feature is enabled: function old new delta setup_transformer_on_fd 126 192 +66 tar_main 1114 1176 +62 .rodata 102483 102544 +61 get_header_tar_zstd - 53 +53 unpack_zstd_stream - 47 +47 filter_accept_list_reassign 217 246 +29 packed_usage 35215 35239 +24 dpkg_deb_main 395 415 +20 unpack_package 641 654 +13 init_archive_deb_control 75 88 +13 tar_longopts 314 321 +7 ------------------------------------------------------------------------------ (add/remove: 4/0 grow/shrink: 9/0 up/down: 395/0) Total: 395 bytes Signed-off-by: Ron Yorston <[email protected]> --- archival/Config.src | 6 ++++ archival/dpkg.c | 6 ++++ archival/dpkg_deb.c | 4 +++ archival/libarchive/Kbuild.src | 2 ++ archival/libarchive/decompress_unzstd.c | 13 ++++++++ .../libarchive/filter_accept_list_reassign.c | 6 ++++ archival/libarchive/get_header_tar_zstd.c | 20 ++++++++++++ archival/libarchive/open_transformer.c | 14 +++++++++ archival/tar.c | 31 +++++++++++++++---- include/bb_archive.h | 7 +++++ testsuite/tar.tests | 22 +++++++++++++ 11 files changed, 125 insertions(+), 6 deletions(-) create mode 100644 archival/libarchive/decompress_unzstd.c create mode 100644 archival/libarchive/get_header_tar_zstd.c diff --git a/archival/Config.src b/archival/Config.src index cbcd7217c..bd75f0f65 100644 --- a/archival/Config.src +++ b/archival/Config.src @@ -25,6 +25,12 @@ config FEATURE_SEAMLESS_Z bool "Make tar, rpm, modprobe etc understand .Z data" default n # it is ancient +config FEATURE_SEAMLESS_ZSTD + bool "Make tar, rpm, modprobe etc understand .zst data" + default n + help + This requires external zstd and unzstd binaries. 
+ INSERT config FEATURE_LZMA_FAST diff --git a/archival/dpkg.c b/archival/dpkg.c index eda5ec7eb..430738792 100644 --- a/archival/dpkg.c +++ b/archival/dpkg.c @@ -1505,6 +1505,9 @@ static void init_archive_deb_control(archive_handle_t *ar_handle) #if ENABLE_FEATURE_SEAMLESS_XZ llist_add_to(&(ar_handle->accept), (char*)"control.tar.xz"); #endif +#if ENABLE_FEATURE_SEAMLESS_ZSTD + llist_add_to(&(ar_handle->accept), (char*)"control.tar.zst"); +#endif /* Assign the tar handle as a subarchive of the ar handle */ ar_handle->dpkg__sub_archive = tar_handle; @@ -1532,6 +1535,9 @@ static void init_archive_deb_data(archive_handle_t *ar_handle) #if ENABLE_FEATURE_SEAMLESS_XZ llist_add_to(&(ar_handle->accept), (char*)"data.tar.xz"); #endif +#if ENABLE_FEATURE_SEAMLESS_ZSTD + llist_add_to(&(ar_handle->accept), (char*)"data.tar.zst"); +#endif /* Assign the tar handle as a subarchive of the ar handle */ ar_handle->dpkg__sub_archive = tar_handle; diff --git a/archival/dpkg_deb.c b/archival/dpkg_deb.c index dda931169..0c60ae41a 100644 --- a/archival/dpkg_deb.c +++ b/archival/dpkg_deb.c @@ -77,6 +77,10 @@ int dpkg_deb_main(int argc UNUSED_PARAM, char **argv) llist_add_to(&ar_archive->accept, (char*)"data.tar.xz"); llist_add_to(&control_tar_llist, (char*)"control.tar.xz"); #endif +#if ENABLE_FEATURE_SEAMLESS_ZSTD + llist_add_to(&ar_archive->accept, (char*)"data.tar.zst"); + llist_add_to(&control_tar_llist, (char*)"control.tar.zst"); +#endif /* Must have 1 or 2 args */ opt = getopt32(argv, "^" "cefXx" diff --git a/archival/libarchive/Kbuild.src b/archival/libarchive/Kbuild.src index d2f284b08..bf0ab9c69 100644 --- a/archival/libarchive/Kbuild.src +++ b/archival/libarchive/Kbuild.src @@ -39,6 +39,7 @@ DPKG_FILES:= \ get_header_tar_bz2.o \ get_header_tar_lzma.o \ get_header_tar_xz.o \ + get_header_tar_zstd.o \ INSERT @@ -89,6 +90,7 @@ lib-$(CONFIG_FEATURE_SEAMLESS_GZ) += open_transformer.o decompress_gunzip. 
lib-$(CONFIG_FEATURE_SEAMLESS_BZ2) += open_transformer.o decompress_bunzip2.o lib-$(CONFIG_FEATURE_SEAMLESS_LZMA) += open_transformer.o decompress_unlzma.o lib-$(CONFIG_FEATURE_SEAMLESS_XZ) += open_transformer.o decompress_unxz.o +lib-$(CONFIG_FEATURE_SEAMLESS_ZSTD) += open_transformer.o decompress_unzstd.o lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += open_transformer.o decompress_bunzip2.o lib-$(CONFIG_FEATURE_COMPRESS_BBCONFIG) += open_transformer.o decompress_bunzip2.o lib-$(CONFIG_FEATURE_SH_EMBEDDED_SCRIPTS) += open_transformer.o decompress_bunzip2.o diff --git a/archival/libarchive/decompress_unzstd.c b/archival/libarchive/decompress_unzstd.c new file mode 100644 index 000000000..c55ab8575 --- /dev/null +++ b/archival/libarchive/decompress_unzstd.c @@ -0,0 +1,13 @@ +#include "libbb.h" +#include "bb_archive.h" + +IF_DESKTOP(long long) int FAST_FUNC +unpack_zstd_stream(transformer_state_t *xstate) +{ + // FIXME Provide an internal implementation of zstd decompression. + char *argv[] = {(char *)"unzstd", (char *)"-cf", (char *)"-", NULL}; + + xmove_fd(xstate->src_fd, STDIN_FILENO); + xmove_fd(xstate->dst_fd, STDOUT_FILENO); + BB_EXECVP_or_die(argv); +} diff --git a/archival/libarchive/filter_accept_list_reassign.c b/archival/libarchive/filter_accept_list_reassign.c index 826c5c29d..a78186a6e 100644 --- a/archival/libarchive/filter_accept_list_reassign.c +++ b/archival/libarchive/filter_accept_list_reassign.c @@ -55,6 +55,12 @@ char FAST_FUNC filter_accept_list_reassign(archive_handle_t *archive_handle) archive_handle->dpkg__action_data_subarchive = get_header_tar_xz; return EXIT_SUCCESS; } + if (ENABLE_FEATURE_SEAMLESS_ZSTD + && strcmp(name_ptr, "zst") == 0 + ) { + archive_handle->dpkg__action_data_subarchive = get_header_tar_zstd; + return EXIT_SUCCESS; + } } return EXIT_FAILURE; } diff --git a/archival/libarchive/get_header_tar_zstd.c b/archival/libarchive/get_header_tar_zstd.c new file mode 100644 index 000000000..6f1b4b977 --- /dev/null +++ 
b/archival/libarchive/get_header_tar_zstd.c @@ -0,0 +1,20 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ +#include "libbb.h" +#include "bb_archive.h" + +char FAST_FUNC get_header_tar_zstd(archive_handle_t *archive_handle) +{ + /* Can't lseek over pipes */ + archive_handle->seek = seek_by_read; + + fork_transformer_with_sig(archive_handle->src_fd, unpack_zstd_stream, "unzstd"); + archive_handle->offset = 0; + while (get_header_tar(archive_handle) == EXIT_SUCCESS) + continue; + + /* Can only do one file at a time */ + return EXIT_FAILURE; +} diff --git a/archival/libarchive/open_transformer.c b/archival/libarchive/open_transformer.c index 353f68217..371679dcd 100644 --- a/archival/libarchive/open_transformer.c +++ b/archival/libarchive/open_transformer.c @@ -203,11 +203,25 @@ static transformer_state_t *setup_transformer_on_fd(int fd, int die_if_not_compr } } + if (ENABLE_FEATURE_SEAMLESS_ZSTD && xstate->magic.b16[0] == ZSTD_MAGIC1) { + xread(fd, &xstate->magic.b16[1], 2); + if (xstate->magic.b16[1] == ZSTD_MAGIC2) { + xstate->xformer = unpack_zstd_stream; + USE_FOR_NOMMU(xstate->xformer_prog = "unzstd";) + // FIXME We execute an external decompressor even on MMU. + // Force a seek back to the signature. 
+ xstate->signature_skipped = 0; + xlseek(xstate->src_fd, -4, SEEK_CUR); + goto found_magic; + } + } + /* No known magic seen */ if (die_if_not_compressed) bb_simple_error_msg_and_die("no gzip" IF_FEATURE_SEAMLESS_BZ2("/bzip2") IF_FEATURE_SEAMLESS_XZ("/xz") + IF_FEATURE_SEAMLESS_ZSTD("/zstd") " magic"); /* Some callers expect this function to "consume" fd diff --git a/archival/tar.c b/archival/tar.c index 0a100eb31..de039406e 100644 --- a/archival/tar.c +++ b/archival/tar.c @@ -43,7 +43,7 @@ //config:config FEATURE_TAR_AUTODETECT //config: bool "Autodetect compressed tarballs" //config: default y -//config: depends on TAR && (FEATURE_SEAMLESS_Z || FEATURE_SEAMLESS_GZ || FEATURE_SEAMLESS_BZ2 || FEATURE_SEAMLESS_LZMA || FEATURE_SEAMLESS_XZ) +//config: depends on TAR && (FEATURE_SEAMLESS_Z || FEATURE_SEAMLESS_GZ || FEATURE_SEAMLESS_BZ2 || FEATURE_SEAMLESS_LZMA || FEATURE_SEAMLESS_XZ || FEATURE_SEAMLESS_ZSTD) //config: help //config: With this option tar can automatically detect compressed //config: tarballs. Currently it works only on files (not pipes etc). 
@@ -787,6 +787,11 @@ static llist_t *append_file_list_to_list(llist_t *list) //usage: "\n --lzma (De)compress using lzma" //usage: ) //usage: ) +//usage: IF_FEATURE_SEAMLESS_ZSTD( +//usage: IF_FEATURE_TAR_LONG_OPTIONS( +//usage: "\n --zstd (De)compress using zstd" +//usage: ) +//usage: ) //usage: "\n -a (De)compress based on extension" //usage: IF_FEATURE_TAR_CREATE( //usage: "\n -h Follow symlinks" @@ -827,6 +832,7 @@ enum { IF_FEATURE_TAR_NOPRESERVE_TIME(OPTBIT_NOPRESERVE_TIME,) #if ENABLE_FEATURE_TAR_LONG_OPTIONS OPTBIT_STRIP_COMPONENTS, + IF_FEATURE_SEAMLESS_ZSTD(OPTBIT_ZSTD ,) IF_FEATURE_SEAMLESS_LZMA(OPTBIT_LZMA ,) OPTBIT_NORECURSION, IF_FEATURE_TAR_TO_COMMAND(OPTBIT_2COMMAND ,) @@ -854,6 +860,7 @@ enum { OPT_AUTOCOMPRESS_BY_EXT = 1 << OPTBIT_AUTOCOMPRESS_BY_EXT, // a OPT_NOPRESERVE_TIME = IF_FEATURE_TAR_NOPRESERVE_TIME((1 << OPTBIT_NOPRESERVE_TIME)) + 0, // m OPT_STRIP_COMPONENTS = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_STRIP_COMPONENTS)) + 0, // strip-components + OPT_ZSTD = IF_FEATURE_TAR_LONG_OPTIONS(IF_FEATURE_SEAMLESS_ZSTD((1 << OPTBIT_ZSTD))) + 0, // zstd OPT_LZMA = IF_FEATURE_TAR_LONG_OPTIONS(IF_FEATURE_SEAMLESS_LZMA((1 << OPTBIT_LZMA))) + 0, // lzma OPT_NORECURSION = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NORECURSION )) + 0, // no-recursion OPT_2COMMAND = IF_FEATURE_TAR_TO_COMMAND( (1 << OPTBIT_2COMMAND )) + 0, // to-command @@ -861,7 +868,7 @@ enum { OPT_NOPRESERVE_PERM = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NOPRESERVE_PERM)) + 0, // no-same-permissions OPT_OVERWRITE = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_OVERWRITE )) + 0, // overwrite - OPT_ANY_COMPRESS = (OPT_BZIP2 | OPT_LZMA | OPT_GZIP | OPT_XZ | OPT_COMPRESS), + OPT_ANY_COMPRESS = (OPT_BZIP2 | OPT_LZMA | OPT_GZIP | OPT_XZ | OPT_COMPRESS | OPT_ZSTD), }; #if ENABLE_FEATURE_TAR_LONG_OPTIONS static const char tar_longopts[] ALIGN1 = @@ -901,7 +908,10 @@ static const char tar_longopts[] ALIGN1 = # if ENABLE_FEATURE_TAR_NOPRESERVE_TIME "touch\0" No_argument "m" # endif - 
"strip-components\0" Required_argument "\xf8" + "strip-components\0" Required_argument "\xf7" +# if ENABLE_FEATURE_SEAMLESS_ZSTD + "zstd\0" No_argument "\xf8" +# endif # if ENABLE_FEATURE_SEAMLESS_LZMA "lzma\0" No_argument "\xf9" # endif @@ -999,7 +1009,7 @@ int tar_main(int argc UNUSED_PARAM, char **argv) IF_FEATURE_SEAMLESS_Z( "Z" ) "a" IF_FEATURE_TAR_NOPRESERVE_TIME("m") - IF_FEATURE_TAR_LONG_OPTIONS("\xf8:") // --strip-components + IF_FEATURE_TAR_LONG_OPTIONS("\xf7:") // --strip-components "\0" "tt:vv:" // count -t,-v #if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM @@ -1009,7 +1019,7 @@ int tar_main(int argc UNUSED_PARAM, char **argv) IF_FEATURE_TAR_CREATE("c--tx:t--cx:x--ct") // mutually exclusive IF_NOT_FEATURE_TAR_CREATE("t--x:x--t") // mutually exclusive #if ENABLE_FEATURE_TAR_LONG_OPTIONS - ":\xf8+" // --strip-components=NUM + ":\xf7+" // --strip-components=NUM #endif LONGOPTS , &base_dir // -C dir @@ -1049,6 +1059,7 @@ int tar_main(int argc UNUSED_PARAM, char **argv) showopt(OPT_AUTOCOMPRESS_BY_EXT); showopt(OPT_NOPRESERVE_TIME ); showopt(OPT_STRIP_COMPONENTS); + showopt(OPT_ZSTD ); showopt(OPT_LZMA ); showopt(OPT_NORECURSION ); showopt(OPT_2COMMAND ); @@ -1170,7 +1181,7 @@ int tar_main(int argc UNUSED_PARAM, char **argv) } else { tar_handle->src_fd = xopen(tar_filename, flags); #if ENABLE_FEATURE_TAR_CREATE - if ((OPT_GZIP | OPT_BZIP2 | OPT_XZ | OPT_LZMA) != 0 /* at least one is config-enabled */ + if ((OPT_GZIP | OPT_BZIP2 | OPT_XZ | OPT_LZMA | OPT_ZSTD) != 0 /* at least one is config-enabled */ && (opt & OPT_AUTOCOMPRESS_BY_EXT) && flags != O_RDONLY ) { @@ -1182,6 +1193,8 @@ int tar_main(int argc UNUSED_PARAM, char **argv) opt |= OPT_XZ; if (OPT_LZMA != 0 && is_suffixed_with(tar_filename, "lzma")) opt |= OPT_LZMA; + if (OPT_ZSTD != 0 && is_suffixed_with(tar_filename, "zst")) + opt |= OPT_ZSTD; } #endif } @@ -1206,6 +1219,8 @@ int tar_main(int argc UNUSED_PARAM, char **argv) zipMode = "lzma"; if (opt & OPT_XZ) zipMode = "xz"; + if (opt & 
OPT_ZSTD) + zipMode = "zstd"; # endif tbInfo = xzalloc(sizeof(*tbInfo)); tbInfo->tarFd = tar_handle->src_fd; @@ -1246,6 +1261,10 @@ int tar_main(int argc UNUSED_PARAM, char **argv) USE_FOR_MMU(IF_FEATURE_SEAMLESS_XZ(xformer = unpack_xz_stream;)) USE_FOR_NOMMU(xformer_prog = "unxz";) } + if (opt & OPT_ZSTD) { + USE_FOR_MMU(xformer = unpack_zstd_stream;) + USE_FOR_NOMMU(xformer_prog = "unzstd";) + } fork_transformer_with_sig(tar_handle->src_fd, xformer, xformer_prog); /* Can't lseek over pipes */ diff --git a/include/bb_archive.h b/include/bb_archive.h index 1dc77f31d..c6230bcb7 100644 --- a/include/bb_archive.h +++ b/include/bb_archive.h @@ -17,6 +17,9 @@ enum { /* (unsigned) cast suppresses "integer overflow in expression" warning */ XZ_MAGIC1a = 256 * (unsigned)(256 * (256 * 0xfd + '7') + 'z') + 'X', XZ_MAGIC2a = 256 * 'Z' + 0, + /* .zst signature: 0x28, 0xb5, 0x2f, 0xfd */ + ZSTD_MAGIC1 = 256 * 0x28 + 0xb5, + ZSTD_MAGIC2 = 256 * 0x2f + 0xfd, #else COMPRESS_MAGIC = 0x9d1f, GZIP_MAGIC = 0x8b1f, @@ -25,6 +28,8 @@ enum { XZ_MAGIC2 = 'z' + ('X' + ('Z' + 0 * 256) * 256) * 256, XZ_MAGIC1a = 0xfd + ('7' + ('z' + 'X' * 256) * 256) * 256, XZ_MAGIC2a = 'Z' + 0 * 256, + ZSTD_MAGIC1 = 0x28 + 0xb5 * 256, + ZSTD_MAGIC2 = 0x2f + 0xfd * 256, #endif }; @@ -198,6 +203,7 @@ char get_header_tar_gz(archive_handle_t *archive_handle) FAST_FUNC; char get_header_tar_bz2(archive_handle_t *archive_handle) FAST_FUNC; char get_header_tar_lzma(archive_handle_t *archive_handle) FAST_FUNC; char get_header_tar_xz(archive_handle_t *archive_handle) FAST_FUNC; +char get_header_tar_zstd(archive_handle_t *archive_handle) FAST_FUNC; void seek_by_jump(int fd, off_t amount) FAST_FUNC; void seek_by_read(int fd, off_t amount) FAST_FUNC; @@ -255,6 +261,7 @@ IF_DESKTOP(long long) int unpack_gz_stream(transformer_state_t *xstate) FAST_FUN IF_DESKTOP(long long) int unpack_bz2_stream(transformer_state_t *xstate) FAST_FUNC; IF_DESKTOP(long long) int unpack_lzma_stream(transformer_state_t *xstate) FAST_FUNC; 
IF_DESKTOP(long long) int unpack_xz_stream(transformer_state_t *xstate) FAST_FUNC; +IF_DESKTOP(long long) int unpack_zstd_stream(transformer_state_t *xstate) FAST_FUNC; char* append_ext(char *filename, const char *expected_ext) FAST_FUNC; int bbunpack(char **argv, diff --git a/testsuite/tar.tests b/testsuite/tar.tests index 0f2e89112..d2155add7 100755 --- a/testsuite/tar.tests +++ b/testsuite/tar.tests @@ -244,6 +244,28 @@ AAAEWVo= SKIP= cd .. || exit 1; rm -rf tar.tempdir 2>/dev/null +mkdir tar.tempdir && cd tar.tempdir || exit 1 +# Do we detect ZSTD-compressed data (even w/o .tar.zst extension)? +# (uuencoded hello_world.tar.zst contains one empty file named "hello_world") +# Currently we require an external unzstd program to be available. +optional UUDECODE FEATURE_TAR_AUTODETECT FEATURE_SEAMLESS_ZSTD +command -v unzstd >/dev/null || SKIP=1 +testing "tar extract tar.zst" "\ +uudecode -o input && tar tf input && echo Ok +" "\ +hello_world +Ok +" \ +"" "\ +begin-base64 644 hello_world.tar.zst +KLUv/QRYpQIA5ANoZWxsb193b3JsZAAwMDAwNjQ0ADAwMDE3NTAAMTUxNjA0 +NTQwMzAAMDExNDc3ACAwAHVzdGFyICAAcm15AAcAtAbwMBQOAJMCkBq8wApX +awJ0ak0T/A== +==== +" +SKIP= +cd .. || exit 1; rm -rf tar.tempdir 2>/dev/null + mkdir tar.tempdir && cd tar.tempdir || exit 1 # On extract, everything up to and including last ".." component is stripped optional FEATURE_TAR_CREATE -- 2.53.0 _______________________________________________ busybox mailing list [email protected] https://lists.busybox.net/mailman/listinfo/busybox
