Hi, I would like to ask you to review the patch below.
I cloned the git repository and created the patch against the revision that git-describe reports as "release_1_22-36-g63e0925".
The patch introduces a level of indirection between the dedicated compressor selector options and the corresponding executable names. The parametrized executable names default to the previous fixed values. Each one can be changed via a specific long option before a compressor is selected with a dedicated named option. Thus they are suitable for adding to TAR_OPTIONS. They generally look like --COMPRESSOR-filter, as in --bzip2-filter. --------------------------------- changelog ---------------------------------- src/common.h: * Add global variables for the executable names of all 6 explicitly handled compressors. src/tar.c: * Add an option identifier enum for each new option. * Reorder the help display under "Compression options" so that it looks more logical and the new options fit in. Add the new options. * Handle the new options by pointing global variables at (parts of) command line arguments. Filter names can be changed only before a compressor is selected. Filter name changes coming after a compressor was selected would either be ineffective or would enable the user to trick the protection against specifying conflicting compressors. * --gzip, --bzip2 etc. select the executables specified by --gzip-filter, --bzip2-filter etc. Defaults are the previous fixed values. src/buffer.c: * In the "magic" table, used for content-based compressor selection, apply indirection instead of fixed compressor names. (Also fix the bug where the "xz" executable name was missing.) * Adapt the "compress_program" macro. src/suffix.c: * Make the "compression_suffixes" table static as it can be static. * In the "S" macro, the # (stringify) preprocessor operator is replaced by the ## (concat) operator, so that executable names are no longer fixed but come from variables. * Adapt the "find_compression_program" function. ---------------------------------- "tests" ----------------------------------- Checking for regressions. 
$ touch f Named option: $ $TAR -c -v -j -f f.tar.bz2 f f $ $TAR -t -v -j -f f.tar.bz2 -rw------- lacos/lacos 0 2009-10-08 03:29 f External filter: $ $TAR -c -v --use=bzip2 -f f.tar.bz2 f f $ $TAR -t -v --use=bzip2 -f f.tar.bz2 -rw------- lacos/lacos 0 2009-10-08 03:29 f Named option conforming to external filter: $ $TAR -c -v -j --use=bzip2 -f f.tar.bz2 f f $ $TAR -t -v -j --use=bzip2 -f f.tar.bz2 -rw------- lacos/lacos 0 2009-10-08 03:29 f Named option conflicting with external filter: $ $TAR -c -v -j --use=gzip -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. $ $TAR -t -v -j --use=gzip -f f.tar.bz2 /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. Named option conflicting with named option: $ $TAR -c -v -j -z -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. $ $TAR -t -v -j -z -f f.tar.bz2 /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. External filter conflicting with external filter (this is a bit bogus, because both are bzip2 compressors, but I don't consider this a big problem): $ $TAR -c -v --use=bzip2 --use=lbzip2 -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. $ $TAR -t -v --use=bzip2 --use=lbzip2 -f f.tar.bz2 /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. 
Automatic selection by suffix / contents: $ $TAR -c -v -a -f f.tar.bz2 f f $ $TAR -t -v -f f.tar.bz2 -rw------- lacos/lacos 0 2009-10-08 03:29 f $ file f.tar.bz2 f.tar.bz2: bzip2 compressed data, block size = 900k Automatic selection by suffix overrides --use: $ $TAR -c -v -a --use=gzip -f f.tar.bz2 f f $ file f.tar.bz2 f.tar.bz2: bzip2 compressed data, block size = 900k Automatic selection by suffix yields to --use: $ $TAR -c -v -a --use=gzip -f f.tar.qqq f f $ file f.tar.qqq f.tar.qqq: gzip compressed data, from Unix, last modified: Thu Oct 8 03:31:09 2009, max compression Cannot change filter after selecting method: $ $TAR -c -v --bzip2 --bzip2-filter=lbzip2 -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: can't change alternative filters after selecting a compressor Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. $ $TAR -c -v --use=bzip2 --bzip2-filter=lbzip2 -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: can't change alternative filters after selecting a compressor Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. The "tests" were successfully repeated after issuing: $ export TAR_OPTIONS="$TAR_OPTIONS --gzip-filter=pigz --bzip2-filter=lbzip2" $ export LBZIP2_PRINT_STATS=1 With the following differences: -j selects lbzip2 instead of bzip2: $ $TAR -c -v -j --use=bzip2 -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. $ $TAR -t -v -j --use=bzip2 -f f.tar.bz2 /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. and lbzip2 printed statistics whenever it was run, ie. in the "named option", "automatic selection by suffix / contents" and "automatic selection by suffix overrides --use" cases. 
-------------------------- "benchmarks", dual-core --------------------------- (TAR_OPTIONS set as above.) Decompression with lbzip2 (http://lacos.hu/), from a pipe: $ cat /usr/src/linux-source-2.6.26.tar.bz2 | time -p $TAR -t -j >/dev/null real 8.74 user 16.54 sys 0.80 Decompression with bzip2: $ (unset TAR_OPTIONS
time -p $TAR tjf /usr/src/linux-source-2.6.26.tar.bz2 >/dev/null)
real 13.96 user 13.49 sys 0.46 Compression with lbzip2: $ time -p $TAR caf linux.tar.bz2 linux-source-2.6.26/ real 26.75 user 52.31 sys 1.00 Compression with bzip2: $ time -p $TAR -c --use=bzip2 -f linux.tar.bz2 linux-source-2.6.26/ real 52.47 user 51.65 sys 0.83 Compression with pigz (http://www.zlib.net/pigz/): $ time -p $TAR caf linux.tar.gz linux-source-2.6.26/ real 14.34 user 27.89 sys 0.67 Compression with gzip: $ time -p $TAR -c -I gzip -f linux.tar.gz linux-source-2.6.26/ real 22.60 user 22.17 sys 0.43 ---------------------------------- "patch" ----------------------------------- diff --git a/src/common.h b/src/common.h index 0020f08..f110fa0 100644 --- a/src/common.h +++ b/src/common.h @@ -130,6 +130,15 @@ GLOBAL unsigned checkpoint_option; /* Specified name of compression program, or "gzip" as implied by -z. */ GLOBAL const char *use_compress_program_option; +/* Alternative filter programs to execute in place of known compressors. */ +GLOBAL const char + *compress_filter_option, + *gzip_filter_option, + *bzip2_filter_option, + *lzma_filter_option, + *lzop_filter_option, + *xz_filter_option; + GLOBAL bool dereference_option; GLOBAL bool hard_dereference_option; diff --git a/src/tar.c b/src/tar.c index a639974..6571612 100644 --- a/src/tar.c +++ b/src/tar.c @@ -257,6 +257,12 @@ enum ANCHORED_OPTION = CHAR_MAX + 1, ATIME_PRESERVE_OPTION, BACKUP_OPTION, + COMPRESS_FILTER_OPTION, + GZIP_FILTER_OPTION, + BZIP2_FILTER_OPTION, + LZMA_FILTER_OPTION, + LZOP_FILTER_OPTION, + XZ_FILTER_OPTION, CHECK_DEVICE_OPTION, CHECKPOINT_OPTION, CHECKPOINT_ACTION_OPTION, @@ -615,22 +621,34 @@ static struct argp_option options[] = { N_("do not use archive suffix to determine the compression program"), GRID+1 }, {"bzip2", 'j', 0, 0, - N_("filter the archive through bzip2"), GRID+1 }, + N_("filter the archive through bzip2"), GRID+3 }, {"gzip", 'z', 0, 0, - N_("filter the archive through gzip"), GRID+1 }, - {"gunzip", 0, 0, OPTION_ALIAS, NULL, GRID+1 }, - {"ungzip", 0, 0, 
OPTION_ALIAS, NULL, GRID+1 }, + N_("filter the archive through gzip"), GRID+3 }, + {"gunzip", 0, 0, OPTION_ALIAS, NULL, GRID+3 }, + {"ungzip", 0, 0, OPTION_ALIAS, NULL, GRID+3 }, {"compress", 'Z', 0, 0, - N_("filter the archive through compress"), GRID+1 }, - {"uncompress", 0, 0, OPTION_ALIAS, NULL, GRID+1 }, + N_("filter the archive through compress"), GRID+3 }, + {"uncompress", 0, 0, OPTION_ALIAS, NULL, GRID+3 }, {"lzma", LZMA_OPTION, 0, 0, - N_("filter the archive through lzma"), GRID+1 }, + N_("filter the archive through lzma"), GRID+3 }, {"lzop", LZOP_OPTION, 0, 0, - N_("filter the archive through lzop"), GRID+8 }, + N_("filter the archive through lzop"), GRID+3 }, {"xz", 'J', 0, 0, - N_("filter the archive through xz"), GRID+8 }, + N_("filter the archive through xz"), GRID+3 }, {"use-compress-program", 'I', N_("PROG"), 0, - N_("filter through PROG (must accept -d)"), GRID+1 }, + N_("filter through PROG (must accept -d)"), GRID+5 }, + {"bzip2-filter", BZIP2_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for -j / --bzip2 (must accept -d), eg. lbzip2"), GRID+7 }, + {"gzip-filter", GZIP_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for -z / --gzip (must accept -d), eg. 
pigz"), GRID+7 }, + {"compress-filter", COMPRESS_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for -Z / --compress (must accept -d)"), GRID+7 }, + {"lzma-filter", LZMA_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for --lzma (must accept -d)"), GRID+7 }, + {"lzop-filter", LZOP_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for --lzop (must accept -d)"), GRID+7 }, + {"xz-filter", XZ_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for -J / --xz (must accept -d)"), GRID+7 }, #undef GRID #define GRID 100 @@ -1440,12 +1458,28 @@ parse_opt (int key, char *arg, struct argp_state *state) ignore_zeros_option = true; break; +#define FOP(filter, FILTER) \ + case FILTER ## _FILTER_OPTION: \ + if (0 != use_compress_program_option) \ + USAGE_ERROR ((0, 0, \ + _("can't change alternative filters after selecting a compressor"))); \ + filter ## _filter_option = arg; \ + break; + + FOP(compress, COMPRESS) + FOP(gzip, GZIP) + FOP(bzip2, BZIP2) + FOP(lzma, LZMA) + FOP(lzop, LZOP) + FOP(xz, XZ) +#undef FOP + case 'j': - set_use_compress_program_option ("bzip2"); + set_use_compress_program_option (bzip2_filter_option); break; case 'J': - set_use_compress_program_option ("xz"); + set_use_compress_program_option (xz_filter_option); break; case 'k': @@ -1489,11 +1523,11 @@ parse_opt (int key, char *arg, struct argp_state *state) break; case LZMA_OPTION: - set_use_compress_program_option ("lzma"); + set_use_compress_program_option (lzma_filter_option); break; case LZOP_OPTION: - set_use_compress_program_option ("lzop"); + set_use_compress_program_option (lzop_filter_option); break; case 'm': @@ -1651,11 +1685,11 @@ parse_opt (int key, char *arg, struct argp_state *state) break; case 'z': - set_use_compress_program_option ("gzip"); + set_use_compress_program_option (gzip_filter_option); break; case 'Z': - set_use_compress_program_option ("compress"); + set_use_compress_program_option (compress_filter_option); break; case ANCHORED_OPTION: @@ -2204,6 +2238,15 
@@ decode_options (int argc, char **argv) seek_option = -1; +#define DEFOP(filter) filter ## _filter_option = #filter + DEFOP(compress); + DEFOP(gzip); + DEFOP(bzip2); + DEFOP(lzma); + DEFOP(lzop); + DEFOP(xz); +#undef DEFOP + /* Convert old-style tar call by exploding option element and rearranging options accordingly. */ diff --git a/src/buffer.c b/src/buffer.c index dd97682..a9195fc 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -213,25 +213,25 @@ struct zip_magic enum compress_type type; size_t length; char *magic; - char *program; + const char * const *program; char *option; }; static struct zip_magic const magic[] = { { ct_tar }, { ct_none, }, - { ct_compress, 2, "\037\235", "compress", "-Z" }, - { ct_gzip, 2, "\037\213", "gzip", "-z" }, - { ct_bzip2, 3, "BZh", "bzip2", "-j" }, - { ct_lzma, 6, "\xFFLZMA", "lzma", "--lzma" }, /* FIXME: ???? */ - { ct_lzop, 4, "\211LZO", "lzop", "--lzop" }, - { ct_xz, 6, "\0xFD7zXZ", "-J" }, + { ct_compress, 2, "\037\235", &compress_filter_option, "-Z" }, + { ct_gzip, 2, "\037\213", &gzip_filter_option, "-z" }, + { ct_bzip2, 3, "BZh", &bzip2_filter_option, "-j" }, + { ct_lzma, 6, "\xFFLZMA", &lzma_filter_option, "--lzma" }, /* FIXME: ???? */ + { ct_lzop, 4, "\211LZO", &lzop_filter_option, "--lzop" }, + { ct_xz, 6, "\0xFD7zXZ", &xz_filter_option, "-J" } }; #define NMAGIC (sizeof(magic)/sizeof(magic[0])) #define compress_option(t) magic[t].option -#define compress_program(t) magic[t].program +#define compress_program(t) *magic[t].program /* Check if the file ARCHIVE is a compressed archive. 
*/ enum compress_type diff --git a/src/suffix.c b/src/suffix.c index 6dbc68e..d394f32 100644 --- a/src/suffix.c +++ b/src/suffix.c @@ -23,11 +23,11 @@ struct compression_suffix { const char *suffix; size_t length; - const char *program; + const char * const *program; }; -struct compression_suffix compression_suffixes[] = { -#define S(s,p) #s, sizeof (#s) - 1, #p +static struct compression_suffix compression_suffixes[] = { +#define S(s,p) #s, sizeof (#s) - 1, &p ## _filter_option { S(gz, gzip) }, { S(tgz, gzip) }, { S(taz, gzip) }, @@ -64,7 +64,7 @@ find_compression_program (const char *name, const char *defprog) { if (compression_suffixes[i].length == len && memcmp (compression_suffixes[i].suffix, suf, len) == 0) - return compression_suffixes[i].program; + return *compression_suffixes[i].program; } } return defprog;
