Hello Bruno, all, For some invocations, gnulib-tool is rather slow[1], which impacts productivity a bit. The following patches try to improve things without sacrificing portability and, hopefully, without compromising readability. ;-)
First, `sort -u' is already used in gnulib-tool, so it would seem only consistent to drop `uniq' where possible, even though it does not lead to a noticeable speedup. Then, the transitive closure algorithm need not walk any module name in the dependency graph more than once. (Looking up and checking a module is rather expensive due to the file operations and fork&exec involved.) Also, the rewriting of file names may be done in batch. Note that this may cause the loop that calls `func_add_or_update' to be executed in a subshell, but this shouldn't matter: the function does not change any state. (Should I provide a similar change for func_create_testdir?) Finally, the include directives need only be read once. What do you think? Cheers, Ralf [1] Times taken on a modern system with: time gnulib-tool --import acl alloca argmatch assert backupfile \ base64 c-strtold calloc canon-host canonicalize chown config-h \ closeout cycle-check d-ino d-type diacrit dirfd dirname error \ euidaccess exclude exitfail fcntl fcntl-safer fdl fileblocks \ filemode filenamecat fnmatch-gnu fprintftime fsusage ftruncate \ fts getdate getgroups getline getloadavg getndelim2 getopt \ getpagesize gettext gettime gettimeofday getugroups getusershell \ group-member hard-locale hash hash-pjw host-os human idcache \ inttostr inttypes lchmod lchown lib-ignore before: 43.07user 25.70system 1:00.87elapsed 112%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+5660467minor)pagefaults 0swaps after: 18.70user 9.34system 0:26.49elapsed 105%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+1995848minor)pagefaults 0swaps PS: For the bootstrapping of gettext, it saves roughly 40s. PPS: should the number of files used by a project exceed a few hundred, it may make sense to exploit bash3's `+=' with a func_append, in order to avoid the quadratic scaling associated with string enlargement: http://lists.gnu.org/archive/html/libtool-patches/2006-05/msg00016.html
* gnulib-tool: Change `sort | uniq' to `sort -u' where appropriate. Index: gnulib-tool =================================================================== RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v retrieving revision 1.160 diff -u -r1.160 gnulib-tool --- gnulib-tool 13 Sep 2006 15:00:47 -0000 1.160 +++ gnulib-tool 14 Sep 2006 22:52:08 -0000 @@ -625,7 +625,7 @@ }' eval `sed -n -e "$my_sed_traces" < "$configure_ac"` if test -n "$prereqs"; then - autoconf_minversion=`for version in $prereqs; do echo $version; done | $SORT -g | uniq | tail -1` + autoconf_minversion=`for version in $prereqs; do echo $version; done | $SORT -g -u | tail -1` fi fi if test -z "$autoconf_minversion"; then @@ -737,8 +737,7 @@ } \ | sed -e '/^CVS$/d' -e '/^ChangeLog$/d' -e '/^COPYING$/d' -e '/^README$/d' -e '/^TEMPLATE$/d' -e '/^TEMPLATE-TESTS$/d' -e '/~$/d' \ | sed -e '/-tests$/d' \ - | LC_ALL=C sort \ - | LC_ALL=C uniq + | LC_ALL=C sort -u } # func_verify_module @@ -1019,7 +1018,7 @@ files="$files "`func_get_filelist $module` fi done - files=`for f in $files; do echo $f; done | LC_ALL=C sort | LC_ALL=C uniq` + files=`for f in $files; do echo $f; done | LC_ALL=C sort -u` } # func_emit_lib_Makefile_am @@ -1423,7 +1422,7 @@ fi # Canonicalize the list of specified modules. - specified_modules=`for m in $specified_modules; do echo $m; done | LC_ALL=C sort | LC_ALL=C uniq` + specified_modules=`for m in $specified_modules; do echo $m; done | LC_ALL=C sort -u` # Determine final module list. modules="$specified_modules" @@ -2011,7 +2010,7 @@ { testdir="$1" modules="$2" - modules=`for m in $modules; do echo $m; done | LC_ALL=C sort | LC_ALL=C uniq` + modules=`for m in $modules; do echo $m; done | LC_ALL=C sort -u` # Subdirectory names. 
sourcebase=gllib @@ -2467,7 +2466,7 @@ sedexpr1='s,^m4_include(\[\(.*\)])$,\1,p' sedexpr2='s,^[^/]*$,.,' sedexpr3='s,/[^/]*$,,' - m4dirs=`sed -n -e "$sedexpr1" aclocal.m4 | sed -e "$sedexpr2" -e "$sedexpr3" | LC_ALL=C sort | LC_ALL=C uniq` + m4dirs=`sed -n -e "$sedexpr1" aclocal.m4 | sed -e "$sedexpr2" -e "$sedexpr3" | LC_ALL=C sort -u` m4dirs_count=`echo "$m4dirs" | wc -l` fi fi
* gnulib-tool (func_modules_transitive_closure): Rewrite to not check already-checked module names again. Index: gnulib-tool =================================================================== RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v retrieving revision 1.160 diff -u -r1.160 gnulib-tool --- gnulib-tool 13 Sep 2006 15:00:47 -0000 1.160 +++ gnulib-tool 14 Sep 2006 22:52:08 -0000 @@ -928,45 +929,41 @@ # - modules list of modules, including dependencies func_modules_transitive_closure () { - while true; do - xmodules= - for module in $modules; do + oldmodules= # modules (valid or not) that have been checked + newmodules=$modules # modules we still have to check + modules= # accepted modules + while test -n "$newmodules"; do + oldmodules=`for m in $oldmodules $newmodules; do echo $m; done | LC_ALL=C sort -u` + for module in : $newmodules; do + if test "$module" = :; then + newmodules= + continue + fi func_verify_module if test -n "$module"; then - # Duplicate dependencies are harmless, but Jim wants a warning. - duplicated_deps=`func_get_dependencies $module | LC_ALL=C sort | LC_ALL=C uniq -d` - if test -n "$duplicated_deps"; then - echo "warning: module $module has duplicated dependencies: "`echo $duplicated_deps` 1>&2 - fi if func_acceptable $module; then - xmodules="$xmodules $module" - for depmodule in `func_get_dependencies $module`; do - if func_acceptable $depmodule; then - xmodules="$xmodules $depmodule" - fi - done + modules="$modules $module" + deps=`func_get_dependencies $module` + # Duplicate dependencies are harmless, but Jim wants a warning. 
+ duplicated_deps=`echo "$deps" | LC_ALL=C sort | LC_ALL=C uniq -d` + if test -n "$duplicated_deps"; then + echo "warning: module $module has duplicated dependencies: "`echo $duplicated_deps` 1>&2 + fi + newmodules="$newmodules $deps" if test -n "$inctests"; then testsmodule=`func_get_tests_module $module` if test -n "$testsmodule"; then - if func_acceptable $testsmodule; then - xmodules="$xmodules $testsmodule" - for depmodule in `func_get_dependencies $testsmodule`; do - if func_acceptable $depmodule; then - xmodules="$xmodules $depmodule" - fi - done - fi + newmodules="$newmodules $testsmodule" fi fi fi fi done - xmodules=`for m in $xmodules; do echo $m; done | LC_ALL=C sort | LC_ALL=C uniq` - if test "$xmodules" = "$modules"; then - break - fi - modules="$xmodules" + for m in $newmodules; do echo $m; done | LC_ALL=C sort -u >"$tmp"/new-modules + newmodules=`echo "$oldmodules" | LC_ALL=C join -v 2 - "$tmp"/new-modules` done + modules=`for m in $modules; do echo $m; done | LC_ALL=C sort -u` + rm -f "$tmp"/new-modules } # func_modules_add_dummy
* gnulib-tool (func_import): Rewrite all old_files at once when appropriate; likewise for new_files, and the input to func_add_or_update. Index: gnulib-tool =================================================================== RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v retrieving revision 1.160 diff -u -r1.160 gnulib-tool --- gnulib-tool 13 Sep 2006 15:00:47 -0000 1.160 +++ gnulib-tool 14 Sep 2006 23:00:53 -0000 @@ -1546,28 +1549,28 @@ # Copy files or make symbolic links. Remove obsolete files. delimiter=' ' + sed_rewrite_old_files=" + s,^build-aux/,$auxdir/, + s,^doc/,$cached_docbase/, + s,^lib/,$cached_sourcebase/, + s,^m4/,$cached_m4base/, + s,^tests/,$cached_testsbase/," + sed_rewrite_new_files=" + s,^build-aux/,$auxdir/, + s,^doc/,$docbase/, + s,^lib/,$sourcebase/, + s,^m4/,$m4base/, + s,^tests/,$testsbase/," for f in $old_files; do - case "$f" in - build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;; - doc/*) g=`echo "$f" | sed -e "s,^doc/,$cached_docbase/,"` ;; - lib/*) g=`echo "$f" | sed -e "s,^lib/,$cached_sourcebase/,"` ;; - m4/*) g=`echo "$f" | sed -e "s,^m4/,$cached_m4base/,"` ;; - tests/*) g=`echo "$f" | sed -e "s,^tests/,$cached_testsbase/,"` ;; - *) g="$f" ;; - esac - echo "$g""$delimiter""$f" - done | LC_ALL=C sort > "$tmp"/old-files + echo $f + done | + sed -e "s,.*,&$delimiter&," -e "$sed_rewrite_old_files" | + LC_ALL=C sort > "$tmp"/old-files for f in $new_files; do - case "$f" in - build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;; - doc/*) g=`echo "$f" | sed -e "s,^doc/,$docbase/,"` ;; - lib/*) g=`echo "$f" | sed -e "s,^lib/,$sourcebase/,"` ;; - m4/*) g=`echo "$f" | sed -e "s,^m4/,$m4base/,"` ;; - tests/*) g=`echo "$f" | sed -e "s,^tests/,$testsbase/,"` ;; - *) g="$f" ;; - esac - echo "$g""$delimiter""$f" - done | LC_ALL=C sort > "$tmp"/new-files + echo $f + done | + sed -e "s,.*,&$delimiter&," -e "$sed_rewrite_new_files" | + LC_ALL=C sort > "$tmp"/new-files # First the files that are in old-files, but not in 
new-files: sed_take_first_column='s,'"$delimiter"'.*,,' for g in `LC_ALL=C join -t"$delimiter" -v1 "$tmp"/old-files "$tmp"/new-files | sed -e "$sed_take_first_column"`; do @@ -1644,30 +1647,20 @@ # Then the files that are in new-files, but not in old-files: sed_take_last_column='s,^.*'"$delimiter"',,' already_present= - for f in `LC_ALL=C join -t"$delimiter" -v2 "$tmp"/old-files "$tmp"/new-files | sed -e "$sed_take_last_column"`; do - case "$f" in - build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;; - doc/*) g=`echo "$f" | sed -e "s,^doc/,$docbase/,"` ;; - lib/*) g=`echo "$f" | sed -e "s,^lib/,$sourcebase/,"` ;; - m4/*) g=`echo "$f" | sed -e "s,^m4/,$m4base/,"` ;; - tests/*) g=`echo "$f" | sed -e "s,^tests/,$testsbase/,"` ;; - *) g="$f" ;; - esac - func_add_or_update - done + LC_ALL=C join -t"$delimiter" -v2 "$tmp"/old-files "$tmp"/new-files | + sed -e "$sed_take_last_column" | + sed -e "s,.*,& &," -e "$sed_rewrite_new_files" | + while read g f; do + func_add_or_update + done # Then the files that are in new-files and in old-files: already_present=true - for f in `LC_ALL=C join -t"$delimiter" "$tmp"/old-files "$tmp"/new-files | sed -e "$sed_take_last_column"`; do - case "$f" in - build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;; - doc/*) g=`echo "$f" | sed -e "s,^doc/,$docbase/,"` ;; - lib/*) g=`echo "$f" | sed -e "s,^lib/,$sourcebase/,"` ;; - m4/*) g=`echo "$f" | sed -e "s,^m4/,$m4base/,"` ;; - tests/*) g=`echo "$f" | sed -e "s,^tests/,$testsbase/,"` ;; - *) g="$f" ;; - esac - func_add_or_update - done + LC_ALL=C join -t"$delimiter" "$tmp"/old-files "$tmp"/new-files | + sed -e "$sed_take_last_column" | + sed -e "s,.*,& &," -e "$sed_rewrite_new_files" | + while read g f; do + func_add_or_update + done # Command-line invocation printed in a comment in generated gnulib-cache.m4. actioncmd="gnulib-tool --import"
* gnulib-tool (nl): New variable. (func_import): Rewrite include directive extraction to only read each directive once. Index: gnulib-tool =================================================================== RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v retrieving revision 1.160 diff -u -r1.160 gnulib-tool --- gnulib-tool 13 Sep 2006 15:00:47 -0000 1.160 +++ gnulib-tool 14 Sep 2006 22:57:12 -0000 @@ -25,6 +25,8 @@ cvsdatestamp='$Date: 2006/09/13 15:00:47 $' last_checkin_date=`echo "$cvsdatestamp" | sed -e 's,^\$[D]ate: ,,'` version=`echo "$last_checkin_date" | sed -e 's/ .*$//' -e 's,/,-,g'` +nl=' +' # You can set AUTOCONFPATH to empty if autoconf 2.57 is already in your PATH. AUTOCONFPATH= @@ -1949,29 +1944,30 @@ echo echo "You may need to add #include directives for the following .h files." ( - # First the #include <...> directives without #ifs, sorted for convenience. + # First the #include <...> directives without #ifs, sorted for convenience, + # then the #include "..." directives without #ifs, sorted for convenience, + # then the #include directives that are surrounded by #ifs. Not sorted. + : >"$tmp"/include-lt + : >"$tmp"/include-q + : >"$tmp"/include-if for module in $modules; do - if func_get_include_directive "$module" | grep '^#if' >/dev/null; then - : - else - func_get_include_directive "$module" | grep -v 'include "' - fi - done | LC_ALL=C sort -u - # Then the #include "..." directives without #ifs, sorted for convenience. - for module in $modules; do - if func_get_include_directive "$module" | grep '^#if' >/dev/null; then - : - else - func_get_include_directive "$module" | grep 'include "' - fi - done | LC_ALL=C sort -u - # Then the #include directives that are surrounded by #ifs. Not sorted. 
- for module in $modules; do - if func_get_include_directive "$module" | grep '^#if' >/dev/null; then - func_get_include_directive "$module" - fi + include_directive=`func_get_include_directive "$module"` + case $nl$include_directive in + *$nl\#if*) + echo "$include_directive" >> "$tmp"/include-if + ;; + *) + echo "$include_directive" | grep -v 'include "' >>"$tmp"/include-lt + echo "$include_directive" | grep 'include "' >>"$tmp"/include-q + ;; + esac done - ) | sed -e '/^$/d;' -e 's/^/ /' + LC_ALL=C sort -u "$tmp"/include-lt + LC_ALL=C sort -u "$tmp"/include-q + cat "$tmp"/include-if + ) | sed -e '/^$/d' -e 's/^/ /' + rm -f "$tmp"/include-lt "$tmp"/include-q "$tmp"/include-if + echo echo "Don't forget to" if test "$makefile_am" = Makefile.am; then