Hello Bruno, all,
For some invocations, gnulib-tool is rather slow[1], which impacts
productivity a bit. The following patches try to improve things without
sacrificing portability and, hopefully, without compromising
readability. ;-)
First, `sort -u' is already used in gnulib-tool, so it seems only
consistent to drop `uniq' where possible, even though this does not
lead to a noticeable speedup.
Then, the transitive closure algorithm need not visit any module name in
the dependency graph more than once. (Looking up and checking a module
is rather expensive due to the file operations and fork&exec involved.)
Also, the rewriting of file names may be done in batch. Note that this
may cause the loop that calls `func_add_or_update' to be executed in a
subshell, because the loop now reads its input from a pipeline; but this
shouldn't matter: the function does not change any state. (Should I
provide a similar change for func_create_testdir?)
Finally, each include directive needs to be read only once.
What do you think?
Cheers,
Ralf
[1] Times taken on a modern system with:
time gnulib-tool --import acl alloca argmatch assert backupfile \
base64 c-strtold calloc canon-host canonicalize chown config-h \
closeout cycle-check d-ino d-type diacrit dirfd dirname error \
euidaccess exclude exitfail fcntl fcntl-safer fdl fileblocks \
filemode filenamecat fnmatch-gnu fprintftime fsusage ftruncate \
fts getdate getgroups getline getloadavg getndelim2 getopt \
getpagesize gettext gettime gettimeofday getugroups getusershell \
group-member hard-locale hash hash-pjw host-os human idcache \
inttostr inttypes lchmod lchown lib-ignore
before:
43.07user 25.70system 1:00.87elapsed 112%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+5660467minor)pagefaults 0swaps
after:
18.70user 9.34system 0:26.49elapsed 105%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+1995848minor)pagefaults 0swaps
PS: For the bootstrapping of gettext, it saves roughly 40s.
PPS: Should the number of files used by a project exceed a few hundred,
it may make sense to exploit bash3's `+=' with a func_append, in order
to avoid the quadratic scaling associated with string enlargement:
http://lists.gnu.org/archive/html/libtool-patches/2006-05/msg00016.html
* gnulib-tool: Change `sort | uniq' to `sort -u' where
appropriate.
Index: gnulib-tool
===================================================================
RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v
retrieving revision 1.160
diff -u -r1.160 gnulib-tool
--- gnulib-tool 13 Sep 2006 15:00:47 -0000 1.160
+++ gnulib-tool 14 Sep 2006 22:52:08 -0000
@@ -625,7 +625,7 @@
}'
eval `sed -n -e "$my_sed_traces" < "$configure_ac"`
if test -n "$prereqs"; then
- autoconf_minversion=`for version in $prereqs; do echo $version; done |
$SORT -g | uniq | tail -1`
+ autoconf_minversion=`for version in $prereqs; do echo $version; done |
$SORT -g -u | tail -1`
fi
fi
if test -z "$autoconf_minversion"; then
@@ -737,8 +737,7 @@
} \
| sed -e '/^CVS$/d' -e '/^ChangeLog$/d' -e '/^COPYING$/d' -e
'/^README$/d' -e '/^TEMPLATE$/d' -e '/^TEMPLATE-TESTS$/d' -e '/~$/d' \
| sed -e '/-tests$/d' \
- | LC_ALL=C sort \
- | LC_ALL=C uniq
+ | LC_ALL=C sort -u
}
# func_verify_module
@@ -1019,7 +1018,7 @@
files="$files "`func_get_filelist $module`
fi
done
- files=`for f in $files; do echo $f; done | LC_ALL=C sort | LC_ALL=C uniq`
+ files=`for f in $files; do echo $f; done | LC_ALL=C sort -u`
}
# func_emit_lib_Makefile_am
@@ -1423,7 +1422,7 @@
fi
# Canonicalize the list of specified modules.
- specified_modules=`for m in $specified_modules; do echo $m; done | LC_ALL=C
sort | LC_ALL=C uniq`
+ specified_modules=`for m in $specified_modules; do echo $m; done | LC_ALL=C
sort -u`
# Determine final module list.
modules="$specified_modules"
@@ -2011,7 +2010,7 @@
{
testdir="$1"
modules="$2"
- modules=`for m in $modules; do echo $m; done | LC_ALL=C sort | LC_ALL=C uniq`
+ modules=`for m in $modules; do echo $m; done | LC_ALL=C sort -u`
# Subdirectory names.
sourcebase=gllib
@@ -2467,7 +2466,7 @@
sedexpr1='s,^m4_include(\[\(.*\)])$,\1,p'
sedexpr2='s,^[^/]*$,.,'
sedexpr3='s,/[^/]*$,,'
- m4dirs=`sed -n -e "$sedexpr1" aclocal.m4 | sed -e "$sedexpr2" -e
"$sedexpr3" | LC_ALL=C sort | LC_ALL=C uniq`
+ m4dirs=`sed -n -e "$sedexpr1" aclocal.m4 | sed -e "$sedexpr2" -e
"$sedexpr3" | LC_ALL=C sort -u`
m4dirs_count=`echo "$m4dirs" | wc -l`
fi
fi
* gnulib-tool (func_modules_transitive_closure): Rewrite to not
check already-checked module names again.
Index: gnulib-tool
===================================================================
RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v
retrieving revision 1.160
diff -u -r1.160 gnulib-tool
--- gnulib-tool 13 Sep 2006 15:00:47 -0000 1.160
+++ gnulib-tool 14 Sep 2006 22:52:08 -0000
@@ -928,45 +929,41 @@
# - modules list of modules, including dependencies
func_modules_transitive_closure ()
{
- while true; do
- xmodules=
- for module in $modules; do
+ oldmodules= # modules (valid or not) that have been checked
+ newmodules=$modules # modules we still have to check
+ modules= # accepted modules
+ while test -n "$newmodules"; do
+ oldmodules=`for m in $oldmodules $newmodules; do echo $m; done | LC_ALL=C
sort -u`
+ for module in : $newmodules; do
+ if test "$module" = :; then
+ newmodules=
+ continue
+ fi
func_verify_module
if test -n "$module"; then
- # Duplicate dependencies are harmless, but Jim wants a warning.
- duplicated_deps=`func_get_dependencies $module | LC_ALL=C sort |
LC_ALL=C uniq -d`
- if test -n "$duplicated_deps"; then
- echo "warning: module $module has duplicated dependencies: "`echo
$duplicated_deps` 1>&2
- fi
if func_acceptable $module; then
- xmodules="$xmodules $module"
- for depmodule in `func_get_dependencies $module`; do
- if func_acceptable $depmodule; then
- xmodules="$xmodules $depmodule"
- fi
- done
+ modules="$modules $module"
+ deps=`func_get_dependencies $module`
+ # Duplicate dependencies are harmless, but Jim wants a warning.
+ duplicated_deps=`echo "$deps" | LC_ALL=C sort | LC_ALL=C uniq -d`
+ if test -n "$duplicated_deps"; then
+ echo "warning: module $module has duplicated dependencies: "`echo
$duplicated_deps` 1>&2
+ fi
+ newmodules="$newmodules $deps"
if test -n "$inctests"; then
testsmodule=`func_get_tests_module $module`
if test -n "$testsmodule"; then
- if func_acceptable $testsmodule; then
- xmodules="$xmodules $testsmodule"
- for depmodule in `func_get_dependencies $testsmodule`; do
- if func_acceptable $depmodule; then
- xmodules="$xmodules $depmodule"
- fi
- done
- fi
+ newmodules="$newmodules $testsmodule"
fi
fi
fi
fi
done
- xmodules=`for m in $xmodules; do echo $m; done | LC_ALL=C sort | LC_ALL=C
uniq`
- if test "$xmodules" = "$modules"; then
- break
- fi
- modules="$xmodules"
+ for m in $newmodules; do echo $m; done | LC_ALL=C sort -u
>"$tmp"/new-modules
+ newmodules=`echo "$oldmodules" | LC_ALL=C join -v 2 - "$tmp"/new-modules`
done
+ modules=`for m in $modules; do echo $m; done | LC_ALL=C sort -u`
+ rm -f "$tmp"/new-modules
}
# func_modules_add_dummy
* gnulib-tool (func_import): Rewrite all old_files at once when
appropriate; likewise for new_files, and the input to
func_add_or_update.
Index: gnulib-tool
===================================================================
RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v
retrieving revision 1.160
diff -u -r1.160 gnulib-tool
--- gnulib-tool 13 Sep 2006 15:00:47 -0000 1.160
+++ gnulib-tool 14 Sep 2006 23:00:53 -0000
@@ -1546,28 +1549,28 @@
# Copy files or make symbolic links. Remove obsolete files.
delimiter=' '
+ sed_rewrite_old_files="
+ s,^build-aux/,$auxdir/,
+ s,^doc/,$cached_docbase/,
+ s,^lib/,$cached_sourcebase/,
+ s,^m4/,$cached_m4base/,
+ s,^tests/,$cached_testsbase/,"
+ sed_rewrite_new_files="
+ s,^build-aux/,$auxdir/,
+ s,^doc/,$docbase/,
+ s,^lib/,$sourcebase/,
+ s,^m4/,$m4base/,
+ s,^tests/,$testsbase/,"
for f in $old_files; do
- case "$f" in
- build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;;
- doc/*) g=`echo "$f" | sed -e "s,^doc/,$cached_docbase/,"` ;;
- lib/*) g=`echo "$f" | sed -e "s,^lib/,$cached_sourcebase/,"` ;;
- m4/*) g=`echo "$f" | sed -e "s,^m4/,$cached_m4base/,"` ;;
- tests/*) g=`echo "$f" | sed -e "s,^tests/,$cached_testsbase/,"` ;;
- *) g="$f" ;;
- esac
- echo "$g""$delimiter""$f"
- done | LC_ALL=C sort > "$tmp"/old-files
+ echo $f
+ done |
+ sed -e "s,.*,&$delimiter&," -e "$sed_rewrite_old_files" |
+ LC_ALL=C sort > "$tmp"/old-files
for f in $new_files; do
- case "$f" in
- build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;;
- doc/*) g=`echo "$f" | sed -e "s,^doc/,$docbase/,"` ;;
- lib/*) g=`echo "$f" | sed -e "s,^lib/,$sourcebase/,"` ;;
- m4/*) g=`echo "$f" | sed -e "s,^m4/,$m4base/,"` ;;
- tests/*) g=`echo "$f" | sed -e "s,^tests/,$testsbase/,"` ;;
- *) g="$f" ;;
- esac
- echo "$g""$delimiter""$f"
- done | LC_ALL=C sort > "$tmp"/new-files
+ echo $f
+ done |
+ sed -e "s,.*,&$delimiter&," -e "$sed_rewrite_new_files" |
+ LC_ALL=C sort > "$tmp"/new-files
# First the files that are in old-files, but not in new-files:
sed_take_first_column='s,'"$delimiter"'.*,,'
for g in `LC_ALL=C join -t"$delimiter" -v1 "$tmp"/old-files "$tmp"/new-files
| sed -e "$sed_take_first_column"`; do
@@ -1644,30 +1647,20 @@
# Then the files that are in new-files, but not in old-files:
sed_take_last_column='s,^.*'"$delimiter"',,'
already_present=
- for f in `LC_ALL=C join -t"$delimiter" -v2 "$tmp"/old-files "$tmp"/new-files
| sed -e "$sed_take_last_column"`; do
- case "$f" in
- build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;;
- doc/*) g=`echo "$f" | sed -e "s,^doc/,$docbase/,"` ;;
- lib/*) g=`echo "$f" | sed -e "s,^lib/,$sourcebase/,"` ;;
- m4/*) g=`echo "$f" | sed -e "s,^m4/,$m4base/,"` ;;
- tests/*) g=`echo "$f" | sed -e "s,^tests/,$testsbase/,"` ;;
- *) g="$f" ;;
- esac
- func_add_or_update
- done
+ LC_ALL=C join -t"$delimiter" -v2 "$tmp"/old-files "$tmp"/new-files |
+ sed -e "$sed_take_last_column" |
+ sed -e "s,.*,& &," -e "$sed_rewrite_new_files" |
+ while read g f; do
+ func_add_or_update
+ done
# Then the files that are in new-files and in old-files:
already_present=true
- for f in `LC_ALL=C join -t"$delimiter" "$tmp"/old-files "$tmp"/new-files |
sed -e "$sed_take_last_column"`; do
- case "$f" in
- build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;;
- doc/*) g=`echo "$f" | sed -e "s,^doc/,$docbase/,"` ;;
- lib/*) g=`echo "$f" | sed -e "s,^lib/,$sourcebase/,"` ;;
- m4/*) g=`echo "$f" | sed -e "s,^m4/,$m4base/,"` ;;
- tests/*) g=`echo "$f" | sed -e "s,^tests/,$testsbase/,"` ;;
- *) g="$f" ;;
- esac
- func_add_or_update
- done
+ LC_ALL=C join -t"$delimiter" "$tmp"/old-files "$tmp"/new-files |
+ sed -e "$sed_take_last_column" |
+ sed -e "s,.*,& &," -e "$sed_rewrite_new_files" |
+ while read g f; do
+ func_add_or_update
+ done
# Command-line invocation printed in a comment in generated gnulib-cache.m4.
actioncmd="gnulib-tool --import"
* gnulib-tool (nl): New variable.
(func_import): Rewrite include directive extraction to only
read each directive once.
Index: gnulib-tool
===================================================================
RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v
retrieving revision 1.160
diff -u -r1.160 gnulib-tool
--- gnulib-tool 13 Sep 2006 15:00:47 -0000 1.160
+++ gnulib-tool 14 Sep 2006 22:57:12 -0000
@@ -25,6 +25,8 @@
cvsdatestamp='$Date: 2006/09/13 15:00:47 $'
last_checkin_date=`echo "$cvsdatestamp" | sed -e 's,^\$[D]ate: ,,'`
version=`echo "$last_checkin_date" | sed -e 's/ .*$//' -e 's,/,-,g'`
+nl='
+'
# You can set AUTOCONFPATH to empty if autoconf 2.57 is already in your PATH.
AUTOCONFPATH=
@@ -1949,29 +1944,30 @@
echo
echo "You may need to add #include directives for the following .h files."
(
- # First the #include <...> directives without #ifs, sorted for convenience.
+ # First the #include <...> directives without #ifs, sorted for convenience,
+ # then the #include "..." directives without #ifs, sorted for convenience,
+ # then the #include directives that are surrounded by #ifs. Not sorted.
+ : >"$tmp"/include-lt
+ : >"$tmp"/include-q
+ : >"$tmp"/include-if
for module in $modules; do
- if func_get_include_directive "$module" | grep '^#if' >/dev/null; then
- :
- else
- func_get_include_directive "$module" | grep -v 'include "'
- fi
- done | LC_ALL=C sort -u
- # Then the #include "..." directives without #ifs, sorted for convenience.
- for module in $modules; do
- if func_get_include_directive "$module" | grep '^#if' >/dev/null; then
- :
- else
- func_get_include_directive "$module" | grep 'include "'
- fi
- done | LC_ALL=C sort -u
- # Then the #include directives that are surrounded by #ifs. Not sorted.
- for module in $modules; do
- if func_get_include_directive "$module" | grep '^#if' >/dev/null; then
- func_get_include_directive "$module"
- fi
+ include_directive=`func_get_include_directive "$module"`
+ case $nl$include_directive in
+ *$nl\#if*)
+ echo "$include_directive" >> "$tmp"/include-if
+ ;;
+ *)
+ echo "$include_directive" | grep -v 'include "' >>"$tmp"/include-lt
+ echo "$include_directive" | grep 'include "' >>"$tmp"/include-q
+ ;;
+ esac
done
- ) | sed -e '/^$/d;' -e 's/^/ /'
+ LC_ALL=C sort -u "$tmp"/include-lt
+ LC_ALL=C sort -u "$tmp"/include-q
+ cat "$tmp"/include-if
+ ) | sed -e '/^$/d' -e 's/^/ /'
+ rm -f "$tmp"/include-lt "$tmp"/include-q "$tmp"/include-if
+
echo
echo "Don't forget to"
if test "$makefile_am" = Makefile.am; then