Hello Bruno, all,

For some invocations, gnulib-tool is rather slow[1], which impacts
productivity a bit.  The following patches try to improve things without
sacrificing portability and, hopefully, without compromising
readability.  ;-)

First, `sort -u' is already used in gnulib-tool, so it seems only
consistent to drop `uniq' where possible, even though this does not lead
to a noticeable speedup.

Then, the transitive closure algorithm need not visit any module name in
the dependency graph more than once.  (Looking up and checking a module
is rather expensive due to the file operations and fork&exec involved.)

Also, the rewriting of file names may be done in batch.  Note that this
may cause the loop that calls `func_add_or_update' to be executed in a
subshell, but this shouldn't matter: the function does not change any
state.  (Should I provide a similar change for func_create_testdir?)

And the include directives need only be read once.

What do you think?

Cheers,
Ralf

[1] Times taken on a modern system with:
  time gnulib-tool --import acl alloca argmatch assert backupfile \
    base64 c-strtold calloc canon-host canonicalize chown config-h \
    closeout cycle-check d-ino d-type diacrit dirfd dirname error \
    euidaccess exclude exitfail fcntl fcntl-safer fdl fileblocks  \
    filemode filenamecat fnmatch-gnu fprintftime fsusage ftruncate \
    fts getdate getgroups getline getloadavg getndelim2 getopt \
    getpagesize gettext gettime gettimeofday getugroups getusershell \
    group-member hard-locale hash hash-pjw host-os human idcache \
    inttostr inttypes lchmod lchown lib-ignore

before:
43.07user 25.70system 1:00.87elapsed 112%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+5660467minor)pagefaults 0swaps

after:
18.70user 9.34system 0:26.49elapsed 105%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+1995848minor)pagefaults 0swaps

PS: For the bootstrapping of gettext, it saves roughly 40s.

PPS: Should the number of files used by a project exceed a few hundred,
it may make sense to exploit bash3's `+=' with a func_append, in order
to avoid the quadratic scaling associated with repeated string
concatenation:
http://lists.gnu.org/archive/html/libtool-patches/2006-05/msg00016.html
        * gnulib-tool: Change `sort | uniq' to `sort -u' where
        appropriate.

Index: gnulib-tool
===================================================================
RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v
retrieving revision 1.160
diff -u -r1.160 gnulib-tool
--- gnulib-tool 13 Sep 2006 15:00:47 -0000      1.160
+++ gnulib-tool 14 Sep 2006 22:52:08 -0000
@@ -625,7 +625,7 @@
       }'
     eval `sed -n -e "$my_sed_traces" < "$configure_ac"`
     if test -n "$prereqs"; then
-      autoconf_minversion=`for version in $prereqs; do echo $version; done | 
$SORT -g | uniq | tail -1`
+      autoconf_minversion=`for version in $prereqs; do echo $version; done | 
$SORT -g -u | tail -1`
     fi
   fi
   if test -z "$autoconf_minversion"; then
@@ -737,8 +737,7 @@
   } \
       | sed -e '/^CVS$/d' -e '/^ChangeLog$/d' -e '/^COPYING$/d' -e 
'/^README$/d' -e '/^TEMPLATE$/d' -e '/^TEMPLATE-TESTS$/d' -e '/~$/d' \
       | sed -e '/-tests$/d' \
-      | LC_ALL=C sort \
-      | LC_ALL=C uniq
+      | LC_ALL=C sort -u
 }
 
 # func_verify_module
@@ -1019,7 +1018,7 @@
       files="$files "`func_get_filelist $module`
     fi
   done
-  files=`for f in $files; do echo $f; done | LC_ALL=C sort | LC_ALL=C uniq`
+  files=`for f in $files; do echo $f; done | LC_ALL=C sort -u`
 }
 
 # func_emit_lib_Makefile_am
@@ -1423,7 +1422,7 @@
   fi
 
   # Canonicalize the list of specified modules.
-  specified_modules=`for m in $specified_modules; do echo $m; done | LC_ALL=C 
sort | LC_ALL=C uniq`
+  specified_modules=`for m in $specified_modules; do echo $m; done | LC_ALL=C 
sort -u`
 
   # Determine final module list.
   modules="$specified_modules"
@@ -2011,7 +2010,7 @@
 {
   testdir="$1"
   modules="$2"
-  modules=`for m in $modules; do echo $m; done | LC_ALL=C sort | LC_ALL=C uniq`
+  modules=`for m in $modules; do echo $m; done | LC_ALL=C sort -u`
 
   # Subdirectory names.
   sourcebase=gllib
@@ -2467,7 +2466,7 @@
           sedexpr1='s,^m4_include(\[\(.*\)])$,\1,p'
           sedexpr2='s,^[^/]*$,.,'
           sedexpr3='s,/[^/]*$,,'
-          m4dirs=`sed -n -e "$sedexpr1" aclocal.m4 | sed -e "$sedexpr2" -e 
"$sedexpr3" | LC_ALL=C sort | LC_ALL=C uniq`
+          m4dirs=`sed -n -e "$sedexpr1" aclocal.m4 | sed -e "$sedexpr2" -e 
"$sedexpr3" | LC_ALL=C sort -u`
           m4dirs_count=`echo "$m4dirs" | wc -l`
         fi
       fi
        * gnulib-tool (func_modules_transitive_closure): Rewrite to not
        check already-checked module names again.

Index: gnulib-tool
===================================================================
RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v
retrieving revision 1.160
diff -u -r1.160 gnulib-tool
--- gnulib-tool 13 Sep 2006 15:00:47 -0000      1.160
+++ gnulib-tool 14 Sep 2006 22:52:08 -0000
@@ -928,45 +929,41 @@
 # - modules         list of modules, including dependencies
 func_modules_transitive_closure ()
 {
-  while true; do
-    xmodules=
-    for module in $modules; do
+  oldmodules=           # modules (valid or not) that have been checked
+  newmodules=$modules   # modules we still have to check
+  modules=              # accepted modules
+  while test -n "$newmodules"; do
+    oldmodules=`for m in $oldmodules $newmodules; do echo $m; done | LC_ALL=C 
sort -u`
+    for module in : $newmodules; do
+      if test "$module" = :; then
+        newmodules=
+        continue
+      fi
       func_verify_module
       if test -n "$module"; then
-        # Duplicate dependencies are harmless, but Jim wants a warning.
-        duplicated_deps=`func_get_dependencies $module | LC_ALL=C sort | 
LC_ALL=C uniq -d`
-        if test -n "$duplicated_deps"; then
-          echo "warning: module $module has duplicated dependencies: "`echo 
$duplicated_deps` 1>&2
-        fi
         if func_acceptable $module; then
-          xmodules="$xmodules $module"
-          for depmodule in `func_get_dependencies $module`; do
-            if func_acceptable $depmodule; then
-              xmodules="$xmodules $depmodule"
-            fi
-          done
+          modules="$modules $module"
+          deps=`func_get_dependencies $module`
+          # Duplicate dependencies are harmless, but Jim wants a warning.
+          duplicated_deps=`echo "$deps" | LC_ALL=C sort | LC_ALL=C uniq -d`
+          if test -n "$duplicated_deps"; then
+            echo "warning: module $module has duplicated dependencies: "`echo 
$duplicated_deps` 1>&2
+          fi
+          newmodules="$newmodules $deps"
           if test -n "$inctests"; then
             testsmodule=`func_get_tests_module $module`
             if test -n "$testsmodule"; then
-              if func_acceptable $testsmodule; then
-                xmodules="$xmodules $testsmodule"
-                for depmodule in `func_get_dependencies $testsmodule`; do
-                  if func_acceptable $depmodule; then
-                    xmodules="$xmodules $depmodule"
-                  fi
-                done
-              fi
+              newmodules="$newmodules $testsmodule"
             fi
           fi
         fi
       fi
     done
-    xmodules=`for m in $xmodules; do echo $m; done | LC_ALL=C sort | LC_ALL=C 
uniq`
-    if test "$xmodules" = "$modules"; then
-      break
-    fi
-    modules="$xmodules"
+    for m in $newmodules; do echo $m; done | LC_ALL=C sort -u 
>"$tmp"/new-modules
+    newmodules=`echo "$oldmodules" | LC_ALL=C join -v 2 - "$tmp"/new-modules`
   done
+  modules=`for m in $modules; do echo $m; done | LC_ALL=C sort -u`
+  rm -f "$tmp"/new-modules
 }
 
 # func_modules_add_dummy
        * gnulib-tool (func_import): Rewrite all old_files at once when
        appropriate; likewise for new_files, and the input to
        func_add_or_update.

Index: gnulib-tool
===================================================================
RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v
retrieving revision 1.160
diff -u -r1.160 gnulib-tool
--- gnulib-tool 13 Sep 2006 15:00:47 -0000      1.160
+++ gnulib-tool 14 Sep 2006 23:00:53 -0000
@@ -1546,28 +1549,28 @@
 
   # Copy files or make symbolic links. Remove obsolete files.
   delimiter='  '
+  sed_rewrite_old_files="
+    s,^build-aux/,$auxdir/,
+    s,^doc/,$cached_docbase/,
+    s,^lib/,$cached_sourcebase/,
+    s,^m4/,$cached_m4base/,
+    s,^tests/,$cached_testsbase/,"
+  sed_rewrite_new_files="
+    s,^build-aux/,$auxdir/,
+    s,^doc/,$docbase/,
+    s,^lib/,$sourcebase/,
+    s,^m4/,$m4base/,
+    s,^tests/,$testsbase/,"
   for f in $old_files; do
-    case "$f" in
-      build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;;
-      doc/*) g=`echo "$f" | sed -e "s,^doc/,$cached_docbase/,"` ;;
-      lib/*) g=`echo "$f" | sed -e "s,^lib/,$cached_sourcebase/,"` ;;
-      m4/*) g=`echo "$f" | sed -e "s,^m4/,$cached_m4base/,"` ;;
-      tests/*) g=`echo "$f" | sed -e "s,^tests/,$cached_testsbase/,"` ;;
-      *) g="$f" ;;
-    esac
-    echo "$g""$delimiter""$f"
-  done | LC_ALL=C sort > "$tmp"/old-files
+    echo $f
+  done |
+    sed -e "s,.*,&$delimiter&," -e "$sed_rewrite_old_files" |
+    LC_ALL=C sort > "$tmp"/old-files
   for f in $new_files; do
-    case "$f" in
-      build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;;
-      doc/*) g=`echo "$f" | sed -e "s,^doc/,$docbase/,"` ;;
-      lib/*) g=`echo "$f" | sed -e "s,^lib/,$sourcebase/,"` ;;
-      m4/*) g=`echo "$f" | sed -e "s,^m4/,$m4base/,"` ;;
-      tests/*) g=`echo "$f" | sed -e "s,^tests/,$testsbase/,"` ;;
-      *) g="$f" ;;
-    esac
-    echo "$g""$delimiter""$f"
-  done | LC_ALL=C sort > "$tmp"/new-files
+    echo $f
+  done |
+    sed -e "s,.*,&$delimiter&," -e "$sed_rewrite_new_files" |
+    LC_ALL=C sort > "$tmp"/new-files
   # First the files that are in old-files, but not in new-files:
   sed_take_first_column='s,'"$delimiter"'.*,,'
   for g in `LC_ALL=C join -t"$delimiter" -v1 "$tmp"/old-files "$tmp"/new-files 
| sed -e "$sed_take_first_column"`; do
@@ -1644,30 +1647,20 @@
   # Then the files that are in new-files, but not in old-files:
   sed_take_last_column='s,^.*'"$delimiter"',,'
   already_present=
-  for f in `LC_ALL=C join -t"$delimiter" -v2 "$tmp"/old-files "$tmp"/new-files 
| sed -e "$sed_take_last_column"`; do
-    case "$f" in
-      build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;;
-      doc/*) g=`echo "$f" | sed -e "s,^doc/,$docbase/,"` ;;
-      lib/*) g=`echo "$f" | sed -e "s,^lib/,$sourcebase/,"` ;;
-      m4/*) g=`echo "$f" | sed -e "s,^m4/,$m4base/,"` ;;
-      tests/*) g=`echo "$f" | sed -e "s,^tests/,$testsbase/,"` ;;
-      *) g="$f" ;;
-    esac
-    func_add_or_update
-  done
+  LC_ALL=C join -t"$delimiter" -v2 "$tmp"/old-files "$tmp"/new-files |
+    sed -e "$sed_take_last_column" |
+    sed -e "s,.*,& &," -e "$sed_rewrite_new_files" |
+    while read g f; do
+      func_add_or_update
+    done
   # Then the files that are in new-files and in old-files:
   already_present=true
-  for f in `LC_ALL=C join -t"$delimiter" "$tmp"/old-files "$tmp"/new-files | 
sed -e "$sed_take_last_column"`; do
-    case "$f" in
-      build-aux/*) g=`echo "$f" | sed -e "s,^build-aux/,$auxdir/,"` ;;
-      doc/*) g=`echo "$f" | sed -e "s,^doc/,$docbase/,"` ;;
-      lib/*) g=`echo "$f" | sed -e "s,^lib/,$sourcebase/,"` ;;
-      m4/*) g=`echo "$f" | sed -e "s,^m4/,$m4base/,"` ;;
-      tests/*) g=`echo "$f" | sed -e "s,^tests/,$testsbase/,"` ;;
-      *) g="$f" ;;
-    esac
-    func_add_or_update
-  done
+  LC_ALL=C join -t"$delimiter" "$tmp"/old-files "$tmp"/new-files |
+    sed -e "$sed_take_last_column" |
+    sed -e "s,.*,& &," -e "$sed_rewrite_new_files" |
+    while read g f; do
+      func_add_or_update
+    done
 
   # Command-line invocation printed in a comment in generated gnulib-cache.m4.
   actioncmd="gnulib-tool --import"
        * gnulib-tool (nl): New variable.
        (func_import): Rewrite include directive extraction to only
        read each directive once.

Index: gnulib-tool
===================================================================
RCS file: /cvsroot/gnulib/gnulib/gnulib-tool,v
retrieving revision 1.160
diff -u -r1.160 gnulib-tool
--- gnulib-tool 13 Sep 2006 15:00:47 -0000      1.160
+++ gnulib-tool 14 Sep 2006 22:57:12 -0000
@@ -25,6 +25,8 @@
 cvsdatestamp='$Date: 2006/09/13 15:00:47 $'
 last_checkin_date=`echo "$cvsdatestamp" | sed -e 's,^\$[D]ate: ,,'`
 version=`echo "$last_checkin_date" | sed -e 's/ .*$//' -e 's,/,-,g'`
+nl='
+'
 
 # You can set AUTOCONFPATH to empty if autoconf 2.57 is already in your PATH.
 AUTOCONFPATH=
@@ -1949,29 +1944,30 @@
   echo
   echo "You may need to add #include directives for the following .h files."
   (
-   # First the #include <...> directives without #ifs, sorted for convenience.
+   # First the #include <...> directives without #ifs, sorted for convenience,
+   # then the #include "..." directives without #ifs, sorted for convenience,
+   # then the #include directives that are surrounded by #ifs. Not sorted.
+   : >"$tmp"/include-lt 
+   : >"$tmp"/include-q
+   : >"$tmp"/include-if
    for module in $modules; do
-     if func_get_include_directive "$module" | grep '^#if' >/dev/null; then
-       :
-     else
-       func_get_include_directive "$module" | grep -v 'include "'
-     fi
-   done | LC_ALL=C sort -u
-   # Then the #include "..." directives without #ifs, sorted for convenience.
-   for module in $modules; do
-     if func_get_include_directive "$module" | grep '^#if' >/dev/null; then
-       :
-     else
-       func_get_include_directive "$module" | grep 'include "'
-     fi
-   done | LC_ALL=C sort -u
-   # Then the #include directives that are surrounded by #ifs. Not sorted.
-   for module in $modules; do
-     if func_get_include_directive "$module" | grep '^#if' >/dev/null; then
-       func_get_include_directive "$module"
-     fi
+     include_directive=`func_get_include_directive "$module"`
+     case $nl$include_directive in
+     *$nl\#if*)
+       echo "$include_directive" >> "$tmp"/include-if
+     ;;
+     *)
+       echo "$include_directive" | grep -v 'include "' >>"$tmp"/include-lt
+       echo "$include_directive" | grep    'include "' >>"$tmp"/include-q
+     ;;
+     esac
    done
-  ) | sed -e '/^$/d;' -e 's/^/  /'
+   LC_ALL=C sort -u "$tmp"/include-lt
+   LC_ALL=C sort -u "$tmp"/include-q
+   cat "$tmp"/include-if
+  ) | sed -e '/^$/d' -e 's/^/  /'
+  rm -f "$tmp"/include-lt "$tmp"/include-q "$tmp"/include-if
+
   echo
   echo "Don't forget to"
   if test "$makefile_am" = Makefile.am; then

Reply via email to