Hello Ralf, 20% is a good speedup that you achieve here by use of shell built-ins instead of subprocesses and sed invocations.
> * gnulib-tool (func_strip): New function. You only ever need to strip the prefix _or_ the suffix, never both simultaneously. Therefore it will be faster to define 2 functions. > +# func_strip name prefix suffix This function is insufficiently documented: What restrictions apply to 'name', 'prefix', 'suffix'? > +if ( eval 'foo=bar; test "${foo%r}" = ba' ) >/dev/null 2>&1; then > + func_strip () > + { > + stripped=$1 > + stripped=${stripped#$2} > + stripped=${stripped%$3} > + } I don't much like functions which assign to a particular variable: it makes it hard to remember how to use the function. > @@ -1495,11 +1516,15 @@ func_get_automake_snippet () > | sed -n -e "$sed_extract_mentioned_files" | sed -e 's/#.*//'` > func_get_filelist "$mymodule" > all_files=$module_files > - lib_files=`for f in $all_files; do \ > - case $f in \ > - lib/*) echo $f ;; \ > - esac; \ > - done | sed -e 's,^lib/,,'` > + lib_files= > + for f in $all_files; do > + case $f in > + lib/*) > + func_strip "$f" lib/ '' > + func_append lib_files "$nl$stripped" > + ;; > + esac > + done > # Remove $already_mentioned_files from $lib_files. > echo "$lib_files" | LC_ALL=C sort -u > "$tmp"/lib-files > extra_files=`func_reset_sigpipe; \ This introduces an extra blank line in "$tmp"/lib-files. It doesn't matter in the end, but nevertheless. > @@ -1524,36 +1549,47 @@ func_get_automake_snippet () > case "$mymodule" in > relocatable-prog-wrapper) ;; > *) > - sed_extract_c_files='/\.c$/p' > - extra_files=`echo "$extra_files" | sed -n -e > "$sed_extract_c_files"` > - if test -n "$extra_files"; then > - set x $extra_files > + extra_c_files= > + for f in $extra_files; do > + case $f in > + *.c) > + func_append extra_c_files " $f" ;; > + esac > + done > + if test -n "$extra_c_files"; then > + set x $extra_c_files > shift This hunk is not well indented. Also, I don't find this code easier to understand: it uses string manipulations instead of a filtering 'sed' command, and consumes more lines of code. Despite the general objections against string processing in bash that I listed in the other mail, I'm applying this modified patch. It reduces the total complexity of the code to an amount that will hopefully increase maintainability instead of reducing it. The speedup I saw (on the m4 1.4 branch, like you did): before: real 0m32.802s user 0m11.053s sys 0m21.693s real 0m33.114s user 0m11.149s sys 0m21.773s real 0m32.823s user 0m11.145s sys 0m21.657s after: real 0m29.970s user 0m10.469s sys 0m19.457s real 0m29.715s user 0m10.573s sys 0m18.941s 2009-01-01 Ralf Wildenhues <ralf.wildenh...@gmx.de> Bruno Haible <br...@clisp.org> Speed up gnulib-tool by doing more string processing through shell built-ins. * gnulib-tool (fast_func_append): New variable. (func_remove_prefix, func_remove_suffix): New functions. (fast_func_remove_prefix, fast_func_remove_suffix): New variables. (func_filter_filelist): New function. (func_get_dependencies): Use func_remove_suffix instead of sed. (func_get_automake_snippet): Use func_filter_filelist instead of a subshell and sed invocation. --- gnulib-tool.orig 2009-01-02 02:19:59.000000000 +0100 +++ gnulib-tool 2009-01-02 01:32:56.000000000 +0100 @@ -398,11 +398,57 @@ { eval "$1+=\"\$2\"" } + fast_func_append=true else func_append () { eval "$1=\"\$$1\$2\"" } + fast_func_append=false +fi + +# func_remove_prefix var prefix +# removes the given prefix from the value of the shell variable var. +# var should be the name of a shell variable. +# Its value should not contain a newline and not start or end with whitespace. +# prefix should not contain the characters "$`\{}|. +if ( foo=bar; eval 'test "${foo#b}" = ar' ) >/dev/null 2>&1; then + func_remove_prefix () + { + eval "$1=\${$1#\$2}" + } + fast_func_remove_prefix=true +else + func_remove_prefix () + { + eval "value=\"\$$1\"" + prefix="$2" + value=`echo "$value" | sed -e "s|^${prefix}||"` + eval "$1=\"\$value\"" + } + fast_func_remove_prefix=false +fi + +# func_remove_suffix var suffix +# removes the given suffix from the value of the shell variable var. +# var should be the name of a shell variable. +# Its value should not contain a newline and not start or end with whitespace. +# suffix should not contain the characters "$`\{}|. +if ( foo=bar; eval 'test "${foo%r}" = ba' ) >/dev/null 2>&1; then + func_remove_suffix () + { + eval "$1=\${$1%\$2}" + } + fast_func_remove_suffix=true +else + func_remove_suffix () + { + eval "value=\"\$$1\"" + suffix="$2" + value=`echo "$value" | sed -e "s|${suffix}\$||"` + eval "$1=\"\$value\"" + } + fast_func_remove_suffix=false fi # func_fatal_error message @@ -1328,13 +1374,63 @@ esac } +# func_filter_filelist outputvar separator filelist prefix suffix removed_prefix removed_suffix [added_prefix [added_suffix]] +# stores in outputvar the filtered and processed filelist. Filtering: Only the +# elements starting with prefix and ending with suffix are considered. +# Processing: removed_prefix and removed_suffix are removed from each element, +# added_prefix and added_suffix are added to each element. +# removed_prefix, removed_suffix should not contain the characters "$`\{}|. +# added_prefix, added_suffix should not contain the characters \|. +func_filter_filelist () +{ + if test "$2" != "$nl" \ + || { $fast_func_append \ + && { test -z "$6" || $fast_func_remove_prefix; } \ + && { test -z "$7" || $fast_func_remove_suffix; }; \ + }; then + ffflist= + for fff in $3; do + case "$fff" in + "$4"*"$5") + if test -n "$6"; then + func_remove_prefix fff "$6" + fi + if test -n "$7"; then + func_remove_suffix fff "$7" + fi + fff="$8${fff}$9" + if test -z "$ffflist"; then + ffflist="${fff}" + else + func_append ffflist "$2${fff}" + fi + ;; + esac + done + else + sed_fff_filter="s|^$6\(.*\)$7\\$|$8\\1$9|" + ffflist=`for fff in $3; do + case "$fff" in + "$4"*"$5") echo "$fff" ;; + esac + done | sed -e "$sed_fff_filter"` + fi + eval "$1=\"\$ffflist\"" +} + # func_get_dependencies module # Input: # - local_gnulib_dir from --local-dir func_get_dependencies () { # ${module}-tests always implicitly depends on ${module}. - echo "$1" | sed -n -e 's/-tests$//p' + case "$1" in + *-tests) + fgd1="$1" + func_remove_suffix fgd1 '-tests' + echo "$fgd1" + ;; + esac # Then the explicit dependencies listed in the module description. func_lookup_file "modules/$1" sed -n -e "/^Depends-on$sed_extract_prog" < "$lookedup_file" @@ -1370,11 +1466,7 @@ # *-tests module live in tests/, not lib/. # Synthesize an EXTRA_DIST augmentation. all_files=`func_get_filelist $1` - tests_files=`for f in $all_files; do \ - case $f in \ - tests/*) echo $f ;; \ - esac; \ - done | sed -e 's,^tests/,,'` + func_filter_filelist tests_files " " "$all_files" 'tests/' '' 'tests/' '' extra_files="$tests_files" if test -n "$extra_files"; then echo "EXTRA_DIST +=" $extra_files @@ -1396,11 +1488,7 @@ | sed -e "$sed_combine_lines" \ | sed -n -e "$sed_extract_mentioned_files" | sed -e 's/#.*//'` all_files=`func_get_filelist $1` - lib_files=`for f in $all_files; do \ - case $f in \ - lib/*) echo $f ;; \ - esac; \ - done | sed -e 's,^lib/,,'` + func_filter_filelist lib_files "$nl" "$all_files" 'lib/' '' 'lib/' '' # Remove $already_mentioned_files from $lib_files. echo "$lib_files" | LC_ALL=C sort -u > "$tmp"/lib-files extra_files=`func_reset_sigpipe; \ @@ -1424,8 +1512,7 @@ case "$1" in relocatable-prog-wrapper) ;; *) - sed_extract_c_files='/\.c$/p' - extra_files=`echo "$extra_files" | sed -n -e "$sed_extract_c_files"` + func_filter_filelist extra_files "$nl" "$extra_files" '' '.c' '' '' if test -n "$extra_files"; then echo "EXTRA_lib_SOURCES +=" $extra_files echo @@ -1433,22 +1520,14 @@ ;; esac # Synthesize an EXTRA_DIST augmentation also for the files in build-aux/. - buildaux_files=`for f in $all_files; do \ - case $f in \ - build-aux/*) echo $f ;; \ - esac; \ - done | sed -e 's,^build-aux/,,'` + func_filter_filelist buildaux_files "$nl" "$all_files" 'build-aux/' '' 'build-aux/' '' if test -n "$buildaux_files"; then sed_prepend_auxdir='s,^,$(top_srcdir)/'"$auxdir"'/,' echo "EXTRA_DIST += "`echo "$buildaux_files" | sed -e "$sed_prepend_auxdir"` echo fi # Synthesize an EXTRA_DIST augmentation also for the files from top/. - top_files=`for f in $all_files; do \ - case $f in \ - top/*) echo $f ;; \ - esac; \ - done | sed -e 's,^top/,,'` + func_filter_filelist top_files "$nl" "$all_files" 'top/' '' 'top/' '' if test -n "$top_files"; then sed_prepend_topdir='s,^,$(top_srcdir)/,' echo "EXTRA_DIST += "`echo "$top_files" | sed -e "$sed_prepend_topdir"`