Package: etckeeper Version: 0.43 Severity: wishlist File: /etc/etckeeper/pre-commit.d/30store-metadata Tags: patch
Using Darcs, etckeeper pre-commit was annoyingly slow. Profiling pre-commit.d, I found about 3s were spent running readlink(1) on each link in /etc/, though there were only 590 links. Fortunately, GNU find supports -printf, so readlink(1) isn't required. Similarly, the stat(1) per chown/chmod/chgrp entry isn't required. Finally, I wanted .etckeeper to ignore backup~ files. I added -name \*~ -o to $NOVCS, which required turning it into a wrapper function. Ugly, but at least it's applied consistently to all find(1) calls (cf. filter_ignore). I threw in an -O2 for the hell of it. Before: # time sh -c 'etckeeper pre-commit; etckeeper pre-commit; etckeeper pre-commit' real 0m10.402s user 0m1.200s sys 0m2.536s After: # time sh -c 'etckeeper pre-commit; etckeeper pre-commit; etckeeper pre-commit' real 0m2.770s user 0m1.284s sys 0m1.500s Feel free to WONTFIX this if you have a "no GNUisms" policy. -- System Information: Debian Release: squeeze/sid APT prefers unstable APT policy: (500, 'unstable'), (1, 'experimental') Architecture: amd64 (x86_64) Kernel: Linux 2.6.33-2-amd64 (SMP w/2 CPU cores) Locale: LANG=en_AU.utf8, LC_CTYPE=en_AU.utf8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/dash Versions of packages etckeeper depends on: ii bzr 2.1.1-1 easy to use distributed version co ii darcs 2.4+152 a distributed, interactive, smart ii debconf [debconf-2.0] 1.5.30 Debian configuration management sy ii git-core 1:1.7.0.4-1 fast, scalable, distributed revisi ii mercurial 1.5.1-1 scalable distributed version contr Versions of packages etckeeper recommends: ii cron 3.0pl1-108 process scheduling daemon etckeeper suggests no packages. -- debconf information: etckeeper/commit_failed: etckeeper/purge: true
#!/bin/sh
# etckeeper pre-commit hook: (re)generate the .etckeeper file, a shell
# script fragment recording metadata that the VCS itself does not track
# (empty directories, symlinks, ownership and permissions).
#
# Expects $VCS to be set (git, hg, bzr or darcs) and to be run from the
# top of the tracked directory.  Relies on GNU find extensions
# (-O2, -wholename, -printf, -fprintf).
set -e

# my_find EXPRESSION...
#
# Run find(1) over the current directory with a common set of exclusions
# prepended to EXPRESSION.
my_find() {
	# We maintain the permissions on the directory containing VCS data
	# but we want find to ignore the VCS files themselves.  Backup files
	# (names ending in "~") are skipped as well.
	#
	# (Note that when using this, the find expression must end with
	# an action such as -print, -printf or -exec, else the excluded
	# directories will actually be printed!)
	find -O2 . \
		-wholename ./.git -prune -o \
		-wholename ./.bzr -prune -o \
		-wholename ./.hg -prune -o \
		-wholename ./_darcs -prune -o \
		-name "*~" -o "$@"
}

# generate_metadata
#
# Generates the script commands needed to restore any files that are not
# owned by the current user and group, or whose mode is not the default
# (0755 for directories; 0644, or 0755 where the VCS keeps the
# executable bit, for files).
# The script is produced on stdout.  Errors go to stderr.
#
# The script can use a 'maybe' function, which only runs a command
# if the file in its last argument exists.
#
# NOTE: paths are wrapped in single quotes without any escaping, so a
# file name that itself contains a single quote yields a malformed line.
generate_metadata() {
	# Keep the sort order the same at all times.
	LC_COLLATE=C
	export LC_COLLATE
	# LC_ALL, when set by the caller, takes precedence over LC_COLLATE,
	# so every sort below is additionally run with LC_ALL=C.  Otherwise
	# the order of .etckeeper would depend on the invoking locale.

	if [ "$VCS" = git ] || [ "$VCS" = hg ]; then
		# These version control systems do not track directories,
		# so empty directories must be stored specially.
		# Sort on the path (everything after the first quote).
		my_find -type d -empty -printf "mkdir -p '%p'\\n" |
			LC_ALL=C sort -t\' -k2
	fi

	if [ "$VCS" = darcs ]; then
		# This version control system does not track symlinks,
		# so they must be stored specially.
		#
		# FIXME: NO LONGER OMITS .darcsignore MATCHES.
		# Why was it done only for THIS find, anyway?
		#
		# Sort on the link path (the fourth quote-delimited field),
		# not on the link target.
		my_find -type l -printf "ln -sf '%l' '%p'\\n" |
			LC_ALL=C sort -t\' -k4
	fi

	# Find all files and directories that don't have the current user as
	# the owner.  Entries whose uid has no user name are reported on
	# stderr instead of being written to the script.
	my_find -nouser -fprintf /dev/stderr "Bad owner for maybe chown %u '%p'\\n" \
		-o \! -user "$(id -u)" -printf "maybe chown %u '%p'\\n" |
		LC_ALL=C sort

	# Find all files and directories that don't have the current user's
	# group as the group.  Entries whose gid has no group name are
	# reported on stderr instead of being written to the script.
	my_find -nogroup -fprintf /dev/stderr "Bad group for maybe chgrp %g '%p'\\n" \
		-o \! -group "$(id -g)" -printf "maybe chgrp %g '%p'\\n" |
		LC_ALL=C sort

	# Find all directories that aren't 0755
	my_find -type d \! -perm 0755 -printf "maybe chmod %m '%p'\\n" |
		LC_ALL=C sort

	if [ "$VCS" = darcs ]; then
		# Find all files that aren't 0644 (darcs doesn't maintain
		# the executable bit).
		my_find -type f \! -perm 0644 -printf "maybe chmod %m '%p'\\n" |
			LC_ALL=C sort
	else
		# Find all files that aren't 0644 or 0755 (we can assume the
		# VCS will maintain the executable bit).
		my_find -type f \! -perm 0644 \! -perm 0755 \
			-printf "maybe chmod %m '%p'\\n" |
			LC_ALL=C sort
	fi

	# We don't handle xattrs.
	# Maybe check for getfattr/setfattr and use them if they're available?
}

if [ "$VCS" = git ] || [ "$VCS" = hg ] || [ "$VCS" = bzr ] || [ "$VCS" = darcs ]; then
	if [ -f .metadata ]; then
		# remove obsolete .metadata file
		# git allows fully deleting it at this point, other VCS
		# may not (the repo is locked for hg).
		if [ "$VCS" = git ]; then
			$VCS rm .metadata
		else
			rm -f .metadata
		fi
	fi

	# Start the file afresh with a header; the metadata is appended below.
	echo "# Generated by etckeeper. Do not edit." > .etckeeper
	echo >> .etckeeper

	# Make sure the file is not readable by others, since it can leak
	# information about contents of non-readable directories in /etc.
	# This is done before any metadata is written to it.
	chmod 700 .etckeeper

	generate_metadata >> .etckeeper

	# stage the file as part of the current commit
	if [ "$VCS" = git ]; then
		# this will do nothing if the metadata file is unchanged.
		git add .etckeeper
	fi
	# hg, bzr and darcs add not done, they will automatically
	# include the file in the current commit
fi
--- old-etc/etckeeper/pre-commit.d/30store-metadata 2010-04-08 08:26:52.323314988 +0000 +++ new-etc/etckeeper/pre-commit.d/30store-metadata 2010-04-08 08:26:52.354720496 +0000 @@ -1,40 +1,20 @@ #!/bin/sh set -e -# Filters out UNKNOWN users and groups, prints a warning on stderr. -filter_unknown() { - CMD=$1 - while read line; do - # if the first n chars of $line equal "$CMD UNKNOWN "... - if [ "$(printf %.$((9+${#CMD}))s "$line")" = "$CMD UNKNOWN " ]; then - echo Bad "$2" for "$line" >&2 - else - echo "$line" - fi - done -} - -filter_ignore() { - if [ "$VCS" = darcs ]; then - ignorefile=.darcsignore - fi - - if [ "$VCS" = darcs ] && [ -e "$ignorefile" ]; then - # Spaces embedded into patterns would break it. - # But really, why would anyone want to use ' ' instead of '\s' ? - #patterns=$( grep -v '^[[:space:]]*\(#\|$\)' "$ignorefile" | xargs -n 1 printf " -e %s" ) - #grep -Ev $patterns - #unset patterns - # Alternative using a temp file - patternsfile="$( mktemp -t etckeeper-$VCS.XXXXXXXXXX )" - grep -v '^[[:space:]]*\(#\|$\)' "$ignorefile" > "$patternsfile" || true - grep -Evf "$patternsfile" - rm -f "$patternsfile" - unset patternsfile - else - cat - - fi -} +my_find() { + # We maintain the permissions on the directory containing VCS data + # but we want find to ignore the VCS files themselves. + # + # (Note that when using this, the find expression must end with + # -print or -exec, else the excluded directories will actually be + # printed!) + find -O2 . \ + -wholename ./.git -prune -o \ + -wholename ./.bzr -prune -o \ + -wholename ./.hg -prune -o \ + -wholename ./_darcs -prune -o \ + -name "*~" -o "$@" + } generate_metadata() { # This function generates the script commands to fix any files @@ -44,14 +24,6 @@ # The script can use a 'maybe' function, which only runs a command # if the file in its last argument exists. - # We maintain the permissions on the directory containing VCS data - # but we want find to ignore the VCS files themselves. 
- # - # (Note that when using this, the find expression must end with - # -print or -exec, else the excluded directories will actually be - # printed!) - NOVCS='. -wholename ./.git -prune -o -wholename ./.bzr -prune -o -wholename ./.hg -prune -o -wholename ./_darcs -prune -o' - # Keep the sort order the same at all times. LC_COLLATE=C export LC_COLLATE @@ -59,40 +31,36 @@ if [ "$VCS" = git ] || [ "$VCS" = hg ]; then # These version control systems do not track directories, # so empty directories must be stored specially. - find $NOVCS -type d -empty -print | - sort | sed -e "s/^/mkdir -p '/" -e "s/\$/'/" + my_find -type d -empty -printf "mkdir -p '%p'\\n" | sort -t\' -k2 fi if [ "$VCS" = darcs ]; then # This version control system does not track symlinks, # so they must be stored specially. - find $NOVCS -type l -print | sort | filter_ignore | while read link; do - dest=$( readlink "$link" ) - printf "ln -sf '%s' '%s'\n" "$dest" "$link" - done + # + # FIXME: NO LONGER OMITS .darcsignore MATCHES. + # Why was it done only for THIS find, anyway? + my_find -type l -printf "ln -sf '%l' '%p'\\n" | sort -t\' -k4 fi # Find all files and directories that don't have the current user as the owner - find $NOVCS \! -user "$(id -u)" -exec stat --format="maybe chown %U '{}'" {} \; \ - | sort | filter_unknown 'maybe chown' owner + my_find -nouser -fprintf /dev/stderr "Bad owner for maybe chown %u '%p'\\n" \ + -o \! -user "$(id -u)" -printf "maybe chown %u '%p'\\n" | sort # Find all files and directories that don't have root as the group - find $NOVCS \! -group $(id -g) -exec stat --format="maybe chgrp %G '{}'" {} \; \ - | sort | filter_unknown 'maybe chgrp' group + my_find -nogroup -fprintf /dev/stderr "Bad group for maybe chgrp %g '%p'\\n" \ + -o \! -group "$(id -g)" -printf "maybe chgrp %g '%p'\\n" | sort # Find all directories that aren't 0755 - find $NOVCS -type d \! -perm 0755 \ - -exec stat --format="maybe chmod %a '{}'" {} \; | sort + my_find -type d \! 
-perm 0755 -printf "maybe chmod %m '%p'\\n" | sort if [ "$VCS" = darcs ]; then # Find all files that aren't 0644 (darcs doesn't maintain # the executable bit). - find $NOVCS -type f \! -perm 0644 \ - -exec stat --format="maybe chmod %a '{}'" {} \; | sort + my_find -type f \! -perm 0644 -printf "maybe chmod %m '%p'\\n" | sort else # Find all files that aren't 0644 or 0755 (we can assume the VCS will # maintain the executable bit). - find $NOVCS -type f \! -perm 0644 \! -perm 0755 \ - -exec stat --format="maybe chmod %a '{}'" {} \; | sort + my_find -type f \! -perm 0644 \! -perm 0755 -printf "maybe chmod %m '%p'\\n" | sort fi # We don't handle xattrs.