Package: etckeeper
Version: 0.43
Severity: wishlist
File: /etc/etckeeper/pre-commit.d/30store-metadata
Tags: patch

Using Darcs, etckeeper pre-commit was annoyingly slow.  Profiling
pre-commit.d, I found about 3s were spent running readlink(1) on each
link in /etc/, though there were only 590 links.

Fortunately, GNU find supports -printf, so readlink(1) isn't required.
Similarly, the stat(1) per chown/chmod/chgrp entry isn't required.

Finally, I wanted .etckeeper to ignore backup~ files.  I added -name
\*~ -o to $NOVCS, which required turning it into a wrapper function.
Ugly, but at least it's applied consistently to all find(1) calls
(cf. filter_ignore).  I threw in an -O2 for the hell of it.

Before:

    # time sh -c 'etckeeper pre-commit; etckeeper pre-commit; etckeeper pre-commit'

    real    0m10.402s
    user    0m1.200s
    sys     0m2.536s

After:

    # time sh -c 'etckeeper pre-commit; etckeeper pre-commit; etckeeper pre-commit'

    real    0m2.770s
    user    0m1.284s
    sys     0m1.500s

Feel free to WONTFIX this if you have a "no GNUisms" policy.

-- System Information:
Debian Release: squeeze/sid
  APT prefers unstable
  APT policy: (500, 'unstable'), (1, 'experimental')
Architecture: amd64 (x86_64)

Kernel: Linux 2.6.33-2-amd64 (SMP w/2 CPU cores)
Locale: LANG=en_AU.utf8, LC_CTYPE=en_AU.utf8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash

Versions of packages etckeeper depends on:
ii  bzr                          2.1.1-1     easy to use distributed version co
ii  darcs                        2.4+152     a distributed, interactive, smart 
ii  debconf [debconf-2.0]        1.5.30      Debian configuration management sy
ii  git-core                     1:1.7.0.4-1 fast, scalable, distributed revisi
ii  mercurial                    1.5.1-1     scalable distributed version contr

Versions of packages etckeeper recommends:
ii  cron                          3.0pl1-108 process scheduling daemon

etckeeper suggests no packages.

-- debconf information:
  etckeeper/commit_failed:
  etckeeper/purge: true
#!/bin/sh
set -e

my_find() {
	# Run GNU find over the current directory with etckeeper's standard
	# exclusions placed in front of the caller's expression:
	#   - the VCS data directories (./.git, ./.bzr, ./.hg, ./_darcs) are
	#     pruned, so neither they nor their contents reach the caller's
	#     expression;
	#   - any entry whose name ends in "~" is skipped.
	#
	# Arguments: the find expression to evaluate for everything else.
	#
	# The exclusions are chained with -o, so the caller's expression must
	# contain an explicit action (-print, -printf, -exec, ...); otherwise
	# find's implicit -print would also print the excluded entries.

	# Build the argument list back to front: each step prepends one
	# exclusion, so the final order is .git, .bzr, .hg, _darcs, "*~".
	set -- -name "*~" -o "$@"
	set -- -wholename ./_darcs -prune -o "$@"
	set -- -wholename ./.hg -prune -o "$@"
	set -- -wholename ./.bzr -prune -o "$@"
	set -- -wholename ./.git -prune -o "$@"

	find -O2 . "$@"
}

generate_metadata() {
	# Writes, on stdout, the shell commands that restore metadata the
	# VCS does not keep: empty directories (git, hg), symlinks (darcs),
	# owners and groups that differ from the current user's uid and
	# primary gid, directory modes other than 0755, and file modes
	# other than 0644 (or 0755, except under darcs).
	# Warnings about entries with no known owner/group go to stderr.
	#
	# Reads $VCS and relies on my_find (GNU find with -printf).
	#
	# The script can use a 'maybe' function, which only runs a command
	# if the file in its last argument exists.
	#
	# NOTE(review): %p and %l are wrapped in single quotes without any
	# escaping, so a path or link target that itself contains a single
	# quote produces a malformed command line in the generated script.

	# Keep the sort order the same at all times.  LC_COLLATE on its own
	# is not enough: an LC_ALL inherited from the environment takes
	# precedence over it, so every sort below also runs with LC_ALL=C.
	LC_COLLATE=C
	export LC_COLLATE

	if [ "$VCS" = git ] || [ "$VCS" = hg ]; then
		# These version control systems do not track directories,
		# so empty directories must be stored specially.
		# Sort on the quoted path (second '-delimited field).
		my_find -type d -empty -printf "mkdir -p '%p'\\n" |
			LC_ALL=C sort -t\' -k2
	fi

	if [ "$VCS" = darcs ]; then
		# This version control system does not track symlinks,
		# so they must be stored specially.
		#
		# FIXME: NO LONGER OMITS .darcsignore MATCHES.
		# Why was it done only for THIS find, anyway?
		#
		# Sort on the link path (fourth '-delimited field), not on
		# the link target.
		my_find -type l -printf "ln -sf '%l' '%p'\\n" |
			LC_ALL=C sort -t\' -k4
	fi

	# Find all files and directories that don't have the current user
	# as the owner.  Entries whose uid has no passwd entry cannot be
	# restored by name, so they are only reported on stderr.
	my_find -nouser -fprintf /dev/stderr "Bad owner for maybe chown %u '%p'\\n" \
		-o \! -user "$(id -u)" -printf "maybe chown %u '%p'\\n" |
		LC_ALL=C sort
	# Find all files and directories that don't have the current user's
	# primary group as the group.  Unknown gids are reported on stderr.
	my_find -nogroup -fprintf /dev/stderr "Bad group for maybe chgrp %g '%p'\\n" \
		-o \! -group "$(id -g)" -printf "maybe chgrp %g '%p'\\n" |
		LC_ALL=C sort

	# Find all directories that aren't 0755
	my_find -type d \! -perm 0755 -printf "maybe chmod %m '%p'\\n" |
		LC_ALL=C sort

	if [ "$VCS" = darcs ]; then
		# Find all files that aren't 0644 (darcs doesn't maintain
		# the executable bit).
		my_find -type f \! -perm 0644 -printf "maybe chmod %m '%p'\\n" |
			LC_ALL=C sort
	else
		# Find all files that aren't 0644 or 0755 (we can assume the VCS will
		# maintain the executable bit).
		my_find -type f \! -perm 0644 \! -perm 0755 -printf "maybe chmod %m '%p'\\n" |
			LC_ALL=C sort
	fi

	# We don't handle xattrs.
	# Maybe check for getfattr/setfattr and use them if they're available?
}

# Regenerate .etckeeper for the supported version control systems;
# any other value of $VCS leaves the working directory untouched.
case "$VCS" in
	git|hg|bzr|darcs)
		# Get rid of the obsolete .metadata file from older versions.
		# git can drop it from the repository right here; the other
		# systems may not allow that at this point (hg holds a lock
		# on the repo), so there the file is only deleted from disk.
		if [ -f .metadata ]; then
			case "$VCS" in
				git)
					git rm .metadata
					;;
				*)
					rm -f .metadata
					;;
			esac
		fi

		# Start a fresh file: banner line followed by a blank line.
		{
			echo "# Generated by etckeeper.  Do not edit."
			echo
		} > .etckeeper

		# Restrict access before the real content is appended: the
		# file can reveal what non-readable directories in /etc
		# contain.
		chmod 700 .etckeeper

		generate_metadata >> .etckeeper

		# Only git needs the file staged explicitly (a no-op when the
		# metadata is unchanged); hg, bzr and darcs include it in the
		# current commit automatically.
		if [ "$VCS" = git ]; then
			git add .etckeeper
		fi
		;;
esac
--- old-etc/etckeeper/pre-commit.d/30store-metadata     2010-04-08 
08:26:52.323314988 +0000
+++ new-etc/etckeeper/pre-commit.d/30store-metadata     2010-04-08 
08:26:52.354720496 +0000
@@ -1,40 +1,20 @@
 #!/bin/sh
 set -e
 
-# Filters out UNKNOWN users and groups, prints a warning on stderr.
-filter_unknown() {
-       CMD=$1
-       while read line; do
-               # if the first n chars of $line equal "$CMD UNKNOWN "...
-               if [ "$(printf %.$((9+${#CMD}))s "$line")" = "$CMD UNKNOWN " ]; 
then
-                       echo Bad "$2" for "$line" >&2
-               else
-                       echo "$line"
-               fi
-       done
-}
-
-filter_ignore() {
-       if [ "$VCS" = darcs ]; then
-               ignorefile=.darcsignore
-       fi
-
-       if [ "$VCS" = darcs ] && [ -e "$ignorefile" ]; then
-               # Spaces embedded into patterns would break it.
-               # But really, why would anyone want to use ' ' instead of '\s' ?
-               #patterns=$( grep -v '^[[:space:]]*\(#\|$\)' "$ignorefile" | 
xargs -n 1 printf " -e %s" )
-               #grep -Ev $patterns
-               #unset patterns
-               # Alternative using a temp file
-               patternsfile="$( mktemp -t etckeeper-$VCS.XXXXXXXXXX )"
-               grep -v '^[[:space:]]*\(#\|$\)' "$ignorefile" > "$patternsfile" 
|| true
-               grep -Evf "$patternsfile"
-               rm -f "$patternsfile"
-               unset patternsfile
-       else
-               cat -
-       fi
-}
+my_find() {
+       # We maintain the permissions on the directory containing VCS data
+       # but we want find to ignore the VCS files themselves.
+       #
+       # (Note that when using this, the find expression must end with
+       # -print or -exec, else the excluded directories will actually be
+       # printed!)
+       find -O2 . \
+            -wholename ./.git -prune -o \
+            -wholename ./.bzr -prune -o \
+            -wholename ./.hg -prune -o \
+            -wholename ./_darcs -prune -o \
+            -name "*~" -o "$@"
+    }
 
 generate_metadata() {
        # This function generates the script commands to fix any files
@@ -44,14 +24,6 @@
        # The script can use a 'maybe' function, which only runs a command
        # if the file in its last argument exists.
 
-       # We maintain the permissions on the directory containing VCS data
-       # but we want find to ignore the VCS files themselves.
-       # 
-       # (Note that when using this, the find expression must end with 
-       # -print or -exec, else the excluded directories will actually be
-       # printed!)
-       NOVCS='. -wholename ./.git -prune -o -wholename ./.bzr -prune -o 
-wholename ./.hg -prune -o -wholename ./_darcs -prune -o'
-
        # Keep the sort order the same at all times.
        LC_COLLATE=C
        export LC_COLLATE
@@ -59,40 +31,36 @@
        if [ "$VCS" = git ] || [ "$VCS" = hg ]; then
                # These version control systems do not track directories,
                # so empty directories must be stored specially.
-               find $NOVCS -type d -empty -print |
-                       sort | sed -e "s/^/mkdir -p '/" -e "s/\$/'/"
+               my_find -type d -empty -printf "mkdir -p '%p'\\n" | sort -t\' 
-k2
        fi
 
        if [ "$VCS" = darcs ]; then
                # This version control system does not track symlinks,
                # so they must be stored specially.
-               find $NOVCS -type l -print | sort | filter_ignore | while read 
link; do
-                       dest=$( readlink "$link" )
-                       printf "ln -sf '%s' '%s'\n" "$dest" "$link"
-               done
+               #
+               # FIXME: NO LONGER OMITS .darcsignore MATCHES.
+               # Why was it done only for THIS find, anyway?
+               my_find -type l -printf "ln -sf '%l' '%p'\\n" | sort -t\' -k4
        fi
 
        # Find all files and directories that don't have the current user as 
the owner
-       find $NOVCS \! -user "$(id -u)" -exec stat --format="maybe chown %U 
'{}'" {} \; \
-               | sort | filter_unknown 'maybe chown' owner
+       my_find -nouser -fprintf /dev/stderr "Bad owner for maybe chown %u 
'%p'\\n" \
+            -o \! -user "$(id -u)" -printf "maybe chown %u '%p'\\n" | sort
        # Find all files and directories that don't have root as the group
-       find $NOVCS \! -group $(id -g) -exec stat --format="maybe chgrp %G 
'{}'" {} \; \
-               | sort | filter_unknown 'maybe chgrp' group
+       my_find -nogroup -fprintf /dev/stderr "Bad group for maybe chgrp %g 
'%p'\\n" \
+            -o \! -group "$(id -g)" -printf "maybe chgrp %g '%p'\\n" | sort
 
        # Find all directories that aren't 0755
-       find $NOVCS -type d \! -perm 0755 \
-               -exec stat --format="maybe chmod %a '{}'" {} \; | sort
+       my_find -type d \! -perm 0755 -printf "maybe chmod %m '%p'\\n" | sort
 
        if [ "$VCS" = darcs ]; then
                # Find all files that aren't 0644 (darcs doesn't maintain
                # the executable bit).
-               find $NOVCS -type f \! -perm 0644 \
-                       -exec stat --format="maybe chmod %a '{}'" {} \; | sort
+               my_find -type f \! -perm 0644 -printf "maybe chmod %m '%p'\\n" 
| sort
        else
                # Find all files that aren't 0644 or 0755 (we can assume the 
VCS will
                # maintain the executable bit).
-               find $NOVCS -type f \! -perm 0644 \! -perm 0755 \
-                       -exec stat --format="maybe chmod %a '{}'" {} \; | sort
+               my_find -type f \! -perm 0644 \! -perm 0755 -printf "maybe 
chmod %m '%p'\\n" | sort
        fi
 
        # We don't handle xattrs.

Reply via email to