Package: dwww
Version: 1.10.8
Severity: wishlist
Tags: patch
*** Please type your report below this line ***
Included below is a patch that makes the following changes (for the better,
I hope) to dwww-index++:
+ eliminate duplicates by hashing, not by sorting
+ eliminate multiple symlinks from list of indexed files, by using stat
+ sort list of files by device:inode instead of name, to improve locality
+ add option -l to just print the list of files (for users who want to
do their own incremental indexing)
-- System Information:
Debian Release: lenny/sid
APT prefers testing
APT policy: (500, 'testing')
Architecture: i386 (i686)
Kernel: Linux 2.6.22-6unicorn200712031135 (SMP w/1 CPU core)
Locale: LANG=C, LC_CTYPE=C (charmap=ANSI_X3.4-1968)
Shell: /bin/sh linked to /bin/dash
Versions of packages dwww depends on:
ii apache [httpd-cgi] 1.3.34-4.1 versatile, high-performance HTTP s
ii debconf [debconf-2.0] 1.5.17 Debian configuration management sy
ii debianutils 2.25.1 Miscellaneous utilities specific t
ii doc-base 0.8.6 utilities to manage online documen
ii file 4.21-3 Determines file type using "magic"
ii libc6 2.6.1-1+b1 GNU C Library: Shared libraries
ii libfile-ncopy-perl 0.34-1 file copying like cp for perl
ii libmime-types-perl 1.22-1 Perl extension for determining MIM
ii man-db 2.5.0-4 on-line manual pager
ii menu 2.1.36 generates programs menu for all me
ii mime-support 3.39-1 MIME files 'mime.types' & 'mailcap
ii perl 5.8.8-12 Larry Wall's Practical Extraction
Versions of packages dwww recommends:
ii apt 0.7.6 Advanced front-end for dpkg
ii dlocate 0.5-0.3 fast alternative to dpkg -L and dp
ii info2www 1.2.2.9-23 Read info files with a WWW browser
-- debconf information excluded
--- dwww-index++.orig 2007-12-07 09:16:35.000000000 -0500
+++ dwww-index++ 2007-12-07 10:16:58.000000000 -0500
@@ -52,7 +52,7 @@
my $dwww_swish_index_tmp = "/var/cache/dwww/dwww.swish++.tmp.index";
my $dwww_swish_index_res = $dwww_swish_index_tmp;
my @files = (); # list of files to index;
-our($opt_v, $opt_f); # set by getopt
+our($opt_v, $opt_f, $opt_l); # set by getopt
my $dwwwconf = &DwwwInitialize("/etc/dwww/dwww.conf");
&DwwwSetupDirs($dwwwconf);
@@ -65,7 +65,7 @@
exit(1);
}
$Getopt::Std::STANDARD_HELP_VERSION=1;
-&getopts('vf');
+&getopts('vfl');
my $do_index = $dwwwconf->{'DWWW_INDEX_DOCUMENTATION'};
if (!$opt_f and defined $do_index and lc($do_index) eq "no") {
@@ -77,7 +77,7 @@
my $m2h_merge = $dwwwconf->{'DWWW_MERGE_MAN2HTML_INDEX'};
my $m2h_idx_file = '/var/cache/man2html/man2html.swish++.index';
-if (defined $m2h_merge and lc($m2h_merge) eq "yes" and -r $m2h_idx_file) {
+if (!$opt_l and defined $m2h_merge and lc($m2h_merge) eq "yes" and -r $m2h_idx_file) {
if (copy($m2h_idx_file, $dwww_swish_index_tmp)) {
$dwww_swish_index_res = $dwww_swish_index_tmp . '.new';
push(@index_command, '--incremental');
@@ -96,16 +96,28 @@
&FilesFromDocBaseDir("/usr/share/doc-base");
&FilesFromDocBaseDir("/var/lib/dwww/menu-method");
-print STDERR "Sorting list of files\n" if $opt_v;
-@files = sort @files;
+my %filenames_hash = ( );
+$filenames_hash{$_} = 1 foreach (@files);
+$filenames_hash{$_} = [ stat ] foreach (keys %filenames_hash);
+
+my %inodes_hash = ( );
+foreach my $k (keys %filenames_hash) {
+ $inodes_hash{"$filenames_hash{$k}->[0]:$filenames_hash{$k}->[1]"} = $k;
+}
+
+if ($opt_l) {
+ foreach my $ino (sort (keys %inodes_hash)) {
+ syswrite STDOUT, "$inodes_hash{$ino}\n";
+ }
+ exit 0;
+}
print STDERR "Executing: @index_command\n" if $opt_v;
open (INDEX, '|-')
|| exec { $index_command[0] } @index_command;
-# try to avoid indexing the same file twice
-for (my $i = 0; $i <= $#files; $i++) {
- syswrite INDEX, "$files[$i]\n" unless ($i > 0 and $files[$i] eq $files[$i - 1]);
+foreach my $ino (sort (keys %inodes_hash)) {
+ syswrite INDEX, "$inodes_hash{$ino}\n";
# sleep 150 ms
select(undef, undef, undef, 0.15);
}
@@ -209,6 +221,7 @@
print STDOUT "Usage: $prog [-v] [-f] [-- swish_option [...]]\n";
print STDOUT " -v be more verbose\n";
print STDOUT " -f build the index even if it's disabled in the configuration file\n";
+ print STDOUT " -l do not really index, only output the list of files to index\n";
print STDOUT " -- opt option passed to swish's index++ program\n";
}
--
To UNSUBSCRIBE, email to [EMAIL PROTECTED]
with a subject of "unsubscribe". Trouble? Contact [EMAIL PROTECTED]