Package: dwww
Version: 1.10.8
Severity: wishlist
Tags: patch

*** Please type your report below this line ***
Please find included a patch that makes the following changes (for the better,
I hope) to dwww-index++:

+ eliminate duplicates by hashing, not by sorting
+ eliminate multiple symlinks from list of indexed files, by using stat
+ sort list of files by device:inode instead of name, to improve locality
+ add an option -l that just prints the list of files (for users who want to
  do their own incremental indexing)

-- System Information:
Debian Release: lenny/sid
  APT prefers testing
  APT policy: (500, 'testing')
Architecture: i386 (i686)

Kernel: Linux 2.6.22-6unicorn200712031135 (SMP w/1 CPU core)
Locale: LANG=C, LC_CTYPE=C (charmap=ANSI_X3.4-1968)
Shell: /bin/sh linked to /bin/dash

Versions of packages dwww depends on:
ii  apache [httpd-cgi]            1.3.34-4.1 versatile, high-performance HTTP s
ii  debconf [debconf-2.0]         1.5.17     Debian configuration management sy
ii  debianutils                   2.25.1     Miscellaneous utilities specific t
ii  doc-base                      0.8.6      utilities to manage online documen
ii  file                          4.21-3     Determines file type using "magic"
ii  libc6                         2.6.1-1+b1 GNU C Library: Shared libraries
ii  libfile-ncopy-perl            0.34-1     file copying like cp for perl
ii  libmime-types-perl            1.22-1     Perl extension for determining MIM
ii  man-db                        2.5.0-4    on-line manual pager
ii  menu                          2.1.36     generates programs menu for all me
ii  mime-support                  3.39-1     MIME files 'mime.types' & 'mailcap
ii  perl                          5.8.8-12   Larry Wall's Practical Extraction 

Versions of packages dwww recommends:
ii  apt                           0.7.6      Advanced front-end for dpkg
ii  dlocate                       0.5-0.3    fast alternative to dpkg -L and dp
ii  info2www                      1.2.2.9-23 Read info files with a WWW browser

-- debconf information excluded

--- dwww-index++.orig   2007-12-07 09:16:35.000000000 -0500
+++ dwww-index++        2007-12-07 10:16:58.000000000 -0500
@@ -52,7 +52,7 @@
 my $dwww_swish_index_tmp       = "/var/cache/dwww/dwww.swish++.tmp.index";
 my $dwww_swish_index_res       = $dwww_swish_index_tmp;
 my @files                      = ();           # list of files to index;
-our($opt_v, $opt_f);                           # set by getopt
+our($opt_v, $opt_f, $opt_l);                           # set by getopt
 
 my $dwwwconf                   = &DwwwInitialize("/etc/dwww/dwww.conf");
 &DwwwSetupDirs($dwwwconf);
@@ -65,7 +65,7 @@
        exit(1);
 }
 $Getopt::Std::STANDARD_HELP_VERSION=1;
-&getopts('vf');
+&getopts('vfl');
 
 my $do_index   =       $dwwwconf->{'DWWW_INDEX_DOCUMENTATION'};
 if (!$opt_f and defined $do_index and lc($do_index) eq "no") {
@@ -77,7 +77,7 @@
 
 my $m2h_merge = $dwwwconf->{'DWWW_MERGE_MAN2HTML_INDEX'};
 my $m2h_idx_file = '/var/cache/man2html/man2html.swish++.index';
-if (defined $m2h_merge and lc($m2h_merge) eq "yes" and -r $m2h_idx_file) {
+if (!$opt_l and defined $m2h_merge and lc($m2h_merge) eq "yes" and -r $m2h_idx_file) {
        if (copy($m2h_idx_file, $dwww_swish_index_tmp)) {
                $dwww_swish_index_res  = $dwww_swish_index_tmp . '.new';
                push(@index_command, '--incremental');
@@ -96,16 +96,28 @@
 &FilesFromDocBaseDir("/usr/share/doc-base");
 &FilesFromDocBaseDir("/var/lib/dwww/menu-method");
 
-print STDERR "Sorting list of files\n" if $opt_v;
-@files = sort @files;
+my %filenames_hash = ( );
+$filenames_hash{$_} = 1 foreach (@files);
+$filenames_hash{$_} = [ stat ] foreach (keys %filenames_hash);
+
+my %inodes_hash = ( );
+foreach my $k (keys %filenames_hash) {
+        $inodes_hash{"$filenames_hash{$k}->[0]:$filenames_hash{$k}->[1]"} = $k;
+}
+
+if ($opt_l) {
+        foreach my $ino (sort (keys %inodes_hash)) {
+                syswrite STDOUT, "$inodes_hash{$ino}\n";
+        }
+        exit 0;
+}
 
 print STDERR "Executing: @index_command\n" if $opt_v;
 open (INDEX, '|-')
        || exec { $index_command[0] } @index_command;
 
-# try to avoid indexing the same file twice
-for (my $i = 0; $i <= $#files; $i++) {
-       syswrite INDEX,  "$files[$i]\n" unless ($i > 0 and $files[$i] eq $files[$i - 1]);
+foreach my $ino (sort (keys %inodes_hash)) {
+        syswrite INDEX, "$inodes_hash{$ino}\n";
        # sleep 150 ms
         select(undef, undef, undef, 0.15);
 }
@@ -209,6 +221,7 @@
        print STDOUT "Usage: $prog [-v] [-f] [-- swish_option [...]]\n";
        print STDOUT "   -v     be more verbose\n";
        print STDOUT "   -f     build the index even if it's disabled in the configuration file\n";
+        print STDOUT "   -l     do not really index, only output the list of files to index\n";
        print STDOUT "   -- opt option passed to swish's index++ program\n";
 }      
 



-- 
To UNSUBSCRIBE, email to [EMAIL PROTECTED]
with a subject of "unsubscribe". Trouble? Contact [EMAIL PROTECTED]

Reply via email to