Ian> Well, here's a patch (still against the original) that really does
Ian> that, sorting device/inode numbers numerically, not
Ian> lexicographically.  This makes the sort a bit slower I guess but it
Ian> doesn't matter here (the standard sort speed could be recovered by
Ian> zero-padding the numbers, but that really isn't worth it).

Third time is the charm: now there's no need for the intermediate step
of constructing a hash of all the filenames.

--- dwww-index++.distrib        2007-12-07 13:04:29.000000000 -0500
+++ dwww-index++        2007-12-07 13:05:17.000000000 -0500
@@ -52,7 +52,7 @@
 my $dwww_swish_index_tmp       = "/var/cache/dwww/dwww.swish++.tmp.index";
 my $dwww_swish_index_res       = $dwww_swish_index_tmp;
 my @files                      = ();           # list of files to index;
-our($opt_v, $opt_f);                           # set by getopt
+our($opt_v, $opt_f, $opt_l);                           # set by getopt
 
 my $dwwwconf                   = &DwwwInitialize("/etc/dwww/dwww.conf");
 &DwwwSetupDirs($dwwwconf);
@@ -65,7 +65,7 @@
        exit(1);
 }
 $Getopt::Std::STANDARD_HELP_VERSION=1;
-&getopts('vf');
+&getopts('vfl');
 
 my $do_index   =       $dwwwconf->{'DWWW_INDEX_DOCUMENTATION'};
 if (!$opt_f and defined $do_index and lc($do_index) eq "no") {
@@ -77,7 +77,7 @@
 
 my $m2h_merge = $dwwwconf->{'DWWW_MERGE_MAN2HTML_INDEX'};
 my $m2h_idx_file = '/var/cache/man2html/man2html.swish++.index';
-if (defined $m2h_merge and lc($m2h_merge) eq "yes" and -r $m2h_idx_file) {
+if (!$opt_l and defined $m2h_merge and lc($m2h_merge) eq "yes" and -r $m2h_idx_file) {
        if (copy($m2h_idx_file, $dwww_swish_index_tmp)) {
                $dwww_swish_index_res  = $dwww_swish_index_tmp . '.new';
                push(@index_command, '--incremental');
@@ -96,18 +96,33 @@
 &FilesFromDocBaseDir("/usr/share/doc-base");
 &FilesFromDocBaseDir("/var/lib/dwww/menu-method");
 
-print STDERR "Sorting list of files\n" if $opt_v;
-@files = sort @files;
+my %filenames_hash = ( );
+$filenames_hash{$_} = [ stat ] foreach (@files);
+
+my ($last_d, $last_ino) = (-1, -1);
+if ($opt_l) {
+  LISTFILE:
+        foreach my $f (sort { $filenames_hash{$a}->[0] <=> $filenames_hash{$b}->[0]
+                         or $filenames_hash{$a}->[1] <=> $filenames_hash{$b}->[1] } (keys %filenames_hash)) {
+                next LISTFILE if $filenames_hash{$f}->[0] == $last_d and $filenames_hash{$f}->[1] == $last_ino;
+                syswrite STDOUT, "$f\n";
+                ($last_d, $last_ino) = ($filenames_hash{$f}->[0], $filenames_hash{$f}->[1])
+        }
+        exit 0;
+}
 
 print STDERR "Executing: @index_command\n" if $opt_v;
 open (INDEX, '|-')
        || exec { $index_command[0] } @index_command;
 
-# try to avoid indexing the same file twice
-for (my $i = 0; $i <= $#files; $i++) {
-       syswrite INDEX,  "$files[$i]\n" unless ($i > 0 and $files[$i] eq $files[$i - 1]);
+INDEXFILE:
+foreach my $f (sort { $filenames_hash{$a}->[0] <=> $filenames_hash{$b}->[0]
+                 or $filenames_hash{$a}->[1] <=> $filenames_hash{$b}->[1] } (keys %filenames_hash)) {
+        next INDEXFILE if $filenames_hash{$f}->[0] == $last_d and $filenames_hash{$f}->[1] == $last_ino;
+        syswrite INDEX, "$f\n";
        # sleep 150 ms
         select(undef, undef, undef, 0.15);
+        ($last_d, $last_ino) = ($filenames_hash{$f}->[0], $filenames_hash{$f}->[1])
 }
 
 close INDEX;
@@ -209,6 +224,7 @@
        print STDOUT "Usage: $prog [-v] [-f] [-- swish_option [...]]\n";
        print STDOUT "   -v     be more verbose\n";
        print STDOUT "   -f     build the index even if it's disabled in the configuration file\n";
+        print STDOUT "   -l     do not really index, only output the list of files to index\n";
        print STDOUT "   -- opt option passed to swish's index++ program\n";
 }      
 



-- 
To UNSUBSCRIBE, email to [EMAIL PROTECTED]
with a subject of "unsubscribe". Trouble? Contact [EMAIL PROTECTED]

Reply via email to