Oops, that was still not the best version. There is no need to stat every file
twice, and the @files array is entirely redundant now. Here's the last one for now (I promise):
--- dwww-index++.distrib 2007-12-07 13:04:29.000000000 -0500
+++ dwww-index++ 2007-12-07 17:32:35.000000000 -0500
@@ -45,19 +45,20 @@
# 'ps',
);
+my $stopfiles = qr(\.(css|dsl|gif|jpg|lfig|mp4|sasl)(\.gz)?$);
+
my $dwww_url = "/cgi-bin/dwww";
my $dwww_swish_conf = "/usr/share/dwww/swish++.conf";
my $dwww_swish_index = "/var/cache/dwww/dwww.swish++.index";
# Why index++ does not create temporary file by itself???
my $dwww_swish_index_tmp = "/var/cache/dwww/dwww.swish++.tmp.index";
my $dwww_swish_index_res = $dwww_swish_index_tmp;
-my @files = (); # list of files to index;
-our($opt_v, $opt_f); # set by getopt
+our($opt_v, $opt_f, $opt_l); # set by getopt
my $dwwwconf = &DwwwInitialize("/etc/dwww/dwww.conf");
&DwwwSetupDirs($dwwwconf);
-my @index_command = ('/usr/bin/index++', '--config-file',
"$dwww_swish_conf",
+my @index_command = ('/usr/bin/index++', '--no-recurse', '--config-file',
"$dwww_swish_conf",
'--index-file', "$dwww_swish_index_tmp");
if (! -x $index_command[0]) {
print STDERR "Can't find index++ command.\n";
@@ -65,7 +66,7 @@
exit(1);
}
$Getopt::Std::STANDARD_HELP_VERSION=1;
-&getopts('vf');
+&getopts('vfl');
my $do_index = $dwwwconf->{'DWWW_INDEX_DOCUMENTATION'};
if (!$opt_f and defined $do_index and lc($do_index) eq "no") {
@@ -77,7 +78,7 @@
my $m2h_merge = $dwwwconf->{'DWWW_MERGE_MAN2HTML_INDEX'};
my $m2h_idx_file = '/var/cache/man2html/man2html.swish++.index';
-if (defined $m2h_merge and lc($m2h_merge) eq "yes" and -r $m2h_idx_file) {
+if (!$opt_l and defined $m2h_merge and lc($m2h_merge) eq "yes" and -r $m2h_idx_file) {
if (copy($m2h_idx_file, $dwww_swish_index_tmp)) {
$dwww_swish_index_res = $dwww_swish_index_tmp . '.new';
push(@index_command, '--incremental');
@@ -93,21 +94,34 @@
$ErrorProc = \&ErrorHandle;
print STDERR "Parsing doc-base files\n" if $opt_v;
+my %filenames_hash = ( );
&FilesFromDocBaseDir("/usr/share/doc-base");
&FilesFromDocBaseDir("/var/lib/dwww/menu-method");
-print STDERR "Sorting list of files\n" if $opt_v;
-@files = sort @files;
+my ($last_d, $last_ino) = (-1, -1);
+if ($opt_l) {
+ LISTFILE:
+ foreach my $f (sort { $filenames_hash{$a}->[0] <=>
$filenames_hash{$b}->[0]
+ or $filenames_hash{$a}->[1] <=>
$filenames_hash{$b}->[1] } (keys %filenames_hash)) {
+ next LISTFILE if $filenames_hash{$f}->[0] == $last_d and
$filenames_hash{$f}->[1] == $last_ino;
+ syswrite STDOUT, "$f\n";
+ ($last_d, $last_ino) = ($filenames_hash{$f}->[0],
$filenames_hash{$f}->[1])
+ }
+ exit 0;
+}
print STDERR "Executing: @index_command\n" if $opt_v;
open (INDEX, '|-')
|| exec { $index_command[0] } @index_command;
-# try to avoid indexing the same file twice
-for (my $i = 0; $i <= $#files; $i++) {
- syswrite INDEX, "$files[$i]\n" unless ($i > 0 and $files[$i] eq
$files[$i - 1]);
+INDEXFILE:
+foreach my $f (sort { $filenames_hash{$a}->[0] <=> $filenames_hash{$b}->[0]
+ or $filenames_hash{$a}->[1] <=> $filenames_hash{$b}->[1] }
(keys %filenames_hash)) {
+ next INDEXFILE if $filenames_hash{$f}->[0] == $last_d and
$filenames_hash{$f}->[1] == $last_ino;
+ syswrite INDEX, "$f\n";
# sleep 150 ms
select(undef, undef, undef, 0.15);
+ ($last_d, $last_ino) = ($filenames_hash{$f}->[0],
$filenames_hash{$f}->[1])
}
close INDEX;
@@ -186,9 +200,22 @@
}
}
- if ($#globbed >= 0) {
- push(@files, @globbed);
- return;
+ while ($#globbed >= 0) {
+ my $d = shift @globbed;
+ my $refstat = [ stat $d ];
+ if (not -d _) {
+ $filenames_hash{$d} = $refstat unless $d =~
$stopfiles;
+ next;
+
+ }
+ if (not opendir DOCSUBDIR, $d) {
+ print STDERR "Can't open directory $d: $!\n"
if $opt_v, next;
+ }
+ while (my $f = readdir (DOCSUBDIR)) {
+ next if $f =~ /^\./;
+ push(@globbed, "$d/$f");
+ }
+ closedir DOCSUBDIR;
}
}
}
@@ -209,6 +236,7 @@
print STDOUT "Usage: $prog [-v] [-f] [-- swish_option [...]]\n";
print STDOUT " -v be more verbose\n";
print STDOUT " -f build the index even if it's disabled in the configuration file\n";
+ print STDOUT " -l do not really index, only output the list of files to index\n";
print STDOUT " -- opt option passed to swish's index++ program\n";
}
--
Ham is for reading, not for eating.
--
To UNSUBSCRIBE, email to [EMAIL PROTECTED]
with a subject of "unsubscribe". Trouble? Contact [EMAIL PROTECTED]