Package: libmail-mbox-messageparser-perl Version: 1.4002-1 Severity: grave Justification: causes non-serious data loss
Hello, my program mail-expire uses this module to split mbox files into individual messages. Sometimes, however, the end of file is reported too early and data is _lost_ because of that. I did not try to investigate the issue yet, test data is in: http://people.debian.org/~blade/debian-user-german.Apr_2006.bz2 and the current version of the script is attached, with debugging output enabled. If you look at that, it stops splitting the contents at <[EMAIL PROTECTED]> and returns the rest as one big message. Regards, Eduard. -- System Information: Debian Release: testing/unstable APT prefers unstable APT policy: (500, 'unstable'), (1, 'experimental') Architecture: amd64 (x86_64) Shell: /bin/sh linked to /bin/bash Kernel: Linux 2.6.16-1-amd64-k8 Locale: LANG=de_DE.UTF-8, LC_CTYPE=de_DE.UTF-8 (charmap=UTF-8) Versions of packages libmail-mbox-messageparser-perl depends on: ii libfilehandle-unget-perl 0.1621-2 a FileHandle which supports ungett ii perl 5.8.8-4 Larry Wall's Practical Extraction libmail-mbox-messageparser-perl recommends no packages. -- no debconf information --
#!/usr/bin/perl # SEE DEBIAN CHANGELOG FOR NEWER ENTRIES # mail-expire, Version 0.2; Fri, 16 Aug 2002 11:39:10 +0200 # Copyright: Eduard Bloch <[EMAIL PROTECTED]> # # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. The full text of GPL can be # found on http://www.gnu.org or in /usr/share/common-licenses/GPL on # modern Debian systems. # # ---------------------------------------------------------- # If you make changes to this script, please forward the new # version to <[EMAIL PROTECTED]> or <[EMAIL PROTECTED]> # ---------------------------------------------------------- # # REQUIRED PACKAGES: # # libcompress-zlib-perl - Perl module for creation of gzip files # libdate-calc-perl - Perl library for accessing dates # # Changes by Johannes Kolb: # * use Date::Calc instead of Date::Manip to increase performance # * no buffering of whole mailbox-files in memory # # Changes by Florian Krohs <[EMAIL PROTECTED]> # * append old mails to mailbox.month_year.gz # * added zlib to free some space:] # # Changes by Eduard Bloch <[EMAIL PROTECTED]> # * small hack to vary the output filename to prevent overwritting # * some cosmetics, fixed typos # * dropped silly size comparison, trust return values of syswrite use strict; my $target="./"; sub help { die "Usage: $0 [ options ] DAYS FILES where DAYS is an integer specifying the maximum age of a mail in days and FILES one or more mbox file(s). Options: -u choose different filenames if the target file already exists --delete drops the old messages. Be warned, no backup will be made! --dry-run Just print what it would do, no real action -t DIR new target directory DIR "; } use Getopt::Long qw(:config no_ignore_case bundling pass_through); my $uoption=0; my $deloption=0; my $dry_run=0; my $help; my %opts = ( "t=s", \$target, "delete", \$deloption, "dry-run", \$dry_run, "help|h", \$help, "u", \$uoption ); &help if !GetOptions(%opts); &help if $help; my $days=shift(@ARGV); die "Please specify a valid day count!\n" if abs($days)<1; die "Please specify mbox file names!\n" if ! @ARGV; for(@ARGV) { die "Unable to read $_\n" if not -r $_ }; use Date::Calc qw(Parse_Date Today Delta_Days); use Compress::Zlib ; use Fcntl; use Mail::Mbox::MessageParser; my $c=-1; my @today = Today(); my $old_all = localtime(time - $days * 86400); $old_all =~ s/\ +/\ /g; my @splitdate=split(/\ /,$old_all); my $olddate=$splitdate[1] . "_" . $splitdate[4] . ".gz"; JOB: foreach my $filename (@ARGV) { my @st; my @time; my $c; my $oldsize = (stat($filename))[7]; if ($oldsize == 0) { syswrite(STDOUT,"Empty file $filename, skipping."); next JOB; }; if(-e "$filename.new") { syswrite(STDOUT,"Temporary file $filename.new already exists, skipping $filename.\n"); next JOB; }; if(!open(fh,$filename)) { syswrite(STDOUT,"$filename could not be opened, skipping"); next JOB; }; if(flock(fh,2|4)){ # lock when not locked already by another process flock(fh,2) || die "unexpected trouble on locking $filename"; } else { # skip file close(fh); syswrite(STDOUT,"$filename is locked by an other prozess, skipping."); next JOB; }; my $file_handle = new FileHandle($filename); my $folder_reader = new Mail::Mbox::MessageParser( { 'file_name' => $filename, 'file_handle' => $file_handle, 'enable_cache' => 0 } ); # die "he? ".ref($folder_reader); #die ref($folder_reader); if (ref($folder_reader) ne "Mail::Mbox::MessageParser::Grep") { syswrite STDERR, "Unable to parse contents of $filename, skipping.\n"; next JOB; } my $gzfilename; if(!$dry_run) { sysopen(neu,"$filename.new", O_RDWR|O_EXCL|O_CREAT) || die "Error creating temporary file, move $filename.new out of the way"; $gzfilename="$target/$filename".".$olddate"; } while(-s $gzfilename && $uoption) { my $modnumber += 0; # to preset a value $gzfilename="$target/$filename.".$splitdate[1] . "($modnumber)_" . $splitdate[4] . ".gz";; $modnumber++; } my $gzfile_ist_neu=1 if(!-e $gzfilename); my $alt; if(!$deloption && !$dry_run) { $alt = gzopen($gzfilename, "ab") or die "cannot open file: $gzerrno\n"; } syswrite (STDOUT,"I: Reading and splitting $filename ($oldsize bytes)...\n"); syswrite(STDOUT, "I: Analyzing ages (days before expiration): "); my $alte=0; my $neue=0; while(!$folder_reader->end_of_file()) { my $email = $folder_reader->read_next_email(); my $isold; $$email=~/^From\s\S+\s+(.*)/m; my $date=$1; #syswrite (STDERR, "hm, $1\n"); if($1) { $c++; my @maildate = Parse_Date($date); @maildate = (1970,1,1) if scalar @maildate ==0; my $diff = Delta_Days(@maildate,@today); syswrite(STDERR, "Date, Diff, Contents:\n$date, $diff\n$$email\nEND_OF_MAIL\n"); if ($#maildate != 2) { # mail header broken $neue++; syswrite(STDOUT, "(new: date could not be parsed!), "); } else # mail okay { syswrite(STDOUT, $diff); if ($diff > $days) { $isold = 1; $alte++; syswrite(STDOUT, "(old), "); } else { $neue++; syswrite(STDOUT, "(new), "); } } } if ($isold) { if(!$deloption && !$dry_run) { $alt->gzwrite($$email) or die "error writing to gz buffer : $gzerrno\n"; } } else { defined(syswrite(neu, $$email)) || die "Failure while writting - disc full?"; } } $alt->gzclose if(!$deloption && !$dry_run); flock(fh, 8); close(fh); close(neu); #die "ohje"; if($alte==0 && $gzfile_ist_neu==1 && !$deloption && !$dry_run) { unlink($gzfilename)|| die "failed - removed gzip file [empty]\n"; } if( ($alte+$neue) < 1) { syswrite STDOUT, "No changes, ignoring file\n"; next JOB; } my $newsize = (stat($gzfilename))[7]; # no longer interessting, beautify it $gzfilename=~s!^\.//?!!; syswrite (STDOUT,"\n\nI: Wrote $neue new entries to $filename.new\n") if(!$deloption); syswrite (STDOUT,"\nI: Wrote $alte old entries to $gzfilename\n"); if(!$dry_run) { syswrite (STDOUT,"Deleting $filename... "); unlink($filename) || die "failed while deleting original mbox"; syswrite (STDOUT,"replacing with the new mailbox... "); rename("$filename.new", $filename) || die "failed"; syswrite (STDOUT,"done"); # syswrite (STDOUT," (saved $diff bytes)") if(!$deloption); syswrite (STDOUT,".\n"); if(-e "$filename.new"){unlink("$filename.new") || die "Could not remove temporary file... Odd things happen!";} } }