--- Begin Message ---
Package: libmail-mbox-messageparser-perl
Version: 1.4002-1
Severity: grave
Justification: causes non-serious data loss
Hello,
my program mail-expire uses this module to split mbox files into
individual messages. Sometimes, however, the end of file is reported too
early and data is _lost_ because of that. I did not try to investigate
the issue yet, test data is in:
http://people.debian.org/~blade/debian-user-german.Apr_2006.bz2
and the current version of the script is attached, with debugging output
enabled. If you look at that, it stops splitting the contents at <[EMAIL
PROTECTED]> and returns the rest as one big message.
Regards,
Eduard.
-- System Information:
Debian Release: testing/unstable
APT prefers unstable
APT policy: (500, 'unstable'), (1, 'experimental')
Architecture: amd64 (x86_64)
Shell: /bin/sh linked to /bin/bash
Kernel: Linux 2.6.16-1-amd64-k8
Locale: LANG=de_DE.UTF-8, LC_CTYPE=de_DE.UTF-8 (charmap=UTF-8)
Versions of packages libmail-mbox-messageparser-perl depends on:
ii libfilehandle-unget-perl 0.1621-2 a FileHandle which supports ungett
ii perl 5.8.8-4 Larry Wall's Practical Extraction
libmail-mbox-messageparser-perl recommends no packages.
-- no debconf information
--
#!/usr/bin/perl
# SEE DEBIAN CHANGELOG FOR NEWER ENTRIES
# mail-expire, Version 0.2; Fri, 16 Aug 2002 11:39:10 +0200
# Copyright: Eduard Bloch <[EMAIL PROTECTED]>
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details. The full text of GPL can be
# found on http://www.gnu.org or in /usr/share/common-licenses/GPL on
# modern Debian systems.
#
# ----------------------------------------------------------
# If you make changes to this script, please forward the new
# version to <[EMAIL PROTECTED]> or <[EMAIL PROTECTED]>
# ----------------------------------------------------------
#
# REQUIRED PACKAGES:
#
# libcompress-zlib-perl - Perl module for creation of gzip files
# libdate-calc-perl - Perl library for accessing dates
#
# Changes by Johannes Kolb:
# * use Date::Calc instead of Date::Manip to increase performance
# * no buffering of whole mailbox-files in memory
#
# Changes by Florian Krohs <[EMAIL PROTECTED]>
# * append old mails to mailbox.month_year.gz
# * added zlib to free some space:]
#
# Changes by Eduard Bloch <[EMAIL PROTECTED]>
# * small hack to vary the output filename to prevent overwritting
# * some cosmetics, fixed typos
# * dropped silly size comparison, trust return values of syswrite
use strict;
my $target="./";
sub help {
die "Usage: $0 [ options ] DAYS FILES
where
DAYS is an integer specifying the maximum age of a mail in days and
FILES one or more mbox file(s).
Options:
-u choose different filenames if the target file already exists
--delete drops the old messages. Be warned, no backup will be made!
--dry-run Just print what it would do, no real action
-t DIR new target directory DIR
";
}
use Getopt::Long qw(:config no_ignore_case bundling pass_through);
my $uoption=0;
my $deloption=0;
my $dry_run=0;
my $help;
my %opts = (
"t=s", \$target,
"delete", \$deloption,
"dry-run", \$dry_run,
"help|h", \$help,
"u", \$uoption
);
&help if !GetOptions(%opts);
&help if $help;
my $days=shift(@ARGV);
die "Please specify a valid day count!\n" if abs($days)<1;
die "Please specify mbox file names!\n" if ! @ARGV;
for(@ARGV) {
die "Unable to read $_\n" if not -r $_
};
use Date::Calc qw(Parse_Date Today Delta_Days);
use Compress::Zlib ;
use Fcntl;
use Mail::Mbox::MessageParser;
my $c=-1;
my @today = Today();
my $old_all = localtime(time - $days * 86400);
$old_all =~ s/\ +/\ /g;
my @splitdate=split(/\ /,$old_all);
my $olddate=$splitdate[1] . "_" . $splitdate[4] . ".gz";
JOB:
foreach my $filename (@ARGV) {
my @st;
my @time;
my $c;
my $oldsize = (stat($filename))[7];
if ($oldsize == 0) {
syswrite(STDOUT,"Empty file $filename, skipping.");
next JOB;
};
if(-e "$filename.new")
{
syswrite(STDOUT,"Temporary file $filename.new already exists, skipping
$filename.\n");
next JOB;
};
if(!open(fh,$filename)) {
syswrite(STDOUT,"$filename could not be opened, skipping");
next JOB;
};
if(flock(fh,2|4)){
# lock when not locked already by another process
flock(fh,2) || die "unexpected trouble on locking $filename";
} else {
# skip file
close(fh);
syswrite(STDOUT,"$filename is locked by an other prozess, skipping.");
next JOB;
};
my $file_handle = new FileHandle($filename);
my $folder_reader = new Mail::Mbox::MessageParser( {
'file_name' => $filename,
'file_handle' => $file_handle,
'enable_cache' => 0
} );
# die "he? ".ref($folder_reader);
#die ref($folder_reader);
if (ref($folder_reader) ne "Mail::Mbox::MessageParser::Grep") {
syswrite STDERR, "Unable to parse contents of $filename, skipping.\n";
next JOB;
}
my $gzfilename;
if(!$dry_run) {
sysopen(neu,"$filename.new", O_RDWR|O_EXCL|O_CREAT) || die "Error
creating temporary file, move $filename.new out of the way";
$gzfilename="$target/$filename".".$olddate";
}
while(-s $gzfilename && $uoption)
{
my $modnumber += 0; # to preset a value
$gzfilename="$target/$filename.".$splitdate[1] . "($modnumber)_" .
$splitdate[4] . ".gz";;
$modnumber++;
}
my $gzfile_ist_neu=1 if(!-e $gzfilename);
my $alt;
if(!$deloption && !$dry_run) {
$alt = gzopen($gzfilename, "ab")
or die "cannot open file: $gzerrno\n";
}
syswrite (STDOUT,"I: Reading and splitting $filename ($oldsize
bytes)...\n");
syswrite(STDOUT, "I: Analyzing ages (days before expiration): ");
my $alte=0;
my $neue=0;
while(!$folder_reader->end_of_file())
{
my $email = $folder_reader->read_next_email();
my $isold;
$$email=~/^From\s\S+\s+(.*)/m;
my $date=$1;
#syswrite (STDERR, "hm, $1\n");
if($1) {
$c++;
my @maildate = Parse_Date($date);
@maildate = (1970,1,1) if scalar @maildate ==0;
my $diff = Delta_Days(@maildate,@today);
syswrite(STDERR, "Date, Diff, Contents:\n$date,
$diff\n$$email\nEND_OF_MAIL\n");
if ($#maildate != 2) {
# mail header broken
$neue++;
syswrite(STDOUT, "(new: date could not be parsed!), ");
}
else
# mail okay
{
syswrite(STDOUT, $diff);
if ($diff > $days) {
$isold = 1;
$alte++;
syswrite(STDOUT, "(old), ");
}
else {
$neue++;
syswrite(STDOUT, "(new), ");
}
}
}
if ($isold) {
if(!$deloption && !$dry_run) {
$alt->gzwrite($$email)
or die "error writing to gz buffer : $gzerrno\n";
}
} else {
defined(syswrite(neu, $$email)) || die "Failure while writting -
disc full?";
}
}
$alt->gzclose if(!$deloption && !$dry_run);
flock(fh, 8);
close(fh);
close(neu);
#die "ohje";
if($alte==0 && $gzfile_ist_neu==1 && !$deloption && !$dry_run)
{
unlink($gzfilename)|| die "failed - removed gzip file [empty]\n";
}
if( ($alte+$neue) < 1) {
syswrite STDOUT, "No changes, ignoring file\n";
next JOB;
}
my $newsize = (stat($gzfilename))[7];
# no longer interessting, beautify it
$gzfilename=~s!^\.//?!!;
syswrite (STDOUT,"\n\nI: Wrote $neue new entries to $filename.new\n")
if(!$deloption);
syswrite (STDOUT,"\nI: Wrote $alte old entries to $gzfilename\n");
if(!$dry_run) {
syswrite (STDOUT,"Deleting $filename... ");
unlink($filename) || die "failed while deleting original mbox";
syswrite (STDOUT,"replacing with the new mailbox... ");
rename("$filename.new", $filename) || die "failed";
syswrite (STDOUT,"done");
# syswrite (STDOUT," (saved $diff bytes)") if(!$deloption);
syswrite (STDOUT,".\n");
if(-e "$filename.new"){unlink("$filename.new") || die "Could not remove
temporary file... Odd things happen!";}
}
}
--- End Message ---