Package: libmail-mbox-messageparser-perl
Version: 1.4002-1
Severity: grave
Justification: causes non-serious data loss

Hello,

my program mail-expire uses this module to split mbox files into
individual messages. Sometimes, however, the end of file is reported too
early and data is _lost_ because of that. I did not try to investigate
the issue yet, test data is in:
http://people.debian.org/~blade/debian-user-german.Apr_2006.bz2
and the current version of the script is attached, with debugging output
enabled. If you look at that, it stops splitting the contents at <[EMAIL 
PROTECTED]> and returns the rest as one big message.

Regards,
Eduard.

-- System Information:
Debian Release: testing/unstable
  APT prefers unstable
  APT policy: (500, 'unstable'), (1, 'experimental')
Architecture: amd64 (x86_64)
Shell:  /bin/sh linked to /bin/bash
Kernel: Linux 2.6.16-1-amd64-k8
Locale: LANG=de_DE.UTF-8, LC_CTYPE=de_DE.UTF-8 (charmap=UTF-8)

Versions of packages libmail-mbox-messageparser-perl depends on:
ii  libfilehandle-unget-perl      0.1621-2   a FileHandle which supports ungett
ii  perl                          5.8.8-4    Larry Wall's Practical Extraction 

libmail-mbox-messageparser-perl recommends no packages.

-- no debconf information

-- 
#!/usr/bin/perl

# SEE DEBIAN CHANGELOG FOR NEWER ENTRIES

# mail-expire, Version 0.2; Fri, 16 Aug 2002 11:39:10 +0200
# Copyright: Eduard Bloch <[EMAIL PROTECTED]>
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details. The full text of GPL can be
# found on http://www.gnu.org or in /usr/share/common-licenses/GPL on
# modern Debian systems.
#
# ----------------------------------------------------------
# If you make changes to this script, please forward the new 
# version to <[EMAIL PROTECTED]> or <[EMAIL PROTECTED]>
# ----------------------------------------------------------
# 
# REQUIRED PACKAGES:
#
# libcompress-zlib-perl - Perl module for creation of gzip files 
# libdate-calc-perl - Perl library for accessing dates
# 
# Changes by Johannes Kolb:
#  * use Date::Calc instead of Date::Manip to increase performance
#  * no buffering of whole mailbox-files in memory
#
# Changes by Florian Krohs <[EMAIL PROTECTED]>
#  * append old mails to mailbox.month_year.gz
#  * added zlib to free some space:]
#
# Changes by Eduard Bloch <[EMAIL PROTECTED]>
#  * small hack to vary the output filename to prevent overwritting
#  * some cosmetics, fixed typos
#  * dropped silly size comparison, trust return values of syswrite

use strict;

my $target="./";

sub help {
    die "Usage: $0 [ options ] DAYS FILES
    where
    DAYS is an integer specifying the maximum age of a mail in days and
    FILES one or more mbox file(s).

    Options:
    -u        choose different filenames if the target file already exists
    --delete  drops the old messages. Be warned, no backup will be made!
    --dry-run Just print what it would do, no real action
    -t DIR    new target directory DIR

";
}

use Getopt::Long qw(:config no_ignore_case bundling pass_through);
my $uoption=0;
my $deloption=0;
my $dry_run=0;
my $help;

my %opts = (
    "t=s", \$target, 
    "delete", \$deloption,
    "dry-run", \$dry_run,
    "help|h", \$help,
    "u", \$uoption
);

&help if !GetOptions(%opts);
&help if $help;
my $days=shift(@ARGV);
die "Please specify a valid day count!\n" if abs($days)<1;
die "Please specify mbox file names!\n" if ! @ARGV;
for(@ARGV) { 
    die "Unable to read $_\n" if not -r $_
};

use Date::Calc qw(Parse_Date Today Delta_Days);
use Compress::Zlib ;
use Fcntl;
use Mail::Mbox::MessageParser;

my $c=-1;
my @today = Today();
my $old_all = localtime(time - $days * 86400);
$old_all =~ s/\ +/\ /g;
my @splitdate=split(/\ /,$old_all);
my $olddate=$splitdate[1] . "_" . $splitdate[4] . ".gz";

JOB: 
foreach my $filename (@ARGV) {
    my @st;
    my @time;
    my $c;

    my $oldsize = (stat($filename))[7];
    if ($oldsize == 0) {
        syswrite(STDOUT,"Empty file $filename, skipping.");
        next JOB;
    };

    if(-e "$filename.new")
    { 
        syswrite(STDOUT,"Temporary file $filename.new already exists, skipping 
$filename.\n");
        next JOB;
    };

    if(!open(fh,$filename)) {
        syswrite(STDOUT,"$filename could not be opened, skipping");
        next JOB;
    };
    if(flock(fh,2|4)){
        # lock when not locked already by another process
        flock(fh,2) || die "unexpected trouble on locking $filename";
    } else {
        # skip file
        close(fh);
        syswrite(STDOUT,"$filename is locked by an other prozess, skipping.");
        next JOB;
    };

    my $file_handle = new FileHandle($filename);
    my $folder_reader = new Mail::Mbox::MessageParser( {
            'file_name' => $filename,
            'file_handle' => $file_handle,
            'enable_cache' => 0
        } );

    # die "he? ".ref($folder_reader);
    #die ref($folder_reader);
    if (ref($folder_reader) ne "Mail::Mbox::MessageParser::Grep") {
        syswrite STDERR, "Unable to parse contents of $filename, skipping.\n";
        next JOB;
    }

    my $gzfilename;
    if(!$dry_run) {
        sysopen(neu,"$filename.new", O_RDWR|O_EXCL|O_CREAT) || die "Error 
creating temporary file, move $filename.new out of the way";
        $gzfilename="$target/$filename".".$olddate";
    }

    while(-s $gzfilename && $uoption)
    {
        my $modnumber += 0; # to preset a value
        $gzfilename="$target/$filename.".$splitdate[1] . "($modnumber)_" . 
$splitdate[4] . ".gz";;
        $modnumber++;
    }

    my $gzfile_ist_neu=1 if(!-e $gzfilename);
    my $alt;

    if(!$deloption && !$dry_run) {
        $alt = gzopen($gzfilename, "ab") 
            or die "cannot open file: $gzerrno\n";
    }
    syswrite (STDOUT,"I: Reading and splitting $filename ($oldsize 
bytes)...\n");
    syswrite(STDOUT, "I: Analyzing ages (days before expiration): ");
    my $alte=0;
    my $neue=0;

    while(!$folder_reader->end_of_file())
    {
        my $email = $folder_reader->read_next_email();
        my $isold;

        $$email=~/^From\s\S+\s+(.*)/m;
        my $date=$1;
        #syswrite (STDERR, "hm, $1\n");

        if($1) {
            $c++;
            my @maildate = Parse_Date($date);
            @maildate = (1970,1,1) if scalar @maildate ==0;
            my $diff = Delta_Days(@maildate,@today);
            syswrite(STDERR, "Date, Diff, Contents:\n$date, 
$diff\n$$email\nEND_OF_MAIL\n");
            if ($#maildate != 2) {
                # mail header broken
                $neue++;
                syswrite(STDOUT, "(new: date could not be parsed!), ");
            }
            else
            # mail okay
            {
                syswrite(STDOUT, $diff);
                if ($diff > $days) {
                    $isold = 1;
                    $alte++;
                    syswrite(STDOUT, "(old), ");
                }
                else {
                    $neue++;
                    syswrite(STDOUT, "(new), ");
                }
            }
        }
        if ($isold) {
            if(!$deloption && !$dry_run) {
                $alt->gzwrite($$email) 
                    or die "error writing to gz buffer : $gzerrno\n";
            }

        } else {
            defined(syswrite(neu, $$email)) || die "Failure while writting - 
disc full?";
        }
    }

    $alt->gzclose if(!$deloption && !$dry_run);
    flock(fh, 8);
    close(fh);
    close(neu);

    #die  "ohje";
    if($alte==0 && $gzfile_ist_neu==1 && !$deloption && !$dry_run)
    {
        unlink($gzfilename)|| die "failed - removed gzip file [empty]\n";
    }

    if( ($alte+$neue) < 1) {
        syswrite STDOUT, "No changes, ignoring file\n";
        next JOB;
    }

    my $newsize = (stat($gzfilename))[7];

    # no longer interessting, beautify it
    $gzfilename=~s!^\.//?!!;

    syswrite (STDOUT,"\n\nI: Wrote $neue new entries to $filename.new\n") 
if(!$deloption);
    syswrite (STDOUT,"\nI: Wrote $alte old entries to $gzfilename\n");

    if(!$dry_run) {
        syswrite (STDOUT,"Deleting $filename... ");
        unlink($filename) || die "failed while deleting original mbox";
        syswrite (STDOUT,"replacing with the new mailbox... ");
        rename("$filename.new", $filename) || die "failed";
        syswrite (STDOUT,"done");
#    syswrite (STDOUT," (saved $diff bytes)") if(!$deloption);
        syswrite (STDOUT,".\n");
        if(-e "$filename.new"){unlink("$filename.new") || die "Could not remove 
temporary file... Odd things happen!";}
    }
}

Reply via email to