Package: libparse-debian-packages-perl Version: 0.01-1 Severity: normal Hello,
While using this package to parse my mirror's Packages and Sources files, I found some problems: - a package with a Homepage: field at the end of the body makes the next package have a ' Homepage' key, - more generally: a package with a "word:" at the beginning of a line in its long description causes that package to have an extra key. I added some code to make it possible to pass a string (a filename) to the constructor, which is then in charge of figuring out what type of file it is (text/plain, gzip or bzip2) and opening it correctly. I added the possibility to parse a Sources file (maybe the lib should change its name?) by adding a Files key: $source{Files}{$filename} = { size => ..., MD5sum => ... } Now a parser has a "hidden" __readline method, so the "next" method can get a line of a package without figuring out how to do it (getline of FileHandle, gzreadline or bzreadline). This version is fully compatible with version 0.01. If a caller passes a filename, new() can return undef if the file does not exist, is not in a supported format, or cannot be opened. Note that this version depends on: use Compress::Zlib; use Compress::Bzip2; use File::MMagic; use FileHandle; -- System Information: Debian Release: testing/unstable APT prefers unstable APT policy: (990, 'unstable') Architecture: i386 (i686) Shell: /bin/sh linked to /bin/dash Kernel: Linux 2.6.12+thorr.2 Locale: [EMAIL PROTECTED], [EMAIL PROTECTED] (charmap=ISO-8859-15) Versions of packages libparse-debian-packages-perl depends on: ii libyaml-perl 0.38-2 YAML Ain't Markup Language (tm) ii perl 5.8.7-4 Larry Wall's Practical Extraction libparse-debian-packages-perl recommends no packages. -- no debconf information
--- Packages.pm.old 2005-08-26 02:58:55.000000000 +0200 +++ Packages.pm 2005-08-26 04:53:37.000000000 +0200 @@ -1,25 +1,67 @@ use strict; package Parse::Debian::Packages; -our $VERSION = '0.01'; +our $VERSION = "0.02"; + +use Compress::Zlib; +use Compress::Bzip2; +use File::MMagic; +use FileHandle; sub new { my $class = shift; - my $fh = shift; + my $file = shift; + my $fh; - return bless { fh => $fh }, $class; + if (! ref $file) { + # Caller give us a filename + return undef unless -f $file; + + # Default magic is ok for application/x-gzip application/x-bzip2 and text/plain + my $magic = File::MMagic->new(); + my $type = $magic->checktype_filename($file); + + SWITCH: for ($type) { + /text\/plain/ && do { + $fh = new FileHandle; + if (! $fh->open("< $file")) { + return undef; + } + last; + }; + + /application\/x-gzip/ && do { + $fh = gzopen ($file, "rb") + return undef; + last; + }; + + /application\/x-bzip2/ && do { + $fh = bzopen ($file, "rb") + return undef; + last; + }; + # It's not a supported file format + return undef; + } + return bless { FH => $fh, TYPE => $type}, $class; + } else { + return bless { FH => $file, TYPE => "IOFile"}, $class; + } } sub next { my $self = shift; - my $fh = $self->{fh}; my %parsed; - while (<$fh>) { + while ($_ = $self->__readline) { last if /^$/; - if (my ($key, $value) = m/^(.*): (.*)/) { - $parsed{$key} = $value; - } - else { + + if (my ($key, $value) = m/^([^\s:]*):\s?(.*)/) { + # Do not add an empty Files key when parsing Sources + $parsed{$key} = $value unless $key eq "Files"; + } elsif (my ($md5, $size, $filename) = /^\s(\w{32})\s(\d+)\s(.*)/) { + $parsed{Files} = { $filename => { size => $size, MD5sum => $md5 } }; + } else { s/ //; s/^\.$//; $parsed{body} .= $_; @@ -29,7 +71,37 @@ return %parsed; } -1; +sub __readline { + my $self = shift; + my $line = ""; + + SWITCH: for ($self->{TYPE}) { + /text\/plain|IOFile/ && do { + $line = $self->{FH}->getline; + last; + }; + + /application\/x-gzip/ && do { + my $bytesread = 
$self->{FH}->gzreadline($line); + if ($bytesread == 0) { + $line = ""; + } + last; + }; + + /application\/x-bzip2/ && do { + my $bytesread = $self->{FH}->bzreadline($line); + if ($bytesread == 0) { + $line = ""; + } + last; + }; + die "Should Never Happend\n"; + } + return $line; +} + +1 =head1 NAME @@ -40,24 +112,48 @@ use YAML; use IO::File; + use FileHandle; use Parse::Debian::Packages; - my $fh = IO::File->new("Packages"); - my $parser = Parse::Debian::Packages->new( $fh ); - while (my %package = $parser->next) { + my $pkg_file = "Packages"; + my $src_file = "Sources"; + my $other_src_file = "Sources.bz2"; + + my $fh_io = IO::File->new($pkg_file); + my $fh_FH = new FileHandle; + $fh_FH->open("< $src_file"); + + my $parser_on_io = Parse::Debian::Packages->new( $fh_io ); + my $parser_on_FH = Parse::Debian::Packages->new( $fh_FH ); + my $parser_on_filename = Parse::Debian::Packages->new( $other_src_file ); + + my %pkg_with_io = $parser_on_io->next; + my %pkg_with_FH = $parser_on_FH->next; + my %pkg_with_filename = $parser_on_filename->next; + + print Dump \%pkg_with_io; + print Dump \%pkg_with_FH; + print Dump \%pkg_with_filename; + + while (my %package = $parser_on_io->next) { print Dump \%package; } =head1 DESCRIPTION -This module parses the Packages files used by the debian package -management tools. +This module parses the Packages and Sources files used by the debian +package management tools. It presents itself as an iterator. Each call of the ->next method will return the next package found in the file. -For laziness, we take a filehandle in to the constructor. Please open -the file for us. +You can pass a FileHandle to the constructor of a filename, the +advantage of the filename is that you can parse plain/text, gziped or +bziped files. + +If the filename passed to the constructor don't repressent a file in +supported format (text/plain, application/x-gzip, +application/x-bzip2) or if that file can not be open, new() return undef. =head1 AUTHOR