On Thu, Sep 07, 2006 at 09:35:28AM +0200, Goswin von Brederlow wrote: > Peter Colberg <[EMAIL PROTECTED]> writes: > > > Tackling full pdiff support for debmirror tonight, I took the last > > patch a bit further and implemented APT's RRED method, basically > > by translating from C++ to perl in a linear fashion. > > > > This means that debmirror is now capable of using the pdiff files to > > patch already available, out-dated Packages or Sources files and thus > > saving some bandwidth by not having to download the full (albeit > > compressed) files. > > [...] > > > I've done some work in that direction locally already that checks the > Release file for support of pdiff files but not the actual > fetch&patch and a better Packages/Packages.gz/Package.bz2 > fetch/generate mechanism that works if any of the three is available. > > I will merge in your patch today, got some spare time now.
Thanks for your quick response. After real world tests, I spotted some missing unlink calls; these minor changes are included in the first patch below. > As for reimplementing the rred method: Do we realy want that? I was > thinking of depending on patch and then just firing that up. No, I guess you are right about not including a home-grown method. It does not matter performance wise: Both the perl-implemented rred method (see below for an earlier version without hashing) and patch take less than 1s patching a current main/binary-i386/Packages file of about 19M with a single day's pdiff. So a further dependency on a matured program like patch would definitely be the preferable solution. Grüße, Peter
--- debmirror-20051209~/debmirror 2005-12-09 19:13:09.000000000 +0100 +++ debmirror-20051209/debmirror 2006-09-07 09:31:39.000000000 +0200 @@ -355,6 +355,7 @@ use LockFile::Simple; use Compress::Zlib; use Digest::MD5; +use Digest::SHA1; use LWP::UserAgent; # Yeah, I use too many global variables in this program. @@ -634,6 +635,7 @@ add_bytes("dists/$dist/$section/binary-$arch/Packages.gz"); add_bytes("dists/$dist/$section/binary-$arch/Packages.bz2"); add_bytes("dists/$dist/$section/binary-$arch/Release"); + add_bytes("dists/$dist/$section/binary-$arch/Packages.diff/Index"); } # d-i has no sources over there, sources are in main next if ($section =~ /debian-installer/); @@ -642,6 +644,7 @@ add_bytes("dists/$dist/$section/source/Sources.gz"); add_bytes("dists/$dist/$section/source/Sources.bz2"); add_bytes("dists/$dist/$section/source/Release"); + add_bytes("dists/$dist/$section/source/Sources.diff/Index"); } } } @@ -650,10 +653,12 @@ add_bytes("$_/Packages.gz"); add_bytes("$_/Packages.bz2"); add_bytes("$_/Release"); + add_bytes("$_/Packages.diff/Index"); if ($do_source) { add_bytes("$_/Sources"); add_bytes("$_/Sources.gz"); add_bytes("$_/Sources.bz2"); + add_bytes("$_/Sources.diff/Index"); } } if ($getcontents) { @@ -1059,6 +1064,24 @@ return 0; } +# Check uncompressed pdiff content against sha1sum from Index file. +sub check_pdiff { + my ($filename, $size, $sha1) = @_; + my $digest = Digest::SHA1->new; + my $ret = 0; + + if (-f "$filename.gz") { + system_redirect_io("gunzip", "$filename.gz", "$filename"); + if ($size == -s $filename) { + open HANDLE, $filename or die "$filename: $!"; + $digest->addfile(*HANDLE); + $ret = ($sha1 eq $digest->hexdigest); + } + unlink ($filename); + } + return $ret; +} + # Check file against md5sum and size from the Release file. # It will return true if the md5sum matches. sub check_lists { @@ -1276,6 +1299,32 @@ make_dir($subdir); make_dir("$tempdir/$subdir"); + if (exists $file_lists_size{"$tempdir/$subdir/Packages.diff/Index"}) { + if (!check_lists ("$tempdir/$subdir/Packages.diff/Index")) { + make_dir("$subdir/Packages.diff"); + make_dir("$tempdir/$subdir/Packages.diff"); + say("$subdir/Packages.diff/Index needs fetch"); + remote_get("$subdir/Packages.diff/Index"); + if (!check_lists ("$tempdir/$subdir/Packages.diff/Index")) { + say("$subdir/Packages.diff/Index failed md5sum check, removing"); + push (@errlog,"$subdir/Packages.diff/Index failed md5sum check, removing\n"); + unlink "$tempdir/$subdir/Packages.diff/Index"; + } else { + fetch_and_apply_pdiffs($subdir, "Packages"); + if (check_lists ("$tempdir/$subdir/Packages")) { + system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Packages", "$tempdir/$subdir/Packages.gz"); + system_redirect_io("bzip2", "$tempdir/$subdir/Packages", "$tempdir/$subdir/Packages.bz2"); + } + } + } else { + $bytes_gotten += $file_lists_size{"$tempdir/$subdir/Packages.diff/Index"}; + fetch_and_apply_pdiffs($subdir, "Packages"); + if (check_lists ("$tempdir/$subdir/Packages")) { + system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Packages", "$tempdir/$subdir/Packages.gz"); + system_redirect_io("bzip2", "$tempdir/$subdir/Packages", "$tempdir/$subdir/Packages.bz2"); + } + } + } if (exists $file_lists_size{"$tempdir/$subdir/Packages.gz"}) { if (!check_lists ("$tempdir/$subdir/Packages.gz")) { say("$subdir/Packages.gz needs fetch"); @@ -1345,10 +1394,12 @@ $files{"$subdir/Packages.bz2"}=1; $files{"$subdir/Packages"}=1; $files{"$subdir/Release"}=1; + $files{"$subdir/Packages.diff/Index"}=1; $files{"$tempdir/$subdir/Packages.gz"}=1; $files{"$tempdir/$subdir/Packages.bz2"}=1; $files{"$tempdir/$subdir/Packages"}=1; $files{"$tempdir/$subdir/Release"}=1; + $files{"$tempdir/$subdir/Packages.diff/Index"}=1; } # Get Sources file @@ -1358,6 +1409,32 @@ if ($do_source) { make_dir($subdir); make_dir($tempdir."/".$subdir); + if (exists $file_lists_size{"$tempdir/$subdir/Sources.diff/Index"}) { + if (!check_lists ("$tempdir/$subdir/Sources.diff/Index")) { + make_dir("$subdir/Sources.diff"); + make_dir("$tempdir/$subdir/Sources.diff"); + say("$subdir/Sources.diff/Index needs fetch"); + remote_get("$subdir/Sources.diff/Index"); + if (!check_lists ("$tempdir/$subdir/Sources.diff/Index")) { + say("$subdir/Sources.diff/Index failed md5sum check, removing"); + push (@errlog,"$subdir/Sources.diff/Index failed md5sum check, removing\n"); + unlink "$tempdir/$subdir/Sources.diff/Index"; + } else { + fetch_and_apply_pdiffs($subdir, "Sources"); + if (check_lists ("$tempdir/$subdir/Sources")) { + system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Sources", "$tempdir/$subdir/Sources.gz"); + system_redirect_io("bzip2", "$tempdir/$subdir/Sources", "$tempdir/$subdir/Sources.bz2"); + } + } + } else { + $bytes_gotten += $file_lists_size{"$tempdir/$subdir/Sources.diff/Index"}; + fetch_and_apply_pdiffs($subdir, "Sources"); + if (check_lists ("$tempdir/$subdir/Sources")) { + system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Sources", "$tempdir/$subdir/Sources.gz"); + system_redirect_io("bzip2", "$tempdir/$subdir/Sources", "$tempdir/$subdir/Sources.bz2"); + } + } + } if (exists $file_lists_size{"$tempdir/$subdir/Sources.gz"}) { if (!check_lists ("$tempdir/$subdir/Sources.gz")) { say("$subdir/Sources.gz needs fetch"); @@ -1427,10 +1504,205 @@ $files{"$subdir/Sources"}=1; $files{"$subdir/Sources.bz2"}=1; $files{"$subdir/Release"}=1; + $files{"$subdir/Sources.diff/Index"}=1; $files{"$tempdir/$subdir/Sources.gz"}=1; $files{"$tempdir/$subdir/Sources"}=1; $files{"$tempdir/$subdir/Sources.bz2"}=1; $files{"$tempdir/$subdir/Release"}=1; + $files{"$tempdir/$subdir/Sources.diff/Index"}=1; + } +} + +sub fetch_and_apply_pdiffs { + my ($subdir, $list) = @_; + local (*INDEX, *LIST, *LIST_OLD, *PDIFF); + my (%history_sha1, %history_size, %pdiff_sha1, %pdiff_size); + my ($current_sha1, $current_size, $sha1, $size, $file, $digest, $ret); + + # Parse DiffIndex file + open(INDEX, "$tempdir/$subdir/$list.diff/Index") or die "$tempdir/$subdir/$list.diff/Index: $!"; + $_ = <INDEX>; + while (defined($_)) { + if (m/^SHA1-Current:/m) { + ($current_sha1, $current_size) = m/^SHA1-Current:\s+([A-Za-z0-9]+)\s+(\d+)/m; + $_ = <INDEX>; + } + elsif (m/^SHA1-History:/m) { + while (defined($_ = <INDEX>)) { + last if (!m/^\s/m); + ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m; + $history_sha1{$file} = $sha1; + $history_size{$file} = $size; + } + } + elsif (m/^SHA1-Patches:/m) { + while (defined($_ = <INDEX>)) { + last if (!m/^\s/m); + ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m; + $pdiff_sha1{$file} = $sha1; + $pdiff_size{$file} = $size; + } + } + } + close(INDEX); + + # Download pdiff files as necessary + $ret = 1; + foreach $file (sort keys %pdiff_sha1) { + if (!check_pdiff("$tempdir/$subdir/$list.diff/$file", $pdiff_size{$file}, $pdiff_sha1{$file})) { + say("$subdir/$list.diff/$file.gz needs fetch"); + remote_get("$subdir/$list.diff/$file.gz"); + $bytes_to_get += -s "$tempdir/$subdir/$list.diff/$file.gz"; + if (!check_pdiff("$tempdir/$subdir/$list.diff/$file", $pdiff_size{$file}, $pdiff_sha1{$file})) { + say("$subdir/$list.diff/$file.gz failed sha1sum check, removing"); + push (@errlog,"$subdir/$list.diff/$file.gz failed sha1sum check, removing\n"); + unlink "$tempdir/$subdir/$list.diff/$file.gz"; + $ret = 0; + } + } else { + $bytes_to_get += -s "$tempdir/$subdir/$list.diff/$file.gz"; + $bytes_gotten += -s "$tempdir/$subdir/$list.diff/$file.gz"; + } + $files{"$subdir/$list.diff/$file.gz"}=1; + $files{"$tempdir/$subdir/$list.diff/$file.gz"}=1; + } + return unless ($ret); + + # Apply pdiff files + open(LIST, "$subdir/$list") or return; + $digest = Digest::SHA1->new; + $digest->addfile(*LIST); + $sha1 = $digest->hexdigest; + $size = -s "$subdir/$list"; + unlink("$tempdir/$subdir/$list"); + system("cp $subdir/$list $tempdir/$subdir/$list"); + foreach $file (sort keys %history_sha1) { + next unless ($sha1 eq $history_sha1{$file} && $size eq $history_size{$file}); + system_redirect_io("gunzip", "$tempdir/$subdir/$list.diff/$file.gz", "$tempdir/$subdir/$list.diff/$file"); + unlink("$tempdir/$subdir/$list.old"); + system("mv $tempdir/$subdir/$list $tempdir/$subdir/$list.old"); + open(PDIFF, "$tempdir/$subdir/$list.diff/$file") or die "$tempdir/$subdir/$list.diff/$file: $!"; + open(LIST_OLD, "$tempdir/$subdir/$list.old") or die "$tempdir/$subdir/$list.old: $!"; + open(LIST, ">$tempdir/$subdir/$list") or die "$tempdir/$subdir/$list: $!"; + rred_ed_file(*PDIFF, *LIST_OLD, *LIST, $digest); + close(PDIFF); + close(LIST_OLD); + close(LIST); + unlink("$tempdir/$subdir/$list.diff/$file"); + $sha1 = $digest->hexdigest; + $size = -s "$tempdir/$subdir/$list"; + say("$subdir/$list patched with $subdir/$list.diff/$file.gz"); + } + unlink("$tempdir/$subdir/$list.old"); + if (!($sha1 eq $current_sha1 && $size eq $current_size)) { + say("$subdir/$list failed sha1sum check, removing"); + push (@errlog,"$subdir/$list failed sha1sum check, removing\n"); + unlink "$tempdir/$subdir/$list"; + } +} + +# apt pdiff patch algorithm (see apt-0.6.45/methods/rred.cc) +{ + # this method implements a patch functionality similar to "patch --ed" that is + # used by the "tiffany" incremental packages download stuff. it differs from + # "ed" insofar that it is way more restricted (and therefore secure). in the + # moment only the "c", "a" and "d" commands of ed are implemented (diff + # doesn't output any other). additionally the records must be reverse sorted + # by line number and may not overlap (diff *seems* to produce this kind of + # output). + + # rred method ed commands + use constant RRED_MODE_CHANGED => 'c'; + use constant RRED_MODE_DELETED => 'd'; + use constant RRED_MODE_ADDED => 'a'; + # rred method return values + use constant RRED_ED_OK => 0; + use constant RRED_ED_ORDERING => 1; + use constant RRED_ED_PARSER => 2; + use constant RRED_ED_FAILURE => 3; + + sub rred_ed_file { + local (*ED_CMDS, *IN_FILE, *OUT_FILE) = splice @_, 0, 3; + my ($digest) = @_; + my $result; + + # we do a tail recursion to read the commands in the right order + $result = rred_ed_rec(*ED_CMDS, *IN_FILE, *OUT_FILE, 0, $digest); + + # read the rest from infile + if ($result > 0) { + while (<IN_FILE>) { + print OUT_FILE; + $digest->add($_); + } + } + else { + return RRED_ED_FAILURE; + } + return RRED_ED_OK; + } + + sub rred_ed_rec { + no warnings 'recursion'; + local (*ED_CMDS, *IN_FILE, *OUT_FILE) = splice @_, 0, 3; + my ($line, $digest) = @_; + my ($pos, $startline, $stopline, $mode); + + # get the current command and parse it + if (defined($_ = <ED_CMDS>)) { + ($startline, $stopline, $mode) = m/^(\d+)(?:,(\d+))?([cda])$/m; + + if (not defined($startline)) { + return RRED_ED_PARSER; + } + elsif ($startline < $line) { + return RRED_ED_ORDERING; + } + $stopline = $startline unless defined($stopline); + } + else { + return $line; + } + # get the current position + $pos = tell(ED_CMDS); + # if this is add or change then go to the next full stop + if (($mode eq RRED_MODE_CHANGED) || ($mode eq RRED_MODE_ADDED)) { + while (<ED_CMDS>) { + last if (m/^\./m); + } + } + # do the recursive call + $line = rred_ed_rec(*ED_CMDS, *IN_FILE, *OUT_FILE, $line, $digest); + # pass on errors + if ($line < 0) { + return $line; + } + # apply our hunk + seek(ED_CMDS, $pos, 0); + # first wind to the current position + $startline -= 1 unless ($mode eq RRED_MODE_ADDED); + while ($line < $startline) { + $_ = <IN_FILE>; + print OUT_FILE $_; + $digest->add($_); + $line++; + } + # include from ed script + if (($mode eq RRED_MODE_ADDED) || ($mode eq RRED_MODE_CHANGED)) { + while (<ED_CMDS>) { + last if (m/^\./m); + print OUT_FILE; + $digest->add($_); + } + } + # ignore the corresponding number of lines from input + if (($mode eq RRED_MODE_DELETED) || ($mode eq RRED_MODE_CHANGED)) { + while ($line < $stopline) { + <IN_FILE>; + $line++; + } + } + return $line; } }
#!/usr/bin/perl -w use strict; # rred method ed commands use constant RRED_MODE_CHANGED => 'c'; use constant RRED_MODE_DELETED => 'd'; use constant RRED_MODE_ADDED => 'a'; # rred method return values use constant RRED_ED_OK => 0; use constant RRED_ED_ORDERING => 1; use constant RRED_ED_PARSER => 2; use constant RRED_ED_FAILURE => 3; local (*ED_CMDS, *IN_FILE, *OUT_FILE); open (ED_CMDS, "patch"); open (IN_FILE, "in"); open (OUT_FILE, ">out"); rred_ed_file(*ED_CMDS, *IN_FILE, *OUT_FILE); # this method implements a patch functionality similar to "patch --ed" that is # used by the "tiffany" incremental packages download stuff. it differs from # "ed" insofar that it is way more restricted (and therefore secure). in the # moment only the "c", "a" and "d" commands of ed are implemented (diff # doesn't output any other). additionally the records must be reverse sorted # by line number and may not overlap (diff *seems* to produce this kind of # output). sub rred_ed_file { local (*ED_CMDS, *IN_FILE, *OUT_FILE) = @_; my ($result); # we do a tail recursion to read the commands in the right order $result = rred_ed_rec(*ED_CMDS, *IN_FILE, *OUT_FILE, 0); # read the rest from infile if ($result > 0) { print OUT_FILE <IN_FILE>; } else { return RRED_ED_FAILURE; } return RRED_ED_OK; } sub rred_ed_rec { no warnings 'recursion'; my $line = pop; local (*ED_CMDS, *IN_FILE, *OUT_FILE) = @_; my ($pos, $startline, $stopline, $mode); # get the current command and parse it if (defined($_ = <ED_CMDS>)) { ($startline, $stopline, $mode) = m/^(\d+)(?:,(\d+))?([cda])$/m; if (not defined($startline)) { return RRED_ED_PARSER; } elsif ($startline < $line) { return RRED_ED_ORDERING; } $stopline = $startline unless defined($stopline); } else { return $line; } # get the current position $pos = tell(ED_CMDS); # if this is add or change then go to the next full stop if (($mode eq RRED_MODE_CHANGED) || ($mode eq RRED_MODE_ADDED)) { while (<ED_CMDS>) { last if (m/^\./m); } } # do the recursive call $line = rred_ed_rec(*ED_CMDS, *IN_FILE, *OUT_FILE, $line); # pass on errors if ($line < 0) { return $line; } # apply our hunk seek(ED_CMDS, $pos, 0); # first wind to the current position $startline -= 1 unless ($mode eq RRED_MODE_ADDED); while ($line < $startline) { print OUT_FILE scalar <IN_FILE>; $line++; } # include from ed script if (($mode eq RRED_MODE_ADDED) || ($mode eq RRED_MODE_CHANGED)) { while (<ED_CMDS>) { last if (m/^\./m); print OUT_FILE; } } # ignore the corresponding number of lines from input if (($mode eq RRED_MODE_DELETED) || ($mode eq RRED_MODE_CHANGED)) { while ($line < $stopline) { <IN_FILE>; $line++; } } return $line; }
diff -urN debmirror-20051209~/debian/control debmirror-20051209/debian/control --- debmirror-20051209~/debian/control 2004-12-09 18:56:46.000000000 +0100 +++ debmirror-20051209/debian/control 2006-09-07 10:08:42.000000000 +0200 @@ -6,7 +6,7 @@ Package: debmirror Architecture: all -Depends: libnet-perl, libdigest-md5-perl, liblockfile-simple-perl, rsync, libcompress-zlib-perl, bzip2, libwww-perl +Depends: libnet-perl, libdigest-md5-perl, liblockfile-simple-perl, rsync, libcompress-zlib-perl, bzip2, libwww-perl, libdigest-sha1-perl, patch Recommends: gnupg Description: Debian partial mirror script, with ftp and package pool support This program downloads and maintains a partial local Debian mirror. diff -urN debmirror-20051209~/debmirror debmirror-20051209/debmirror --- debmirror-20051209~/debmirror 2005-12-09 19:13:09.000000000 +0100 +++ debmirror-20051209/debmirror 2006-09-07 10:14:01.000000000 +0200 @@ -355,6 +355,7 @@ use LockFile::Simple; use Compress::Zlib; use Digest::MD5; +use Digest::SHA1; use LWP::UserAgent; # Yeah, I use too many global variables in this program. @@ -634,6 +635,7 @@ add_bytes("dists/$dist/$section/binary-$arch/Packages.gz"); add_bytes("dists/$dist/$section/binary-$arch/Packages.bz2"); add_bytes("dists/$dist/$section/binary-$arch/Release"); + add_bytes("dists/$dist/$section/binary-$arch/Packages.diff/Index"); } # d-i has no sources over there, sources are in main next if ($section =~ /debian-installer/); @@ -642,6 +644,7 @@ add_bytes("dists/$dist/$section/source/Sources.gz"); add_bytes("dists/$dist/$section/source/Sources.bz2"); add_bytes("dists/$dist/$section/source/Release"); + add_bytes("dists/$dist/$section/source/Sources.diff/Index"); } } } @@ -650,10 +653,12 @@ add_bytes("$_/Packages.gz"); add_bytes("$_/Packages.bz2"); add_bytes("$_/Release"); + add_bytes("$_/Packages.diff/Index"); if ($do_source) { add_bytes("$_/Sources"); add_bytes("$_/Sources.gz"); add_bytes("$_/Sources.bz2"); + add_bytes("$_/Sources.diff/Index"); } } if ($getcontents) { @@ -1059,6 +1064,24 @@ return 0; } +# Check uncompressed pdiff content against sha1sum from Index file. +sub check_pdiff { + my ($filename, $size, $sha1) = @_; + my $digest = Digest::SHA1->new; + my $ret = 0; + + if (-f "$filename.gz") { + system_redirect_io("gunzip", "$filename.gz", "$filename"); + if ($size == -s $filename) { + open HANDLE, $filename or die "$filename: $!"; + $digest->addfile(*HANDLE); + $ret = ($sha1 eq $digest->hexdigest); + } + unlink ($filename); + } + return $ret; +} + # Check file against md5sum and size from the Release file. # It will return true if the md5sum matches. sub check_lists { @@ -1276,6 +1299,32 @@ make_dir($subdir); make_dir("$tempdir/$subdir"); + if (exists $file_lists_size{"$tempdir/$subdir/Packages.diff/Index"}) { + if (!check_lists ("$tempdir/$subdir/Packages.diff/Index")) { + make_dir("$subdir/Packages.diff"); + make_dir("$tempdir/$subdir/Packages.diff"); + say("$subdir/Packages.diff/Index needs fetch"); + remote_get("$subdir/Packages.diff/Index"); + if (!check_lists ("$tempdir/$subdir/Packages.diff/Index")) { + say("$subdir/Packages.diff/Index failed md5sum check, removing"); + push (@errlog,"$subdir/Packages.diff/Index failed md5sum check, removing\n"); + unlink "$tempdir/$subdir/Packages.diff/Index"; + } else { + fetch_and_apply_pdiffs($subdir, "Packages"); + if (check_lists ("$tempdir/$subdir/Packages")) { + system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Packages", "$tempdir/$subdir/Packages.gz"); + system_redirect_io("bzip2", "$tempdir/$subdir/Packages", "$tempdir/$subdir/Packages.bz2"); + } + } + } else { + $bytes_gotten += $file_lists_size{"$tempdir/$subdir/Packages.diff/Index"}; + fetch_and_apply_pdiffs($subdir, "Packages"); + if (check_lists ("$tempdir/$subdir/Packages")) { + system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Packages", "$tempdir/$subdir/Packages.gz"); + system_redirect_io("bzip2", "$tempdir/$subdir/Packages", "$tempdir/$subdir/Packages.bz2"); + } + } + } if (exists $file_lists_size{"$tempdir/$subdir/Packages.gz"}) { if (!check_lists ("$tempdir/$subdir/Packages.gz")) { say("$subdir/Packages.gz needs fetch"); @@ -1345,10 +1394,12 @@ $files{"$subdir/Packages.bz2"}=1; $files{"$subdir/Packages"}=1; $files{"$subdir/Release"}=1; + $files{"$subdir/Packages.diff/Index"}=1; $files{"$tempdir/$subdir/Packages.gz"}=1; $files{"$tempdir/$subdir/Packages.bz2"}=1; $files{"$tempdir/$subdir/Packages"}=1; $files{"$tempdir/$subdir/Release"}=1; + $files{"$tempdir/$subdir/Packages.diff/Index"}=1; } # Get Sources file @@ -1358,6 +1409,32 @@ if ($do_source) { make_dir($subdir); make_dir($tempdir."/".$subdir); + if (exists $file_lists_size{"$tempdir/$subdir/Sources.diff/Index"}) { + if (!check_lists ("$tempdir/$subdir/Sources.diff/Index")) { + make_dir("$subdir/Sources.diff"); + make_dir("$tempdir/$subdir/Sources.diff"); + say("$subdir/Sources.diff/Index needs fetch"); + remote_get("$subdir/Sources.diff/Index"); + if (!check_lists ("$tempdir/$subdir/Sources.diff/Index")) { + say("$subdir/Sources.diff/Index failed md5sum check, removing"); + push (@errlog,"$subdir/Sources.diff/Index failed md5sum check, removing\n"); + unlink "$tempdir/$subdir/Sources.diff/Index"; + } else { + fetch_and_apply_pdiffs($subdir, "Sources"); + if (check_lists ("$tempdir/$subdir/Sources")) { + system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Sources", "$tempdir/$subdir/Sources.gz"); + system_redirect_io("bzip2", "$tempdir/$subdir/Sources", "$tempdir/$subdir/Sources.bz2"); + } + } + } else { + $bytes_gotten += $file_lists_size{"$tempdir/$subdir/Sources.diff/Index"}; + fetch_and_apply_pdiffs($subdir, "Sources"); + if (check_lists ("$tempdir/$subdir/Sources")) { + system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Sources", "$tempdir/$subdir/Sources.gz"); + system_redirect_io("bzip2", "$tempdir/$subdir/Sources", "$tempdir/$subdir/Sources.bz2"); + } + } + } if (exists $file_lists_size{"$tempdir/$subdir/Sources.gz"}) { if (!check_lists ("$tempdir/$subdir/Sources.gz")) { say("$subdir/Sources.gz needs fetch"); @@ -1427,10 +1504,96 @@ $files{"$subdir/Sources"}=1; $files{"$subdir/Sources.bz2"}=1; $files{"$subdir/Release"}=1; + $files{"$subdir/Sources.diff/Index"}=1; $files{"$tempdir/$subdir/Sources.gz"}=1; $files{"$tempdir/$subdir/Sources"}=1; $files{"$tempdir/$subdir/Sources.bz2"}=1; $files{"$tempdir/$subdir/Release"}=1; + $files{"$tempdir/$subdir/Sources.diff/Index"}=1; + } +} + +sub fetch_and_apply_pdiffs { + my ($subdir, $list) = @_; + local (*INDEX, *LIST); + my (%history_sha1, %history_size, %pdiff_sha1, %pdiff_size); + my ($current_sha1, $current_size, $sha1, $size, $file, $digest, $ret, $patch); + + # Parse DiffIndex file + open(INDEX, "$tempdir/$subdir/$list.diff/Index") or die "$tempdir/$subdir/$list.diff/Index: $!"; + $_ = <INDEX>; + while (defined($_)) { + if (m/^SHA1-Current:/m) { + ($current_sha1, $current_size) = m/^SHA1-Current:\s+([A-Za-z0-9]+)\s+(\d+)/m; + $_ = <INDEX>; + } + elsif (m/^SHA1-History:/m) { + while (defined($_ = <INDEX>)) { + last if (!m/^\s/m); + ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m; + $history_sha1{$file} = $sha1; + $history_size{$file} = $size; + } + } + elsif (m/^SHA1-Patches:/m) { + while (defined($_ = <INDEX>)) { + last if (!m/^\s/m); + ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m; + $pdiff_sha1{$file} = $sha1; + $pdiff_size{$file} = $size; + } + } + } + close(INDEX); + + # Download pdiff files as necessary + $ret = 1; + foreach $file (sort keys %pdiff_sha1) { + if (!check_pdiff("$tempdir/$subdir/$list.diff/$file", $pdiff_size{$file}, $pdiff_sha1{$file})) { + say("$subdir/$list.diff/$file.gz needs fetch"); + remote_get("$subdir/$list.diff/$file.gz"); + $bytes_to_get += -s "$tempdir/$subdir/$list.diff/$file.gz"; + if (!check_pdiff("$tempdir/$subdir/$list.diff/$file", $pdiff_size{$file}, $pdiff_sha1{$file})) { + say("$subdir/$list.diff/$file.gz failed sha1sum check, removing"); + push (@errlog,"$subdir/$list.diff/$file.gz failed sha1sum check, removing\n"); + unlink "$tempdir/$subdir/$list.diff/$file.gz"; + $ret = 0; + } + } else { + $bytes_to_get += -s "$tempdir/$subdir/$list.diff/$file.gz"; + $bytes_gotten += -s "$tempdir/$subdir/$list.diff/$file.gz"; + } + $files{"$subdir/$list.diff/$file.gz"}=1; + $files{"$tempdir/$subdir/$list.diff/$file.gz"}=1; + } + return unless ($ret); + + # Apply pdiff files + open(LIST, "$subdir/$list") or return; + $digest = Digest::SHA1->new; + $digest->addfile(*LIST); + $sha1 = $digest->hexdigest; + $size = -s "$subdir/$list"; + unlink("$tempdir/$subdir/$list"); + system("cp $subdir/$list $tempdir/$subdir/$list"); + $patch = 0; + foreach $file (sort keys %history_sha1) { + $patch = 1 if ($sha1 eq $history_sha1{$file} && $size eq $history_size{$file}); + next unless ($patch); + system_redirect_io("gunzip", "$tempdir/$subdir/$list.diff/$file.gz", "$tempdir/$subdir/$list.diff/$file"); + system("patch --ed $tempdir/$subdir/$list $tempdir/$subdir/$list.diff/$file"); + unlink("$tempdir/$subdir/$list.diff/$file"); + say("$subdir/$list patched with $subdir/$list.diff/$file.gz"); + } + open(LIST, "$tempdir/$subdir/$list") or die "$tempdir/$subdir/$list: $!"; + $digest = Digest::SHA1->new; + $digest->addfile(*LIST); + $sha1 = $digest->hexdigest; + $size = -s "$tempdir/$subdir/$list"; + if (!($sha1 eq $current_sha1 && $size eq $current_size)) { + say("$subdir/$list failed sha1sum check, removing"); + push (@errlog,"$subdir/$list failed sha1sum check, removing\n"); + unlink "$tempdir/$subdir/$list"; } }