On Thu, Sep 07, 2006 at 09:35:28AM +0200, Goswin von Brederlow wrote:
> Peter Colberg <[EMAIL PROTECTED]> writes:
> 
> > Tackling full pdiff support for debmirror tonight, I took the last
> > patch a bit further and implemented APT's RRED method, basically
> > by translating from C++ to perl in a linear fashion.
> >
> > This means that debmirror is now capable of using the pdiff files to
> > patch already available, out-dated Packages or Sources files and thus
> > saving some bandwidth by not having to download the full (albeit
> > compressed) files.
> > [...]
> >
> I've done some work in that direction locally already that checks the
> Release file for support of pdiff files but not the actual
> fetch&patch and a better Packages/Packages.gz/Packages.bz2
> fetch/generate mechanism that works if any of the three is available.
> 
> I will merge in your patch today, got some spare time now.

Thanks for your quick response.

After real world tests, I spotted some missing unlink calls; these
minor changes are included in the first patch below.

> As for reimplementing the rred method: Do we really want that? I was
> thinking of depending on patch and then just firing that up.

No, I guess you are right about not including a home-grown method.

It does not matter performance-wise: both the Perl-implemented rred
method (see below for an earlier version without hashing) and patch
take less than 1s to patch a current main/binary-i386/Packages file
of about 19M with a single day's pdiff. So a further dependency on a
mature program like patch would definitely be the preferable
solution.

Grüße,
Peter
--- debmirror-20051209~/debmirror       2005-12-09 19:13:09.000000000 +0100
+++ debmirror-20051209/debmirror        2006-09-07 09:31:39.000000000 +0200
@@ -355,6 +355,7 @@
 use LockFile::Simple;
 use Compress::Zlib;
 use Digest::MD5;
+use Digest::SHA1;
 use LWP::UserAgent;
 
 # Yeah, I use too many global variables in this program.
@@ -634,6 +635,7 @@
       add_bytes("dists/$dist/$section/binary-$arch/Packages.gz");
       add_bytes("dists/$dist/$section/binary-$arch/Packages.bz2");
       add_bytes("dists/$dist/$section/binary-$arch/Release");
+      add_bytes("dists/$dist/$section/binary-$arch/Packages.diff/Index");
     }
     # d-i has no sources over there, sources are in main
     next if ($section =~ /debian-installer/);
@@ -642,6 +644,7 @@
       add_bytes("dists/$dist/$section/source/Sources.gz");
       add_bytes("dists/$dist/$section/source/Sources.bz2");
       add_bytes("dists/$dist/$section/source/Release");
+      add_bytes("dists/$dist/$section/source/Sources.diff/Index");
     }
   }
 }
@@ -650,10 +653,12 @@
   add_bytes("$_/Packages.gz");
   add_bytes("$_/Packages.bz2");
   add_bytes("$_/Release");
+  add_bytes("$_/Packages.diff/Index");
   if ($do_source) {
     add_bytes("$_/Sources");
     add_bytes("$_/Sources.gz");
     add_bytes("$_/Sources.bz2");
+    add_bytes("$_/Sources.diff/Index");
   }
 }
 if ($getcontents) {
@@ -1059,6 +1064,24 @@
   return 0;
 }
 
+# Check uncompressed pdiff content against sha1sum from Index file.
+sub check_pdiff {
+  my ($filename, $size, $sha1) = @_;
+  my $digest = Digest::SHA1->new;
+  my $ret = 0;
+
+  if (-f "$filename.gz") {
+    system_redirect_io("gunzip", "$filename.gz", "$filename");
+    if ($size == -s $filename) {
+      open HANDLE, $filename or die "$filename: $!";
+      $digest->addfile(*HANDLE);
+      $ret = ($sha1 eq $digest->hexdigest);
+    }
+    unlink ($filename);
+  }
+  return $ret;
+}
+
 # Check file against md5sum and size from the Release file.
 # It will return true if the md5sum matches.
 sub check_lists {
@@ -1276,6 +1299,32 @@
   make_dir($subdir);
   make_dir("$tempdir/$subdir");
 
+  if (exists $file_lists_size{"$tempdir/$subdir/Packages.diff/Index"}) {
+    if (!check_lists ("$tempdir/$subdir/Packages.diff/Index")) {
+      make_dir("$subdir/Packages.diff");
+      make_dir("$tempdir/$subdir/Packages.diff");
+      say("$subdir/Packages.diff/Index needs fetch");
+      remote_get("$subdir/Packages.diff/Index");
+      if (!check_lists ("$tempdir/$subdir/Packages.diff/Index")) {
+       say("$subdir/Packages.diff/Index failed md5sum check, removing");
+       push (@errlog,"$subdir/Packages.diff/Index failed md5sum check, 
removing\n");
+       unlink "$tempdir/$subdir/Packages.diff/Index";
+      } else {
+       fetch_and_apply_pdiffs($subdir, "Packages");
+       if (check_lists ("$tempdir/$subdir/Packages")) {
+         system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Packages", 
"$tempdir/$subdir/Packages.gz");
+         system_redirect_io("bzip2", "$tempdir/$subdir/Packages", 
"$tempdir/$subdir/Packages.bz2");
+       }
+      }
+    } else {
+      $bytes_gotten += 
$file_lists_size{"$tempdir/$subdir/Packages.diff/Index"};
+      fetch_and_apply_pdiffs($subdir, "Packages");
+      if (check_lists ("$tempdir/$subdir/Packages")) {
+       system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Packages", 
"$tempdir/$subdir/Packages.gz");
+       system_redirect_io("bzip2", "$tempdir/$subdir/Packages", 
"$tempdir/$subdir/Packages.bz2");
+      }
+    }
+  }
   if (exists $file_lists_size{"$tempdir/$subdir/Packages.gz"}) {
     if (!check_lists ("$tempdir/$subdir/Packages.gz")) {
       say("$subdir/Packages.gz needs fetch");
@@ -1345,10 +1394,12 @@
   $files{"$subdir/Packages.bz2"}=1;
   $files{"$subdir/Packages"}=1;
   $files{"$subdir/Release"}=1;
+  $files{"$subdir/Packages.diff/Index"}=1;
   $files{"$tempdir/$subdir/Packages.gz"}=1;
   $files{"$tempdir/$subdir/Packages.bz2"}=1;
   $files{"$tempdir/$subdir/Packages"}=1;
   $files{"$tempdir/$subdir/Release"}=1;
+  $files{"$tempdir/$subdir/Packages.diff/Index"}=1;
 }
 
 # Get Sources file
@@ -1358,6 +1409,32 @@
   if ($do_source) {
     make_dir($subdir);
     make_dir($tempdir."/".$subdir);
+    if (exists $file_lists_size{"$tempdir/$subdir/Sources.diff/Index"}) {
+      if (!check_lists ("$tempdir/$subdir/Sources.diff/Index")) {
+       make_dir("$subdir/Sources.diff");
+       make_dir("$tempdir/$subdir/Sources.diff");
+       say("$subdir/Sources.diff/Index needs fetch");
+       remote_get("$subdir/Sources.diff/Index");
+       if (!check_lists ("$tempdir/$subdir/Sources.diff/Index")) {
+         say("$subdir/Sources.diff/Index failed md5sum check, removing");
+         push (@errlog,"$subdir/Sources.diff/Index failed md5sum check, 
removing\n");
+         unlink "$tempdir/$subdir/Sources.diff/Index";
+       } else {
+         fetch_and_apply_pdiffs($subdir, "Sources");
+         if (check_lists ("$tempdir/$subdir/Sources")) {
+           system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Sources", 
"$tempdir/$subdir/Sources.gz");
+           system_redirect_io("bzip2", "$tempdir/$subdir/Sources", 
"$tempdir/$subdir/Sources.bz2");
+         }
+       }
+      } else {
+       $bytes_gotten += 
$file_lists_size{"$tempdir/$subdir/Sources.diff/Index"};
+       fetch_and_apply_pdiffs($subdir, "Sources");
+       if (check_lists ("$tempdir/$subdir/Sources")) {
+         system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Sources", 
"$tempdir/$subdir/Sources.gz");
+         system_redirect_io("bzip2", "$tempdir/$subdir/Sources", 
"$tempdir/$subdir/Sources.bz2");
+       }
+      }
+    }
     if (exists $file_lists_size{"$tempdir/$subdir/Sources.gz"}) {
       if (!check_lists ("$tempdir/$subdir/Sources.gz")) {
        say("$subdir/Sources.gz needs fetch");
@@ -1427,10 +1504,205 @@
     $files{"$subdir/Sources"}=1;
     $files{"$subdir/Sources.bz2"}=1;
     $files{"$subdir/Release"}=1;
+    $files{"$subdir/Sources.diff/Index"}=1;
     $files{"$tempdir/$subdir/Sources.gz"}=1;
     $files{"$tempdir/$subdir/Sources"}=1;
     $files{"$tempdir/$subdir/Sources.bz2"}=1;
     $files{"$tempdir/$subdir/Release"}=1;
+    $files{"$tempdir/$subdir/Sources.diff/Index"}=1;
+  }
+}
+
+sub fetch_and_apply_pdiffs {
+  my ($subdir, $list) = @_;
+  local (*INDEX, *LIST, *LIST_OLD, *PDIFF);
+  my (%history_sha1, %history_size, %pdiff_sha1, %pdiff_size);
+  my ($current_sha1, $current_size, $sha1, $size, $file, $digest, $ret);
+
+  # Parse DiffIndex file
+  open(INDEX, "$tempdir/$subdir/$list.diff/Index") or die 
"$tempdir/$subdir/$list.diff/Index: $!";
+  $_ = <INDEX>;
+  while (defined($_)) {
+    if (m/^SHA1-Current:/m) {
+      ($current_sha1, $current_size) = 
m/^SHA1-Current:\s+([A-Za-z0-9]+)\s+(\d+)/m;
+      $_ = <INDEX>;
+    }
+    elsif (m/^SHA1-History:/m) {
+      while (defined($_ = <INDEX>)) {
+       last if (!m/^\s/m);
+       ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m;
+       $history_sha1{$file} = $sha1;
+       $history_size{$file} = $size;
+      }
+    }
+    elsif (m/^SHA1-Patches:/m) {
+      while (defined($_ = <INDEX>)) {
+       last if (!m/^\s/m);
+       ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m;
+       $pdiff_sha1{$file} = $sha1;
+       $pdiff_size{$file} = $size;
+      }
+    }
+  }
+  close(INDEX);
+
+  # Download pdiff files as necessary
+  $ret = 1;
+  foreach $file (sort keys %pdiff_sha1) {
+    if (!check_pdiff("$tempdir/$subdir/$list.diff/$file", $pdiff_size{$file}, 
$pdiff_sha1{$file})) {
+      say("$subdir/$list.diff/$file.gz needs fetch");
+      remote_get("$subdir/$list.diff/$file.gz");
+      $bytes_to_get += -s "$tempdir/$subdir/$list.diff/$file.gz";
+      if (!check_pdiff("$tempdir/$subdir/$list.diff/$file", 
$pdiff_size{$file}, $pdiff_sha1{$file})) {
+       say("$subdir/$list.diff/$file.gz failed sha1sum check, removing");
+       push (@errlog,"$subdir/$list.diff/$file.gz failed sha1sum check, 
removing\n");
+       unlink "$tempdir/$subdir/$list.diff/$file.gz";
+       $ret = 0;
+      }
+    } else {
+      $bytes_to_get += -s "$tempdir/$subdir/$list.diff/$file.gz";
+      $bytes_gotten += -s "$tempdir/$subdir/$list.diff/$file.gz";
+    }
+    $files{"$subdir/$list.diff/$file.gz"}=1;
+    $files{"$tempdir/$subdir/$list.diff/$file.gz"}=1;
+  }
+  return unless ($ret);
+
+  # Apply pdiff files
+  open(LIST, "$subdir/$list") or return;
+  $digest = Digest::SHA1->new;
+  $digest->addfile(*LIST);
+  $sha1 = $digest->hexdigest;
+  $size = -s "$subdir/$list";
+  unlink("$tempdir/$subdir/$list");
+  system("cp $subdir/$list $tempdir/$subdir/$list");
+  foreach $file (sort keys %history_sha1) {
+    next unless ($sha1 eq $history_sha1{$file} && $size eq 
$history_size{$file});
+    system_redirect_io("gunzip", "$tempdir/$subdir/$list.diff/$file.gz", 
"$tempdir/$subdir/$list.diff/$file");
+    unlink("$tempdir/$subdir/$list.old");
+    system("mv $tempdir/$subdir/$list $tempdir/$subdir/$list.old");
+    open(PDIFF, "$tempdir/$subdir/$list.diff/$file") or die 
"$tempdir/$subdir/$list.diff/$file: $!";
+    open(LIST_OLD, "$tempdir/$subdir/$list.old") or die 
"$tempdir/$subdir/$list.old: $!";
+    open(LIST, ">$tempdir/$subdir/$list") or die "$tempdir/$subdir/$list: $!";
+    rred_ed_file(*PDIFF, *LIST_OLD, *LIST, $digest);
+    close(PDIFF);
+    close(LIST_OLD);
+    close(LIST);
+    unlink("$tempdir/$subdir/$list.diff/$file");
+    $sha1 = $digest->hexdigest;
+    $size = -s "$tempdir/$subdir/$list";
+    say("$subdir/$list patched with $subdir/$list.diff/$file.gz");
+  }
+  unlink("$tempdir/$subdir/$list.old");
+  if (!($sha1 eq $current_sha1 && $size eq $current_size)) {
+    say("$subdir/$list failed sha1sum check, removing");
+    push (@errlog,"$subdir/$list failed sha1sum check, removing\n");
+    unlink "$tempdir/$subdir/$list";
+  }
+}
+
+# apt pdiff patch algorithm (see apt-0.6.45/methods/rred.cc)
+{
+  # this method implements a patch functionality similar to "patch --ed" that 
is
+  # used by the "tiffany" incremental packages download stuff. it differs from 
+  # "ed" insofar that it is way more restricted (and therefore secure). in the
+  # moment only the "c", "a" and "d" commands of ed are implemented (diff 
+  # doesn't output any other). additionally the records must be reverse sorted 
+  # by line number and may not overlap (diff *seems* to produce this kind of 
+  # output). 
+
+  # rred method ed commands
+  use constant RRED_MODE_CHANGED => 'c';
+  use constant RRED_MODE_DELETED => 'd';
+  use constant RRED_MODE_ADDED => 'a';
+  # rred method return values
+  use constant RRED_ED_OK => 0;
+  use constant RRED_ED_ORDERING => 1;
+  use constant RRED_ED_PARSER => 2;
+  use constant RRED_ED_FAILURE => 3;
+
+  sub rred_ed_file {
+    local (*ED_CMDS, *IN_FILE, *OUT_FILE) = splice @_, 0, 3;
+    my ($digest) = @_;
+    my $result;
+
+    # we do a tail recursion to read the commands in the right order
+    $result = rred_ed_rec(*ED_CMDS, *IN_FILE, *OUT_FILE, 0, $digest);
+
+    # read the rest from infile
+    if ($result > 0) {
+      while (<IN_FILE>) {
+       print OUT_FILE;
+       $digest->add($_);
+      }
+    }
+    else {
+      return RRED_ED_FAILURE;
+    }
+    return RRED_ED_OK;
+  }
+
+  sub rred_ed_rec {
+    no warnings 'recursion';
+    local (*ED_CMDS, *IN_FILE, *OUT_FILE) = splice @_, 0, 3;
+    my ($line, $digest) = @_;
+    my ($pos, $startline, $stopline, $mode);
+
+    # get the current command and parse it
+    if (defined($_ = <ED_CMDS>)) {
+      ($startline, $stopline, $mode) = m/^(\d+)(?:,(\d+))?([cda])$/m;
+
+      if (not defined($startline)) {
+       return RRED_ED_PARSER;
+      }
+      elsif ($startline < $line) {
+       return RRED_ED_ORDERING;
+      }
+      $stopline = $startline unless defined($stopline);
+    }
+    else {
+      return $line;
+    }
+    # get the current position
+    $pos = tell(ED_CMDS);
+    # if this is add or change then go to the next full stop
+    if (($mode eq RRED_MODE_CHANGED) || ($mode eq RRED_MODE_ADDED)) {
+      while (<ED_CMDS>) {
+       last if (m/^\./m);
+      }
+    }
+    # do the recursive call
+    $line = rred_ed_rec(*ED_CMDS, *IN_FILE, *OUT_FILE, $line, $digest);
+    # pass on errors
+    if ($line < 0) {
+      return $line;
+    }
+    # apply our hunk
+    seek(ED_CMDS, $pos, 0);
+    # first wind to the current position
+    $startline -= 1 unless ($mode eq RRED_MODE_ADDED);
+    while ($line < $startline) {
+      $_ = <IN_FILE>;
+      print OUT_FILE $_;
+      $digest->add($_);
+      $line++;
+    }
+    # include from ed script
+    if (($mode eq RRED_MODE_ADDED) || ($mode eq RRED_MODE_CHANGED)) {
+      while (<ED_CMDS>) {
+       last if (m/^\./m);
+       print OUT_FILE;
+       $digest->add($_);
+      }
+    }
+    # ignore the corresponding number of lines from input
+    if (($mode eq RRED_MODE_DELETED) || ($mode eq RRED_MODE_CHANGED)) {
+      while ($line < $stopline) {
+       <IN_FILE>;
+       $line++;
+      }
+    }
+    return $line;
   }
 }
 
#!/usr/bin/perl -w

use strict;

# rred method ed commands
use constant RRED_MODE_CHANGED => 'c';
use constant RRED_MODE_DELETED => 'd';
use constant RRED_MODE_ADDED => 'a';
# rred method return values
use constant RRED_ED_OK => 0;
use constant RRED_ED_ORDERING => 1;
use constant RRED_ED_PARSER => 2;
use constant RRED_ED_FAILURE => 3;



# Driver: apply the ed script in the file "patch" to the file "in" and write
# the patched text to the file "out" (all relative to the current directory).
# Dies with a message if a file cannot be opened, if the output cannot be
# flushed, or if rred_ed_file() reports that the patch was not applied.
local (*ED_CMDS, *IN_FILE, *OUT_FILE);
open(ED_CMDS, '<', "patch") or die "patch: $!";
open(IN_FILE, '<', "in") or die "in: $!";
open(OUT_FILE, '>', "out") or die "out: $!";
my $status = rred_ed_file(*ED_CMDS, *IN_FILE, *OUT_FILE);
close(ED_CMDS);
close(IN_FILE);
# buffered write errors only surface when the handle is closed
close(OUT_FILE) or die "out: $!";
die "patch: could not be applied to in\n" unless ($status == RRED_ED_OK);


# this method implements a patch functionality similar to "patch --ed" that is
# used by the "tiffany" incremental packages download stuff. it differs from
# "ed" insofar as it is far more restricted (and therefore more secure). at
# the moment only the "c", "a" and "d" commands of ed are implemented (diff
# doesn't output any other). additionally the records must be reverse sorted
# by line number and may not overlap (diff *seems* to produce this kind of
# output).

# Apply an ed-style script to a file.
#
# Arguments, in order:
#   - handle of the ed script; it must be seekable, because rred_ed_rec()
#     rewinds it with seek() to re-read the text of each command
#   - handle of the file to be patched
#   - handle the patched text is written to
# Each handle may be a typeglob (*FH) or a lexical filehandle.
#
# Returns RRED_ED_OK when the whole script was applied and the remainder of
# the input was copied, RRED_ED_FAILURE otherwise.
sub rred_ed_file {
  my ($ed_cmds, $in_file, $out_file) = @_;

  # the commands are sorted by descending line number, so we recurse to the
  # end of the script first and apply the hunks on the way back
  my $result = rred_ed_rec($ed_cmds, $in_file, $out_file, 0);

  # a negative result is an error; zero is a valid line count (empty script,
  # or a script that only inserts before the first line)
  return RRED_ED_FAILURE if ($result < 0);

  # read the rest from infile
  while (defined(my $rest = <$in_file>)) {
    print {$out_file} $rest;
  }
  return RRED_ED_OK;
}

# Recursive worker for rred_ed_file().
#
# Arguments, in order: ed script handle, input handle, output handle, and the
# number of input lines consumed so far.
#
# Reads one command from the script, recurses to apply all following commands
# (which address lower line numbers) and then applies its own hunk.
#
# Returns the number of input lines consumed after applying the hunk (>= 0),
# or a negated RRED_ED_* code (< 0) on error, so that errors can never be
# mistaken for line numbers.
sub rred_ed_rec {
  no warnings 'recursion';
  my ($ed_cmds, $in_file, $out_file, $line) = @_;

  # get the current command; the end of the script ends the recursion
  my $cmd = <$ed_cmds>;
  return $line unless defined($cmd);

  # parse it: "N" or "N,M" followed by one of c, d, a
  my ($startline, $stopline, $mode) = $cmd =~ m/^(\d+)(?:,(\d+))?([cda])$/;
  return -(RRED_ED_PARSER) unless defined($startline);
  return -(RRED_ED_ORDERING) if ($startline < $line);
  $stopline = $startline unless defined($stopline);

  my $has_text = ($mode eq RRED_MODE_CHANGED) || ($mode eq RRED_MODE_ADDED);
  my $drops_lines = ($mode eq RRED_MODE_DELETED) || ($mode eq RRED_MODE_CHANGED);

  # remember where the text of this command starts in the script
  my $pos = tell($ed_cmds);

  # if this is add or change then skip to the terminating full stop; only a
  # line consisting of a single "." ends the text, as in ed
  if ($has_text) {
    while (defined(my $text = <$ed_cmds>)) {
      last if ($text =~ m/^\.$/);
    }
  }

  # do the recursive call and pass on errors
  $line = rred_ed_rec($ed_cmds, $in_file, $out_file, $line);
  return $line if ($line < 0);

  # apply our hunk
  seek($ed_cmds, $pos, 0);

  # "a" appends after $startline, "c" and "d" act on $startline itself
  $startline -= 1 unless ($mode eq RRED_MODE_ADDED);

  # the hunks applied so far must not reach into this one
  return -(RRED_ED_ORDERING) if ($line > $startline);

  # first wind to the current position, copying the unchanged lines
  while ($line < $startline) {
    my $kept = <$in_file>;
    # the script addresses lines the input does not have
    return -(RRED_ED_FAILURE) unless defined($kept);
    print {$out_file} $kept;
    $line++;
  }

  # include the text from the ed script
  if ($has_text) {
    while (defined(my $text = <$ed_cmds>)) {
      last if ($text =~ m/^\.$/);
      print {$out_file} $text;
    }
  }

  # ignore the corresponding number of lines from input
  if ($drops_lines) {
    while ($line < $stopline) {
      my $dropped = <$in_file>;
      return -(RRED_ED_FAILURE) unless defined($dropped);
      $line++;
    }
  }
  return $line;
}
diff -urN debmirror-20051209~/debian/control debmirror-20051209/debian/control
--- debmirror-20051209~/debian/control  2004-12-09 18:56:46.000000000 +0100
+++ debmirror-20051209/debian/control   2006-09-07 10:08:42.000000000 +0200
@@ -6,7 +6,7 @@
 
 Package: debmirror
 Architecture: all
-Depends: libnet-perl, libdigest-md5-perl, liblockfile-simple-perl, rsync, 
libcompress-zlib-perl, bzip2, libwww-perl
+Depends: libnet-perl, libdigest-md5-perl, liblockfile-simple-perl, rsync, 
libcompress-zlib-perl, bzip2, libwww-perl, libdigest-sha1-perl, patch
 Recommends: gnupg
 Description: Debian partial mirror script, with ftp and package pool support
  This program downloads and maintains a partial local Debian mirror.
diff -urN debmirror-20051209~/debmirror debmirror-20051209/debmirror
--- debmirror-20051209~/debmirror       2005-12-09 19:13:09.000000000 +0100
+++ debmirror-20051209/debmirror        2006-09-07 10:14:01.000000000 +0200
@@ -355,6 +355,7 @@
 use LockFile::Simple;
 use Compress::Zlib;
 use Digest::MD5;
+use Digest::SHA1;
 use LWP::UserAgent;
 
 # Yeah, I use too many global variables in this program.
@@ -634,6 +635,7 @@
       add_bytes("dists/$dist/$section/binary-$arch/Packages.gz");
       add_bytes("dists/$dist/$section/binary-$arch/Packages.bz2");
       add_bytes("dists/$dist/$section/binary-$arch/Release");
+      add_bytes("dists/$dist/$section/binary-$arch/Packages.diff/Index");
     }
     # d-i has no sources over there, sources are in main
     next if ($section =~ /debian-installer/);
@@ -642,6 +644,7 @@
       add_bytes("dists/$dist/$section/source/Sources.gz");
       add_bytes("dists/$dist/$section/source/Sources.bz2");
       add_bytes("dists/$dist/$section/source/Release");
+      add_bytes("dists/$dist/$section/source/Sources.diff/Index");
     }
   }
 }
@@ -650,10 +653,12 @@
   add_bytes("$_/Packages.gz");
   add_bytes("$_/Packages.bz2");
   add_bytes("$_/Release");
+  add_bytes("$_/Packages.diff/Index");
   if ($do_source) {
     add_bytes("$_/Sources");
     add_bytes("$_/Sources.gz");
     add_bytes("$_/Sources.bz2");
+    add_bytes("$_/Sources.diff/Index");
   }
 }
 if ($getcontents) {
@@ -1059,6 +1064,24 @@
   return 0;
 }
 
+# Check uncompressed pdiff content against sha1sum from Index file.
+sub check_pdiff {
+  my ($filename, $size, $sha1) = @_;
+  my $digest = Digest::SHA1->new;
+  my $ret = 0;
+
+  if (-f "$filename.gz") {
+    system_redirect_io("gunzip", "$filename.gz", "$filename");
+    if ($size == -s $filename) {
+      open HANDLE, $filename or die "$filename: $!";
+      $digest->addfile(*HANDLE);
+      $ret = ($sha1 eq $digest->hexdigest);
+    }
+    unlink ($filename);
+  }
+  return $ret;
+}
+
 # Check file against md5sum and size from the Release file.
 # It will return true if the md5sum matches.
 sub check_lists {
@@ -1276,6 +1299,32 @@
   make_dir($subdir);
   make_dir("$tempdir/$subdir");
 
+  if (exists $file_lists_size{"$tempdir/$subdir/Packages.diff/Index"}) {
+    if (!check_lists ("$tempdir/$subdir/Packages.diff/Index")) {
+      make_dir("$subdir/Packages.diff");
+      make_dir("$tempdir/$subdir/Packages.diff");
+      say("$subdir/Packages.diff/Index needs fetch");
+      remote_get("$subdir/Packages.diff/Index");
+      if (!check_lists ("$tempdir/$subdir/Packages.diff/Index")) {
+       say("$subdir/Packages.diff/Index failed md5sum check, removing");
+       push (@errlog,"$subdir/Packages.diff/Index failed md5sum check, 
removing\n");
+       unlink "$tempdir/$subdir/Packages.diff/Index";
+      } else {
+       fetch_and_apply_pdiffs($subdir, "Packages");
+       if (check_lists ("$tempdir/$subdir/Packages")) {
+         system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Packages", 
"$tempdir/$subdir/Packages.gz");
+         system_redirect_io("bzip2", "$tempdir/$subdir/Packages", 
"$tempdir/$subdir/Packages.bz2");
+       }
+      }
+    } else {
+      $bytes_gotten += 
$file_lists_size{"$tempdir/$subdir/Packages.diff/Index"};
+      fetch_and_apply_pdiffs($subdir, "Packages");
+      if (check_lists ("$tempdir/$subdir/Packages")) {
+       system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Packages", 
"$tempdir/$subdir/Packages.gz");
+       system_redirect_io("bzip2", "$tempdir/$subdir/Packages", 
"$tempdir/$subdir/Packages.bz2");
+      }
+    }
+  }
   if (exists $file_lists_size{"$tempdir/$subdir/Packages.gz"}) {
     if (!check_lists ("$tempdir/$subdir/Packages.gz")) {
       say("$subdir/Packages.gz needs fetch");
@@ -1345,10 +1394,12 @@
   $files{"$subdir/Packages.bz2"}=1;
   $files{"$subdir/Packages"}=1;
   $files{"$subdir/Release"}=1;
+  $files{"$subdir/Packages.diff/Index"}=1;
   $files{"$tempdir/$subdir/Packages.gz"}=1;
   $files{"$tempdir/$subdir/Packages.bz2"}=1;
   $files{"$tempdir/$subdir/Packages"}=1;
   $files{"$tempdir/$subdir/Release"}=1;
+  $files{"$tempdir/$subdir/Packages.diff/Index"}=1;
 }
 
 # Get Sources file
@@ -1358,6 +1409,32 @@
   if ($do_source) {
     make_dir($subdir);
     make_dir($tempdir."/".$subdir);
+    if (exists $file_lists_size{"$tempdir/$subdir/Sources.diff/Index"}) {
+      if (!check_lists ("$tempdir/$subdir/Sources.diff/Index")) {
+       make_dir("$subdir/Sources.diff");
+       make_dir("$tempdir/$subdir/Sources.diff");
+       say("$subdir/Sources.diff/Index needs fetch");
+       remote_get("$subdir/Sources.diff/Index");
+       if (!check_lists ("$tempdir/$subdir/Sources.diff/Index")) {
+         say("$subdir/Sources.diff/Index failed md5sum check, removing");
+         push (@errlog,"$subdir/Sources.diff/Index failed md5sum check, 
removing\n");
+         unlink "$tempdir/$subdir/Sources.diff/Index";
+       } else {
+         fetch_and_apply_pdiffs($subdir, "Sources");
+         if (check_lists ("$tempdir/$subdir/Sources")) {
+           system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Sources", 
"$tempdir/$subdir/Sources.gz");
+           system_redirect_io("bzip2", "$tempdir/$subdir/Sources", 
"$tempdir/$subdir/Sources.bz2");
+         }
+       }
+      } else {
+       $bytes_gotten += 
$file_lists_size{"$tempdir/$subdir/Sources.diff/Index"};
+       fetch_and_apply_pdiffs($subdir, "Sources");
+       if (check_lists ("$tempdir/$subdir/Sources")) {
+         system_redirect_io("gzip -9 -n", "$tempdir/$subdir/Sources", 
"$tempdir/$subdir/Sources.gz");
+         system_redirect_io("bzip2", "$tempdir/$subdir/Sources", 
"$tempdir/$subdir/Sources.bz2");
+       }
+      }
+    }
     if (exists $file_lists_size{"$tempdir/$subdir/Sources.gz"}) {
       if (!check_lists ("$tempdir/$subdir/Sources.gz")) {
        say("$subdir/Sources.gz needs fetch");
@@ -1427,10 +1504,96 @@
     $files{"$subdir/Sources"}=1;
     $files{"$subdir/Sources.bz2"}=1;
     $files{"$subdir/Release"}=1;
+    $files{"$subdir/Sources.diff/Index"}=1;
     $files{"$tempdir/$subdir/Sources.gz"}=1;
     $files{"$tempdir/$subdir/Sources"}=1;
     $files{"$tempdir/$subdir/Sources.bz2"}=1;
     $files{"$tempdir/$subdir/Release"}=1;
+    $files{"$tempdir/$subdir/Sources.diff/Index"}=1;
+  }
+}
+
+sub fetch_and_apply_pdiffs {
+  my ($subdir, $list) = @_;
+  local (*INDEX, *LIST);
+  my (%history_sha1, %history_size, %pdiff_sha1, %pdiff_size);
+  my ($current_sha1, $current_size, $sha1, $size, $file, $digest, $ret, 
$patch);
+
+  # Parse DiffIndex file
+  open(INDEX, "$tempdir/$subdir/$list.diff/Index") or die 
"$tempdir/$subdir/$list.diff/Index: $!";
+  $_ = <INDEX>;
+  while (defined($_)) {
+    if (m/^SHA1-Current:/m) {
+      ($current_sha1, $current_size) = 
m/^SHA1-Current:\s+([A-Za-z0-9]+)\s+(\d+)/m;
+      $_ = <INDEX>;
+    }
+    elsif (m/^SHA1-History:/m) {
+      while (defined($_ = <INDEX>)) {
+       last if (!m/^\s/m);
+       ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m;
+       $history_sha1{$file} = $sha1;
+       $history_size{$file} = $size;
+      }
+    }
+    elsif (m/^SHA1-Patches:/m) {
+      while (defined($_ = <INDEX>)) {
+       last if (!m/^\s/m);
+       ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m;
+       $pdiff_sha1{$file} = $sha1;
+       $pdiff_size{$file} = $size;
+      }
+    }
+  }
+  close(INDEX);
+
+  # Download pdiff files as necessary
+  $ret = 1;
+  foreach $file (sort keys %pdiff_sha1) {
+    if (!check_pdiff("$tempdir/$subdir/$list.diff/$file", $pdiff_size{$file}, 
$pdiff_sha1{$file})) {
+      say("$subdir/$list.diff/$file.gz needs fetch");
+      remote_get("$subdir/$list.diff/$file.gz");
+      $bytes_to_get += -s "$tempdir/$subdir/$list.diff/$file.gz";
+      if (!check_pdiff("$tempdir/$subdir/$list.diff/$file", 
$pdiff_size{$file}, $pdiff_sha1{$file})) {
+       say("$subdir/$list.diff/$file.gz failed sha1sum check, removing");
+       push (@errlog,"$subdir/$list.diff/$file.gz failed sha1sum check, 
removing\n");
+       unlink "$tempdir/$subdir/$list.diff/$file.gz";
+       $ret = 0;
+      }
+    } else {
+      $bytes_to_get += -s "$tempdir/$subdir/$list.diff/$file.gz";
+      $bytes_gotten += -s "$tempdir/$subdir/$list.diff/$file.gz";
+    }
+    $files{"$subdir/$list.diff/$file.gz"}=1;
+    $files{"$tempdir/$subdir/$list.diff/$file.gz"}=1;
+  }
+  return unless ($ret);
+
+  # Apply pdiff files
+  open(LIST, "$subdir/$list") or return;
+  $digest = Digest::SHA1->new;
+  $digest->addfile(*LIST);
+  $sha1 = $digest->hexdigest;
+  $size = -s "$subdir/$list";
+  unlink("$tempdir/$subdir/$list");
+  system("cp $subdir/$list $tempdir/$subdir/$list");
+  $patch = 0;
+  foreach $file (sort keys %history_sha1) {
+    $patch = 1 if ($sha1 eq $history_sha1{$file} && $size eq 
$history_size{$file});
+    next unless ($patch);
+    system_redirect_io("gunzip", "$tempdir/$subdir/$list.diff/$file.gz", 
"$tempdir/$subdir/$list.diff/$file");
+    system("patch --ed $tempdir/$subdir/$list 
$tempdir/$subdir/$list.diff/$file");
+    unlink("$tempdir/$subdir/$list.diff/$file");
+    say("$subdir/$list patched with $subdir/$list.diff/$file.gz");
+  }
+  open(LIST, "$tempdir/$subdir/$list") or die "$tempdir/$subdir/$list: $!";
+  $digest = Digest::SHA1->new;
+  $digest->addfile(*LIST);
+  $sha1 = $digest->hexdigest;
+  $size = -s "$tempdir/$subdir/$list";
+  if (!($sha1 eq $current_sha1 && $size eq $current_size)) {
+    say("$subdir/$list failed sha1sum check, removing");
+    push (@errlog,"$subdir/$list failed sha1sum check, removing\n");
+    unlink "$tempdir/$subdir/$list";
   }
 }
 

Reply via email to