Package: par2
Version: 0.4-11
Severity: normal
Tags: upstream patch

A bug exists in the algorithm used by par2 to choose the best blocksize for a 
recovery set.
If I request a certain block count, then the software tries to determine the 
smallest block size that will satisfy this request.
It performs a binary search in par2creator.cpp lines 261 to 342, however it 
then throws the result of the search away.
As a result, the algorithm always picks the mean of the upper bound and the 
lower bound as the block size, unless all the input files are identical size.
This block size can be quite a bit bigger than required, which causes par2 to 
generate bigger parity files than necessary.
This recently increased the number of DVDs that my backups consumed, which was 
annoying.

I have created a patch that corrects the binary search algorithm.
It now returns the smallest block size that results in no more than the 
requested number of blocks.

This patch should be passed upstream.

Matthew

-- System Information:
Debian Release: 6.0.3
  APT prefers stable
  APT policy: (500, 'stable')
Architecture: amd64 (x86_64)

Kernel: Linux 2.6.32-5-amd64 (SMP w/2 CPU cores)
Locale: LANG=en_GB, LC_CTYPE=en_GB (charmap=ISO-8859-1)
Shell: /bin/sh linked to /bin/bash

Versions of packages par2 depends on:
ii  libc6                         2.11.2-10  Embedded GNU C Library: Shared lib
ii  libgcc1                       1:4.4.5-8  GCC support library
ii  libstdc++6                    4.6.1-4    GNU Standard C++ Library v3

par2 recommends no packages.

par2 suggests no packages.

-- no debconf information
--- orig/par2cmdline-0.4/par2creator.cpp	2004-04-15 14:48:41.000000000 +0100
+++ par2cmdline-0.4/par2creator.cpp	2012-01-10 13:20:47.000000000 +0000
@@ -263,8 +263,6 @@
         u64 lowerBound = totalsize / sourceblockcount;
         u64 upperBound = (totalsize + sourceblockcount - extrafiles.size() - 1) / (sourceblockcount - extrafiles.size());
 
-        u64 bestsize = lowerBound;
-        u64 bestdistance = 1000000;
         u64 bestcount = 0;
 
         u64 count;
@@ -280,34 +278,11 @@
             count += ((i->FileSize()+3)/4 + size-1) / size;
           }
 
-          if (bestdistance > (count>sourceblockcount ? count-sourceblockcount : sourceblockcount-count))
-          {
-            bestdistance = (count>sourceblockcount ? count-sourceblockcount : sourceblockcount-count);
-            bestcount = count;
-            bestsize = size;
-          }
-        }
-
-        // Work out how many blocks you get for the upper bound block size
-        {
-          size = upperBound;
-
-          count = 0;
-          for (ExtraFileIterator i=extrafiles.begin(); i!=extrafiles.end(); i++)
-          {
-            count += ((i->FileSize()+3)/4 + size-1) / size;
-          }
-
-          if (bestdistance > (count>sourceblockcount ? count-sourceblockcount : sourceblockcount-count))
-          {
-            bestdistance = (count>sourceblockcount ? count-sourceblockcount : sourceblockcount-count);
-            bestcount = count;
-            bestsize = size;
-          }
+          bestcount = count;
         }
 
         // Use binary search to find best block size
-        while (lowerBound+1 < upperBound)
+        while (lowerBound < upperBound)
         {
           size = (lowerBound + upperBound)/2;
 
@@ -317,28 +292,18 @@
             count += ((i->FileSize()+3)/4 + size-1) / size;
           }
 
-          if (bestdistance > (count>sourceblockcount ? count-sourceblockcount : sourceblockcount-count))
-          {
-            bestdistance = (count>sourceblockcount ? count-sourceblockcount : sourceblockcount-count);
-            bestcount = count;
-            bestsize = size;
-          }
-
-          if (count < sourceblockcount)
-          {
-            upperBound = size;
-          }
-          else if (count > sourceblockcount)
+          if (count > sourceblockcount)
           {
-            lowerBound = size;
+            lowerBound = size + 1;
           }
           else
           {
             upperBound = size;
+            bestcount = count;
           }
         }
 
-        size = bestsize;
+        size = lowerBound;
         count = bestcount;
 
         if (count > 32768)

Reply via email to