Package: bsdiff Version: 4.3-10 Severity: wishlist Tags: patch bsdiff requires 17x the size of the old file in memory during its initial phase, which is quite a lot. These two patches try to push that down to 8x. The limitation is that the maximum old file size becomes 2GiB - 1. Diffing a file of that size requires 16GiB of extra memory with the patches applied (32GiB without them). Please find the patches attached, each including a proper description.
Sebastian
>From a62890ad764913e7f00d94eb40d8acefbddf8222 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior <sebast...@breakpoint.cc> Date: Sat, 2 Jul 2011 20:06:58 +0200 Subject: [PATCH 1/2] mmap() src & dst file instead of malloc() + read() it This drops the memory pressure since the OS may now drop and reload parts of the old and new file on demand. Signed-off-by: Sebastian Andrzej Siewior <sebast...@breakpoint.cc> --- bsdiff.c | 17 ++++++++++------- 1 files changed, 10 insertions(+), 7 deletions(-) diff --git a/bsdiff.c b/bsdiff.c index dc2e845..d16e7f1 100644 --- a/bsdiff.c +++ b/bsdiff.c @@ -238,12 +238,15 @@ int main(int argc,char *argv[]) /* Allocate newsize+1 bytes instead of newsize bytes to ensure that we never try to malloc(0) and get a NULL pointer */ - if(((fd=open(argv[2],O_RDONLY,0))<0) || - ((newsize=lseek(fd,0,SEEK_END))==-1) || - ((new=malloc(newsize+1))==NULL) || - (lseek(fd,0,SEEK_SET)!=0) || - (read(fd,new,newsize)!=newsize) || - (close(fd)==-1)) err(1,"%s",argv[2]); + fd = open(argv[2], O_RDONLY, 0); + if (fd < 0) + err(1, "open %s", argv[2]); + newsize = lseek(fd, 0, SEEK_END); + if (newsize == -1) + err(1, "lseek %s", argv[2]); + new = mmap(NULL, newsize + 1, PROT_READ, MAP_SHARED, fd, 0); + if (new == MAP_FAILED) + err(1, "mmap %s", argv[2]); if(((db=malloc(newsize+1))==NULL) || ((eb=malloc(newsize+1))==NULL)) err(1,NULL); @@ -403,7 +406,7 @@ int main(int argc,char *argv[]) free(eb); free(I); munmap(old, oldsize); - free(new); + munmap(new, newsize); return 0; } -- 1.7.5.4
>From e8415b6b829b9851995b0100224cf2aa5edf24e4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior <sebast...@breakpoint.cc> Date: Sun, 3 Jul 2011 19:55:33 +0200 Subject: [PATCH 2/2] use signed int instead of off_t for file size Using off_t (64bit) is kind of a waste. With the last change we only need 16x the size of the old file. So for a 2GiB - 1 file we would allocate almost 32GiB (the content of the old file would be loaded on demand from disk). This is a lot. Since the file size is less than 2GiB we leave the upper 4 bytes unused. With this change the max file size is limited to 2GiB - 1 and we require 8x the size of the old file, which makes almost 16GiB. When we assume a virtual address space of 3GiB on a 32bit system then the max oldsize increases from about ~180MiB to ~341MiB. _If_ someone sees this as a regression because files >2GiB cannot be used anymore, please provide another binary built with -Dt_off=off_t. The binary has less than 20KiB. Signed-off-by: Sebastian Andrzej Siewior <sebast...@breakpoint.cc> --- bsdiff.c | 55 +++++++++++++++++++++++++++++++++---------------------- 1 files changed, 33 insertions(+), 22 deletions(-) diff --git a/bsdiff.c b/bsdiff.c index d16e7f1..345fa4f 100644 --- a/bsdiff.c +++ b/bsdiff.c @@ -38,12 +38,17 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <limits.h> + +#ifndef t_off +#define t_off signed int +#endif #define MIN(x,y) (((x)<(y)) ? 
(x) : (y)) -static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h) +static void split(t_off *I,t_off *V,t_off start,t_off len,t_off h) { - off_t i,j,k,x,tmp,jj,kk; + t_off i,j,k,x,tmp,jj,kk; if(len<16) { for(k=start;k<start+len;k+=j) { @@ -102,10 +107,10 @@ static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h) if(start+len>kk) split(I,V,kk,start+len-kk,h); } -static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize) +static void qsufsort(t_off *I,t_off *V,u_char *old,t_off oldsize) { - off_t buckets[256]; - off_t i,h,len; + t_off buckets[256]; + t_off i,h,len; for(i=0;i<256;i++) buckets[i]=0; for(i=0;i<oldsize;i++) buckets[old[i]]++; @@ -140,9 +145,9 @@ static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize) for(i=0;i<oldsize+1;i++) I[V[i]]=i; } -static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize) +static t_off matchlen(u_char *old,t_off oldsize,u_char *new,t_off newsize) { - off_t i; + t_off i; for(i=0;(i<oldsize)&&(i<newsize);i++) if(old[i]!=new[i]) break; @@ -150,10 +155,10 @@ static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize) return i; } -static off_t search(off_t *I,u_char *old,off_t oldsize, - u_char *new,off_t newsize,off_t st,off_t en,off_t *pos) +static t_off search(t_off *I,u_char *old,t_off oldsize, + u_char *new,t_off newsize,t_off st,t_off en,t_off *pos) { - off_t x,y; + t_off x,y; if(en-st<2) { x=matchlen(old+I[st],oldsize-I[st],new,newsize); @@ -176,9 +181,9 @@ static off_t search(off_t *I,u_char *old,off_t oldsize, }; } -static void offtout(off_t x,u_char *buf) +static void offtout(t_off x,u_char *buf) { - off_t y; + t_off y; if(x<0) y=-x; else y=x; @@ -199,14 +204,14 @@ int main(int argc,char *argv[]) int fd; u_char *old,*new; off_t oldsize,newsize; - off_t *I,*V; - off_t scan,pos,len; - off_t lastscan,lastpos,lastoffset; - off_t oldscore,scsc; - off_t s,Sf,lenf,Sb,lenb; - off_t overlap,Ss,lens; - off_t i; - off_t dblen,eblen; + t_off *I,*V; + t_off 
scan,pos,len; + t_off lastscan,lastpos,lastoffset; + t_off oldscore,scsc; + t_off s,Sf,lenf,Sb,lenb; + t_off overlap,Ss,lens; + t_off i; + t_off dblen,eblen; u_char *db,*eb; u_char buf[8]; u_char header[32]; @@ -224,13 +229,16 @@ int main(int argc,char *argv[]) oldsize = lseek(fd, 0, SEEK_END); if (oldsize == -1) err(1, "lseek %s", argv[1]); + if (oldsize > INT_MAX) + err(1, "file too large %s", argv[1]); + old = mmap(NULL, oldsize + 1, PROT_READ, MAP_SHARED, fd, 0); if (old == MAP_FAILED) err(1, "mmap %s", argv[1]); close(fd); - if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) || - ((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL); + if(((I=malloc((oldsize+1)*sizeof(t_off)))==NULL) || + ((V=malloc((oldsize+1)*sizeof(t_off)))==NULL)) err(1,NULL); qsufsort(I,V,old,oldsize); @@ -244,6 +252,9 @@ int main(int argc,char *argv[]) newsize = lseek(fd, 0, SEEK_END); if (newsize == -1) err(1, "lseek %s", argv[2]); + if (newsize > INT_MAX) + err(1, "file too large %s", argv[2]); + new = mmap(NULL, newsize + 1, PROT_READ, MAP_SHARED, fd, 0); if (new == MAP_FAILED) err(1, "mmap %s", argv[2]); -- 1.7.5.4