control: reopen -1 On 2016-10-29 14:15:26 [+0200], Sebastian Andrzej Siewior wrote: > I moved away from bsdiff and don't need this anymore. This improvement helps a > little, but the main reason for moving away is that the average file was getting > bigger to a point where bsdiff was still too hungry on my average box.
reconsider. refreshed patches. Sebastian
>From ec1bd32377001835f1dc18d14e8f1417128970da Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior <sebast...@breakpoint.cc> Date: Mon, 31 Oct 2016 13:34:50 +0100 Subject: [PATCH 1/3] mmap() src file instead of malloc() + read() it Signed-off-by: Sebastian Andrzej Siewior <sebast...@breakpoint.cc> --- bsdiff.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/bsdiff.c b/bsdiff.c index 150a7f79c488..f25304548101 100644 --- a/bsdiff.c +++ b/bsdiff.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <sys/mman.h> #define MIN(x,y) (((x)<(y)) ? (x) : (y)) @@ -215,14 +216,18 @@ int main(int argc,char *argv[]) if(argc!=4) errx(1,"usage: %s oldfile newfile patchfile\n",argv[0]); - /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure - that we never try to malloc(0) and get a NULL pointer */ - if(((fd=open(argv[1],O_RDONLY,0))<0) || - ((oldsize=lseek(fd,0,SEEK_END))==-1) || - ((old=malloc(oldsize+1))==NULL) || - (lseek(fd,0,SEEK_SET)!=0) || - (read(fd,old,oldsize)!=oldsize) || - (close(fd)==-1)) err(1,"%s",argv[1]); + fd = open(argv[1], O_RDONLY,0); + if (fd < 0) + err(1, "Open %s", argv[1]); + + oldsize = lseek(fd, 0, SEEK_END); + if (oldsize < 0) + err(1, "seek %s", argv[1]); + + old = mmap(NULL, oldsize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0); + if (old == MAP_FAILED) + err(1, "mmap() %s", argv[1]); + close(fd); if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) || ((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL); @@ -397,7 +402,7 @@ int main(int argc,char *argv[]) free(db); free(eb); free(I); - free(old); + munmap(old, oldsize); free(new); return 0; -- 2.10.2
>From 84bf617a3cfa0b39d7941996ea82ebf8973315d7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior <sebast...@breakpoint.cc> Date: Sat, 2 Jul 2011 20:06:58 +0200 Subject: [PATCH 2/3] mmap() dst file instead of malloc() + read() it This drops the memory pressure since the OS may now drop and reload parts of the old and new file on demand. Signed-off-by: Sebastian Andrzej Siewior <sebast...@breakpoint.cc> --- bsdiff.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/bsdiff.c b/bsdiff.c index f25304548101..f975e3722cba 100644 --- a/bsdiff.c +++ b/bsdiff.c @@ -238,12 +238,16 @@ int main(int argc,char *argv[]) /* Allocate newsize+1 bytes instead of newsize bytes to ensure that we never try to malloc(0) and get a NULL pointer */ - if(((fd=open(argv[2],O_RDONLY,0))<0) || - ((newsize=lseek(fd,0,SEEK_END))==-1) || - ((new=malloc(newsize+1))==NULL) || - (lseek(fd,0,SEEK_SET)!=0) || - (read(fd,new,newsize)!=newsize) || - (close(fd)==-1)) err(1,"%s",argv[2]); + fd = open(argv[2], O_RDONLY, 0); + if (fd < 0) + err(1, "open %s", argv[2]); + newsize = lseek(fd, 0, SEEK_END); + if (newsize == -1) + err(1, "lseek %s", argv[2]); + new = mmap(NULL, newsize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0); + if (new == MAP_FAILED) + err(1, "mmap %s", argv[2]); + close(fd); if(((db=malloc(newsize+1))==NULL) || ((eb=malloc(newsize+1))==NULL)) err(1,NULL); @@ -403,7 +407,7 @@ int main(int argc,char *argv[]) free(eb); free(I); munmap(old, oldsize); - free(new); + munmap(new, newsize); return 0; } -- 2.10.2
>From 1b61950f9d292fb70affcc12c0b41a42b649745d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior <bige...@linutronix.de> Date: Mon, 31 Oct 2016 21:08:27 +0100 Subject: [PATCH 3/3] use int32_t instead of off_t for file size Using off_t (64bit) is kind of a waste. With the last change we only need 16x the size of the old file. So for a 2GiB - 1 file we would allocate almost 32GiB (the content of the old file would be loaded on demand from disk). This is a lot. Since the file size is less than 2GiB we leave the upper 4 bytes unused. With this change the max file size is limited to 2GiB - 1 and we require 8x the size of the oldfile which makes almost 16GiB. When we assume a virtual address space of 3GiB on a 32bit system then the max oldsize increases from about ~180MiB to ~341MiB. _If_ someone sees this as a regression because files >2GiB can not be used anymore please provide another binary with -DUSE_OFF_T. The binary is smaller than 20KiB. Signed-off-by: Sebastian Andrzej Siewior <sebast...@breakpoint.cc> --- bsdiff.c | 62 ++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 22 deletions(-) diff --git a/bsdiff.c b/bsdiff.c index f975e3722cba..997c87b01f56 100644 --- a/bsdiff.c +++ b/bsdiff.c @@ -39,11 +39,24 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 #include <unistd.h> #include <sys/mman.h> +#include <limits.h> + +#ifdef USE_OFF_T + +#define t_off off_t +#define t_off_max LLONG_MAX + +#else + +#define t_off int32_t +#define t_off_max INT_MAX +#endif + #define MIN(x,y) (((x)<(y)) ? 
(x) : (y)) -static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h) +static void split(t_off *I,t_off *V,t_off start,t_off len,t_off h) { - off_t i,j,k,x,tmp,jj,kk; + t_off i,j,k,x,tmp,jj,kk; if(len<16) { for(k=start;k<start+len;k+=j) { @@ -102,10 +115,10 @@ static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h) if(start+len>kk) split(I,V,kk,start+len-kk,h); } -static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize) +static void qsufsort(t_off *I,t_off *V,u_char *old,t_off oldsize) { - off_t buckets[256]; - off_t i,h,len; + t_off buckets[256]; + t_off i,h,len; for(i=0;i<256;i++) buckets[i]=0; for(i=0;i<oldsize;i++) buckets[old[i]]++; @@ -140,9 +153,9 @@ static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize) for(i=0;i<oldsize+1;i++) I[V[i]]=i; } -static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize) +static t_off matchlen(u_char *old,t_off oldsize,u_char *new,t_off newsize) { - off_t i; + t_off i; for(i=0;(i<oldsize)&&(i<newsize);i++) if(old[i]!=new[i]) break; @@ -150,10 +163,10 @@ static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize) return i; } -static off_t search(off_t *I,u_char *old,off_t oldsize, - u_char *new,off_t newsize,off_t st,off_t en,off_t *pos) +static t_off search(t_off *I,u_char *old,t_off oldsize, + u_char *new,t_off newsize,t_off st,t_off en,t_off *pos) { - off_t x,y; + t_off x,y; if(en-st<2) { x=matchlen(old+I[st],oldsize-I[st],new,newsize); @@ -176,9 +189,9 @@ static off_t search(off_t *I,u_char *old,off_t oldsize, }; } -static void offtout(off_t x,u_char *buf) +static void offtout(t_off x,u_char *buf) { - off_t y; + t_off y; if(x<0) y=-x; else y=x; @@ -199,14 +212,14 @@ int main(int argc,char *argv[]) int fd; u_char *old,*new; off_t oldsize,newsize; - off_t *I,*V; - off_t scan,pos,len; - off_t lastscan,lastpos,lastoffset; - off_t oldscore,scsc; - off_t s,Sf,lenf,Sb,lenb; - off_t overlap,Ss,lens; - off_t i; - off_t dblen,eblen; + t_off *I,*V; + t_off 
scan,pos,len; + t_off lastscan,lastpos,lastoffset; + t_off oldscore,scsc; + t_off s,Sf,lenf,Sb,lenb; + t_off overlap,Ss,lens; + t_off i; + t_off dblen,eblen; u_char *db,*eb; u_char buf[8]; u_char header[32]; @@ -223,14 +236,16 @@ int main(int argc,char *argv[]) oldsize = lseek(fd, 0, SEEK_END); if (oldsize < 0) err(1, "seek %s", argv[1]); + if (oldsize > t_off_max) + err(1, "file too large %s", argv[1]); old = mmap(NULL, oldsize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0); if (old == MAP_FAILED) err(1, "mmap() %s", argv[1]); close(fd); - if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) || - ((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL); + if(((I=malloc((oldsize+1)*sizeof(t_off)))==NULL) || + ((V=malloc((oldsize+1)*sizeof(t_off)))==NULL)) err(1,NULL); qsufsort(I,V,old,oldsize); @@ -244,6 +259,9 @@ int main(int argc,char *argv[]) newsize = lseek(fd, 0, SEEK_END); if (newsize == -1) err(1, "lseek %s", argv[2]); + if (newsize > t_off_max) + err(1, "file too large %s", argv[2]); + new = mmap(NULL, newsize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0); if (new == MAP_FAILED) err(1, "mmap %s", argv[2]); -- 2.10.2