control: reopen -1

On 2016-10-29 14:15:26 [+0200], Sebastian Andrzej Siewior wrote:
> I moved away from bsdiff and don't need this anymore. This improvement helps a
> little, but the main reason for moving away is that the average file was getting
> bigger, to the point where bsdiff was still too hungry on my average box.

I reconsidered. Refreshed patches follow.
 
Sebastian
>From ec1bd32377001835f1dc18d14e8f1417128970da Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebast...@breakpoint.cc>
Date: Mon, 31 Oct 2016 13:34:50 +0100
Subject: [PATCH 1/3] mmap() src file instead of malloc() + read() it

Signed-off-by: Sebastian Andrzej Siewior <sebast...@breakpoint.cc>
---
 bsdiff.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/bsdiff.c b/bsdiff.c
index 150a7f79c488..f25304548101 100644
--- a/bsdiff.c
+++ b/bsdiff.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <sys/mman.h>
 
 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
 
@@ -215,14 +216,18 @@ int main(int argc,char *argv[])
 
 	if(argc!=4) errx(1,"usage: %s oldfile newfile patchfile\n",argv[0]);
 
-	/* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
-		that we never try to malloc(0) and get a NULL pointer */
-	if(((fd=open(argv[1],O_RDONLY,0))<0) ||
-		((oldsize=lseek(fd,0,SEEK_END))==-1) ||
-		((old=malloc(oldsize+1))==NULL) ||
-		(lseek(fd,0,SEEK_SET)!=0) ||
-		(read(fd,old,oldsize)!=oldsize) ||
-		(close(fd)==-1)) err(1,"%s",argv[1]);
+	fd = open(argv[1], O_RDONLY,0);
+	if (fd < 0)
+		err(1, "Open %s", argv[1]);
+
+	oldsize = lseek(fd, 0, SEEK_END);
+	if (oldsize < 0)
+		err(1, "seek %s", argv[1]);
+
+	old = mmap(NULL, oldsize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
+	if (old == MAP_FAILED)
+		err(1, "mmap() %s", argv[1]);
+	close(fd);
 
 	if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) ||
 		((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL);
@@ -397,7 +402,7 @@ int main(int argc,char *argv[])
 	free(db);
 	free(eb);
 	free(I);
-	free(old);
+	munmap(old, oldsize);
 	free(new);
 
 	return 0;
-- 
2.10.2

>From 84bf617a3cfa0b39d7941996ea82ebf8973315d7 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebast...@breakpoint.cc>
Date: Sat, 2 Jul 2011 20:06:58 +0200
Subject: [PATCH 2/3] mmap() dst file instead of malloc() + read() it

This reduces memory pressure, since the OS may now drop and reload parts of
the old and new files on demand.

Signed-off-by: Sebastian Andrzej Siewior <sebast...@breakpoint.cc>
---
 bsdiff.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/bsdiff.c b/bsdiff.c
index f25304548101..f975e3722cba 100644
--- a/bsdiff.c
+++ b/bsdiff.c
@@ -238,12 +238,16 @@ int main(int argc,char *argv[])
 
 	/* Allocate newsize+1 bytes instead of newsize bytes to ensure
 		that we never try to malloc(0) and get a NULL pointer */
-	if(((fd=open(argv[2],O_RDONLY,0))<0) ||
-		((newsize=lseek(fd,0,SEEK_END))==-1) ||
-		((new=malloc(newsize+1))==NULL) ||
-		(lseek(fd,0,SEEK_SET)!=0) ||
-		(read(fd,new,newsize)!=newsize) ||
-		(close(fd)==-1)) err(1,"%s",argv[2]);
+	fd = open(argv[2], O_RDONLY, 0);
+	if (fd < 0)
+		err(1, "open %s", argv[2]);
+	newsize = lseek(fd, 0, SEEK_END);
+	if (newsize == -1)
+		err(1, "lseek %s", argv[2]);
+	new = mmap(NULL, newsize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
+	if (new == MAP_FAILED)
+		err(1, "mmap %s", argv[2]);
+	close(fd);
 
 	if(((db=malloc(newsize+1))==NULL) ||
 		((eb=malloc(newsize+1))==NULL)) err(1,NULL);
@@ -403,7 +407,7 @@ int main(int argc,char *argv[])
 	free(eb);
 	free(I);
 	munmap(old, oldsize);
-	free(new);
+	munmap(new, newsize);
 
 	return 0;
 }
-- 
2.10.2

>From 1b61950f9d292fb70affcc12c0b41a42b649745d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bige...@linutronix.de>
Date: Mon, 31 Oct 2016 21:08:27 +0100
Subject: [PATCH 3/3] use int32_t instead of off_t for file size

Using off_t (64-bit) is wasteful. With the last change we only need 16x the
size of the old file. So for a 2GiB - 1 file we would allocate almost
32GiB (the content of the old file would be loaded on demand from disk).
This is a lot. Since the file size is less than 2GiB, we leave the upper 4
bytes of each off_t unused.
With this change the max file size is limited to 2GiB - 1 and we require
8x the size of the old file, which makes almost 16GiB.

When we assume a virtual address space of 3GiB on a 32-bit system, the max
oldsize increases from ~180MiB to ~341MiB.

_If_ someone sees this as a regression because files >2GiB can no longer be
used, please provide another binary built with -DUSE_OFF_T. The binary is
smaller than 20KiB.

Signed-off-by: Sebastian Andrzej Siewior <sebast...@breakpoint.cc>
---
 bsdiff.c | 62 ++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 22 deletions(-)

diff --git a/bsdiff.c b/bsdiff.c
index f975e3722cba..997c87b01f56 100644
--- a/bsdiff.c
+++ b/bsdiff.c
@@ -39,11 +39,24 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05
 #include <unistd.h>
 #include <sys/mman.h>
 
+#include <limits.h>
+
+#ifdef USE_OFF_T
+
+#define t_off off_t
+#define t_off_max LLONG_MAX
+
+#else
+
+#define t_off int32_t
+#define t_off_max INT_MAX
+#endif
+
 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
 
-static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h)
+static void split(t_off *I,t_off *V,t_off start,t_off len,t_off h)
 {
-	off_t i,j,k,x,tmp,jj,kk;
+	t_off i,j,k,x,tmp,jj,kk;
 
 	if(len<16) {
 		for(k=start;k<start+len;k+=j) {
@@ -102,10 +115,10 @@ static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h)
 	if(start+len>kk) split(I,V,kk,start+len-kk,h);
 }
 
-static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize)
+static void qsufsort(t_off *I,t_off *V,u_char *old,t_off oldsize)
 {
-	off_t buckets[256];
-	off_t i,h,len;
+	t_off buckets[256];
+	t_off i,h,len;
 
 	for(i=0;i<256;i++) buckets[i]=0;
 	for(i=0;i<oldsize;i++) buckets[old[i]]++;
@@ -140,9 +153,9 @@ static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize)
 	for(i=0;i<oldsize+1;i++) I[V[i]]=i;
 }
 
-static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
+static t_off matchlen(u_char *old,t_off oldsize,u_char *new,t_off newsize)
 {
-	off_t i;
+	t_off i;
 
 	for(i=0;(i<oldsize)&&(i<newsize);i++)
 		if(old[i]!=new[i]) break;
@@ -150,10 +163,10 @@ static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
 	return i;
 }
 
-static off_t search(off_t *I,u_char *old,off_t oldsize,
-		u_char *new,off_t newsize,off_t st,off_t en,off_t *pos)
+static t_off search(t_off *I,u_char *old,t_off oldsize,
+		u_char *new,t_off newsize,t_off st,t_off en,t_off *pos)
 {
-	off_t x,y;
+	t_off x,y;
 
 	if(en-st<2) {
 		x=matchlen(old+I[st],oldsize-I[st],new,newsize);
@@ -176,9 +189,9 @@ static off_t search(off_t *I,u_char *old,off_t oldsize,
 	};
 }
 
-static void offtout(off_t x,u_char *buf)
+static void offtout(t_off x,u_char *buf)
 {
-	off_t y;
+	t_off y;
 
 	if(x<0) y=-x; else y=x;
 
@@ -199,14 +212,14 @@ int main(int argc,char *argv[])
 	int fd;
 	u_char *old,*new;
 	off_t oldsize,newsize;
-	off_t *I,*V;
-	off_t scan,pos,len;
-	off_t lastscan,lastpos,lastoffset;
-	off_t oldscore,scsc;
-	off_t s,Sf,lenf,Sb,lenb;
-	off_t overlap,Ss,lens;
-	off_t i;
-	off_t dblen,eblen;
+	t_off *I,*V;
+	t_off scan,pos,len;
+	t_off lastscan,lastpos,lastoffset;
+	t_off oldscore,scsc;
+	t_off s,Sf,lenf,Sb,lenb;
+	t_off overlap,Ss,lens;
+	t_off i;
+	t_off dblen,eblen;
 	u_char *db,*eb;
 	u_char buf[8];
 	u_char header[32];
@@ -223,14 +236,16 @@ int main(int argc,char *argv[])
 	oldsize = lseek(fd, 0, SEEK_END);
 	if (oldsize < 0)
 		err(1, "seek %s", argv[1]);
+	if (oldsize > t_off_max)
+		err(1, "file too large %s", argv[1]);
 
 	old = mmap(NULL, oldsize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
 	if (old == MAP_FAILED)
 		err(1, "mmap() %s", argv[1]);
 	close(fd);
 
-	if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) ||
-		((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL);
+	if(((I=malloc((oldsize+1)*sizeof(t_off)))==NULL) ||
+		((V=malloc((oldsize+1)*sizeof(t_off)))==NULL)) err(1,NULL);
 
 	qsufsort(I,V,old,oldsize);
 
@@ -244,6 +259,9 @@ int main(int argc,char *argv[])
 	newsize = lseek(fd, 0, SEEK_END);
 	if (newsize == -1)
 		err(1, "lseek %s", argv[2]);
+	if (newsize > t_off_max)
+		err(1, "file too large %s", argv[2]);
+
 	new = mmap(NULL, newsize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
 	if (new == MAP_FAILED)
 		err(1, "mmap %s", argv[2]);
-- 
2.10.2

Reply via email to