This change reworks parse.c to avoid mmapping the entire file, which reduces memory usage by up to 20MB when parsing the 'available' file.
The file is now read in 64KB chunks. A new chunk is loaded whenever we get to the end of the previous chunk. Because fields can cross chunk boundaries (or even be larger than 64KB themselves, theoretically), they are copied to a separate storage buffer while parsing, which can be reallocated as needed. Note that because the storage buffer can get reallocated, the 'fieldstart' and 'valuestart' pointers cannot be preserved across fbuf_getc invocations - instead we must preserve their positions as integer offsets from the start of the (reallocatable) storage buffer. This helps a little more, but not as much as the previous two patches. I think I'll stop here as we're hitting a point of diminishing returns. One could probably do better by using proper database techniques to handle the package information, but this is probably overkill. -- Michel "Walken" Lespinasse "Bill Gates is a monocle and a Persian cat away from being the villain in a James Bond movie." -- Dennis Miller
diff -ru dpkg-1.13.24.patch12/lib/parse.c dpkg-1.13.24/lib/parse.c --- dpkg-1.13.24.patch12/lib/parse.c 2006-10-27 21:21:25.000000000 -0700 +++ dpkg-1.13.24/lib/parse.c 2006-10-28 00:33:10.000000000 -0700 @@ -76,7 +76,35 @@ #define NFIELDS (sizeof(fieldinfos)/sizeof(struct fieldinfo)) const int nfields= NFIELDS; -static void cu_parsedb(int argc, void **argv) { close(*(int *)argv); } +#define READ_SIZE 65536 +static int fbuf_fd; +static char fbuf_readbuf[READ_SIZE]; +static int fbuf_readpos, fbuf_readfilled; +static char *fbuf_storebuf; +static int fbuf_storepos= 0, fbuf_storealloc= 0; + +static void cu_parsedb(int argc, void **argv) { close(fbuf_fd); } + +static int fbuf_readblock(void) { + fbuf_readfilled= read(fbuf_fd, fbuf_readbuf, READ_SIZE); + if (fbuf_readfilled < 0) ohshite(_("error reading package file")); + fbuf_readpos= 0; +} + +static inline int fbuf_EOF(void) { + return fbuf_readfilled == 0; +} + +static inline int fbuf_getc(void) { + int c = fbuf_readbuf[fbuf_readpos++]; + if (fbuf_readpos >= fbuf_readfilled) fbuf_readblock(); + if (fbuf_storepos >= fbuf_storealloc) { + fbuf_storealloc++; + fbuf_storebuf = realloc(fbuf_storebuf, fbuf_storealloc); + } + fbuf_storebuf[fbuf_storepos++] = c; + return c; +} int parsedb(const char *filename, enum parsedbflags flags, struct pkginfo **donep, FILE *warnto, int *warncount) { @@ -84,7 +112,6 @@ * If donep is not null only one package's information is expected. */ - int fd; struct pkginfo newpig, *pigp; struct pkginfoperfile *newpifp, *pifp; struct arbitraryfield *arp, **larpp; @@ -93,63 +120,43 @@ int fieldencountered[NFIELDS]; const struct fieldinfo *fip; const struct nickname *nick; - char *data, *dataptr, *endptr; const char *fieldstart, *valuestart; char *value= NULL; int valuealloc= 0; - int fieldlen= 0, valuelen= 0; + int fieldpos= 0, valuepos= 0, fieldlen= 0, valuelen= 0; int *ip, c; - struct stat stat; if (warncount) *warncount= 0; newpifp= (flags & pdb_recordavailable) ? 
&newpig.available : &newpig.installed; - fd= open(filename, O_RDONLY); - if (fd == -1) ohshite(_("failed to open package info file `%.255s' for reading"),filename); - - push_cleanup(cu_parsedb,~0, NULL,0, 1,&fd); + fbuf_fd= open(filename, O_RDONLY); + if (fbuf_fd == -1) ohshite(_("failed to open package info file `%.255s' for reading"),filename); - if (fstat(fd, &stat) == -1) - ohshite(_("can't stat package info file `%.255s'"),filename); + push_cleanup(cu_parsedb,~0, NULL,0, 0); - if (stat.st_size > 0) { -#ifdef HAVE_MMAP - if ((dataptr= (char *)mmap(NULL, stat.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) - ohshite(_("can't mmap package info file `%.255s'"),filename); -#else - if ((dataptr= malloc(stat.st_size)) == NULL) - ohshite(_("failed to malloc for info file `%.255s'"),filename); - - fd_buf_copy(fd, dataptr, stat.st_size, _("copy info file `%.255s'"),filename); -#endif - data= dataptr; - endptr= dataptr + stat.st_size; - } else { - data= dataptr= endptr= NULL; - } + fbuf_readblock(); lno= 1; pdone= 0; -#define EOF_mmap(dataptr, endptr) (dataptr >= endptr) -#define getc_mmap(dataptr) *dataptr++; -#define ungetc_mmap(c, dataptr, data) dataptr--; for (;;) { /* loop per package */ + fbuf_storepos= 0; memset(fieldencountered, 0, sizeof(fieldencountered)); blankpackage(&newpig); blankpackageperfile(newpifp); /* Skip adjacent new lines */ - while(!EOF_mmap(dataptr, endptr)) { - c= getc_mmap(dataptr); if (c!='\n' && c!=MSDOS_EOF_CHAR ) break; + while(!fbuf_EOF()) { + c= fbuf_getc(); if (c!='\n' && c!=MSDOS_EOF_CHAR ) break; lno++; } - if (EOF_mmap(dataptr, endptr)) break; + if (fbuf_EOF()) break; for (;;) { /* loop per field */ - fieldstart= dataptr - 1; - while (!EOF_mmap(dataptr, endptr) && !isspace(c) && c!=':' && c!=MSDOS_EOF_CHAR) - c= getc_mmap(dataptr); - fieldlen= dataptr - fieldstart - 1; - while (!EOF_mmap(dataptr, endptr) && c != '\n' && isspace(c)) c= getc_mmap(dataptr); - if (EOF_mmap(dataptr, endptr)) + fieldpos= fbuf_storepos - 1; + while 
(!fbuf_EOF() && !isspace(c) && c!=':' && c!=MSDOS_EOF_CHAR) + c= fbuf_getc(); + fieldlen= fbuf_storepos - fieldpos - 1; + while (!fbuf_EOF() && c != '\n' && isspace(c)) c= fbuf_getc(); + fieldstart= fbuf_storebuf + fieldpos; + if (fbuf_EOF()) parseerr(NULL,filename,lno, warnto,warncount,&newpig,0, _("EOF after field name `%.*s'"),fieldlen,fieldstart); if (c == '\n') @@ -162,11 +169,12 @@ parseerr(NULL,filename,lno, warnto,warncount,&newpig,0, _("field name `%.*s' must be followed by colon"),fieldlen,fieldstart); /* Skip space after ':' but before value and eol */ - while(!EOF_mmap(dataptr, endptr)) { - c= getc_mmap(dataptr); + while(!fbuf_EOF()) { + c= fbuf_getc(); if (c == '\n' || !isspace(c)) break; } - if (EOF_mmap(dataptr, endptr)) + fieldstart= fbuf_storebuf + fieldpos; + if (fbuf_EOF()) parseerr(NULL,filename,lno, warnto,warncount,&newpig,0, _("EOF before value of field `%.*s' (missing final newline)"), fieldlen,fieldstart); @@ -174,27 +182,28 @@ parseerr(NULL,filename,lno, warnto,warncount,&newpig,0, _("MSDOS EOF char in value of field `%.*s' (missing newline?)"), fieldlen,fieldstart); - valuestart= dataptr - 1; + valuepos= fbuf_storepos - 1; for (;;) { if (c == '\n' || c == MSDOS_EOF_CHAR) { lno++; - if (EOF_mmap(dataptr, endptr)) break; - c= getc_mmap(dataptr); + if (fbuf_EOF()) break; + c= fbuf_getc(); /* Found double eol, or start of new field */ - if (EOF_mmap(dataptr, endptr) || c == '\n' || !isspace(c)) break; - ungetc_mmap(c,dataptr, data); - c= '\n'; - } else if (EOF_mmap(dataptr, endptr)) { + if (fbuf_EOF() || c == '\n' || !isspace(c)) break; + } else if (fbuf_EOF()) { + fieldstart= fbuf_storebuf + fieldpos; parseerr(NULL,filename,lno, warnto,warncount,&newpig,0, _("EOF during value of field `%.*s' (missing final newline)"), fieldlen,fieldstart); + } else { + c= fbuf_getc(); } - c= getc_mmap(dataptr); } - valuelen= dataptr - valuestart - 1; + fieldstart= fbuf_storebuf + fieldpos; + valuestart= fbuf_storebuf + valuepos; + valuelen= fbuf_storepos - 
valuepos - 1; /* trim ending space on value */ - while (valuelen && isspace(*(valuestart+valuelen-1))) - valuelen--; + while (valuelen && isspace(*(valuestart+valuelen-1))) valuelen--; for (nick= nicknames; nick->nick && (strncasecmp(nick->nick,fieldstart, fieldlen) || nick->nick[fieldlen] != 0); nick++); if (nick->nick) { fieldstart= nick->canon; @@ -231,7 +240,7 @@ arp->next= NULL; *larpp= arp; } - if (EOF_mmap(dataptr, endptr) || c == '\n' || c == MSDOS_EOF_CHAR) break; + if (fbuf_EOF() || c == '\n' || c == MSDOS_EOF_CHAR) break; } /* loop per field */ if (pdone && donep) parseerr(NULL,filename,lno, warnto,warncount,&newpig,0, @@ -320,19 +329,13 @@ if (donep) *donep= pigp; pdone++; - if (EOF_mmap(dataptr, endptr)) break; + if (fbuf_EOF()) break; if (c == '\n') lno++; } pop_cleanup(0); - if (data != NULL) { -#ifdef HAVE_MMAP - munmap(data, stat.st_size); -#else - free(data); -#endif - } + free(fbuf_storebuf); fbuf_storebuf= NULL; fbuf_storealloc= 0; free(value); - if (close(fd)) ohshite(_("failed to close after read: `%.255s'"),filename); + if (close(fbuf_fd)) ohshite(_("failed to close after read: `%.255s'"),filename); if (donep && !pdone) ohshit(_("no package information in `%.255s'"),filename); return pdone;