Hi Tobias, Tobias Stoeckmann wrote on Wed, Dec 02, 2015 at 08:54:34PM +0100:
> this patch adds a lot of input validation to libc/locale/rune.c. Thanks for doing this work. I consider the direction useful. See inline for some specific questions. I'm willing to test a final version of the patch. > Index: rune.c > =================================================================== > RCS file: /cvs/src/lib/libc/locale/rune.c,v > retrieving revision 1.4 > diff -u -p -u -p -r1.4 rune.c > --- rune.c 25 May 2014 17:47:04 -0000 1.4 > +++ rune.c 30 Oct 2015 16:13:01 -0000 > @@ -59,23 +59,31 @@ > * SUCH DAMAGE. > */ > > +#include <sys/types.h> > +#include <sys/stat.h> > #include <assert.h> > +#include <errno.h> > +#include <stdint.h> > #include <stdio.h> > -#include <string.h> > #include <stdlib.h> > -#include <errno.h> > +#include <string.h> > #include <wchar.h> > -#include <sys/types.h> > -#include <sys/stat.h> > #include "rune.h" > #include "rune_local.h" > > -static int readrange(_RuneLocale *, _RuneRange *, _FileRuneRange *, void *, > FILE *); > +#define SAFE_ADD(x, y) \ > +do { \ > + if ((x) > SIZE_MAX - (y)) \ > + return NULL; \ > + (x) += (y); \ > +} while (0); > + > +static int readrange(_RuneLocale *, _RuneRange *, rune_t, void *, FILE *); I suggest s/rune_t/uint32_t/, see below. > static void _freeentry(_RuneRange *); > static void _wctype_init(_RuneLocale *rl); > > static int > -readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp, > +readrange(_RuneLocale *rl, _RuneRange *rr, rune_t nranges, void *lastp, > FILE *fp) [...] Again, s/rune_t/uint32_t/, see below. > @@ -209,6 +223,7 @@ _Read_RuneMagi(FILE *fp) > _RuneLocale *rl; > struct stat sb; > int x; > + rune_t runetype_nranges, maplower_nranges, mapupper_nranges; I think this is the wrong data type. These variables are not rune numbers. They are numbers of rune ranges. I would suggest to use uint32_t. > > if (fstat(fileno(fp), &sb) < 0) > return NULL; > @@ -225,10 +240,24 @@ _Read_RuneMagi(FILE *fp) > if (memcmp(frl.frl_magic, _RUNE_MAGIC_1, sizeof(frl.frl_magic))) > return NULL; > > - hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.frl_variable_len) + > - ntohl(frl.frl_runetype_ext.frr_nranges) * sizeof(_RuneEntry) + > - ntohl(frl.frl_maplower_ext.frr_nranges) * sizeof(_RuneEntry) + > - ntohl(frl.frl_mapupper_ext.frr_nranges) * sizeof(_RuneEntry); > + /* XXX assumes rune_t = u_int32_t */ If you follow my suggestion above makeing *_nranges uint32_t, the problem goes away. frr_nranges is uint32_t. ntohl takes uint32_t and returns uint32_t, so all is fine. > + runetype_nranges = ntohl(frl.frl_runetype_ext.frr_nranges); > + maplower_nranges = ntohl(frl.frl_maplower_ext.frr_nranges); > + mapupper_nranges = ntohl(frl.frl_mapupper_ext.frr_nranges); > + > +#ifndef __LP64__ > + if (runetype_nranges > SIZE_MAX / sizeof(_RuneEntry) || > + maplower_nranges > SIZE_MAX / sizeof(_RuneEntry) || > + mapupper_nranges > SIZE_MAX / sizeof(_RuneEntry)) > + return NULL; > +#endif I dislike using #ifdef, it seems error-prone to me. The checks are correct in any case. Sure, if size_t is much larger than uint32_t, these checks can never trigger. But why do micro-optimization here? Why not just delete the #ifndef/#endif? > + hostdatalen = 0; > + SAFE_ADD(hostdatalen, sizeof(*rl)); That seems pointless. It's obvious that sizeof(whatever) cannot overflow size_t. Why not just hostdatalen = sizeof(*rl); > + SAFE_ADD(hostdatalen, ntohl((u_int32_t)frl.frl_variable_len)); That seems insufficient. frl_variable_len is int32_t, so it may be negative. The cast will make it large, but it may still fit in size_t, so there is a risk of copying a lot of garbage. I suggest to do an additional check frl.frl_variable_len > 0 beforehand. There is no need for a cast, then. > + SAFE_ADD(hostdatalen, runetype_nranges * sizeof(_RuneEntry)); > + SAFE_ADD(hostdatalen, maplower_nranges * sizeof(_RuneEntry)); > + SAFE_ADD(hostdatalen, mapupper_nranges * sizeof(_RuneEntry)); > > if ((hostdata = calloc(hostdatalen, 1)) == NULL) > return NULL; [...] Unrelated to your patch, but anyway: /* XXX assumes rune_t = u_int32_t */ is missing before rl->rl_invalid_rune = ntohl((u_int32_t)frl.frl_invalid_rune); Would you mind adding that? Yours, Ingo