On Wed, Jan 23, 2013 at 04:24:01PM +0400, Evgeniy Stepanov wrote:
> > > What if glibc adds a scanf hook (like it has already printf hooks), apps
> > > could then register their own stuff and the above would then break. It
> > > really should be very conservative, and should be checked e.g. with all
> > > glibc's *scanf tests (e.g. stdio-common/scanf[0-9]*.c,
> > > stdio-common/tst-sscanf.c).
>
> I'll add support for the missing %specs. About the testing, these
> files seem like GPL, so I'd prefer not to look at them at all. We've
> got a smallish test for the scanf implementation in sanitizer_common,
> but it would be really great to run it on full glibc scanf tests.
> Would you be willing to setup such testing gnu-side?
Seems the code in llvm repo looks much better now to me than it used to,
still it breaks on:
#define _GNU_SOURCE 1
#include <stdio.h>
#include <string.h>
int
main ()
{
char *p;
long double q;
if (sscanf ("abcdefghijklmno 1.0", "%ms %Lf", &p, &q) != 2
|| strcmp (p, "abcdefghijklmno") != 0
|| q != 1.0L)
return 1;
return 0;
}
because it mishandles %ms.
Attached patch fixes that and also handles %a when possible.
There are cases where it is unambiguous, e.g.
s%Las is s %La s, reading long double, or %ar is %a r, reading
float, other cases where we can check at least something and keep checking
the rest of the format string, e.g. for:
%as
we know it will either store float, or char * pointer, so just assume
conservatively the smaller size, and other cases where we have to punt,
%a[a%d]
(where % appears in the [...] list).
I've tested various glibc *scanf testcases with the GCC libsanitizer
+ enabling of # define SANITIZER_INTERCEPT_SCANF SI_NOT_WINDOWS
+ llvm svn diff between last merge point to GCC and current llvm trunk
+ this patch, and it looked good.
There is one further issue, I'd say you need to pass down to scanf_common
the result from the intercepted call, and use it to see if it is valid
to call strlen on the ptr, or if just 1 should be assumed for SSS_STRLEN.
Note 'n' doesn't count towards that. Because, it is unsafe to call strlen
on arguments that haven't been assigned yet. Testcase that still fails:
#define _GNU_SOURCE 1
#include <stdio.h>
#include <string.h>
int
main ()
{
int a, b, c, d;
char e[3], f[10];
memset (e, ' ', sizeof e);
memset (f, ' ', sizeof f);
if (sscanf ("1 2 a", "%d%n%n%d %s %s", &a, &b, &c, &d, e, f) != 3
|| a != 1
|| b != 1
|| c != 1
|| d != 2
|| strcmp (e, "a") != 0)
return 1;
return 0;
}
Oh, one more thing, on Linux for recent enough glibc's it would be
desirable to also intercept:
__isoc99_sscanf, __isoc99_scanf, __isoc99_vsscanf, __isoc99_fscanf,
__isoc99_vfscanf, __isoc99_vscanf
functions (guard the interception with
#if defined __GLIBC_PREREQ && __GLIBC_PREREQ (2, 7)
). All these work exactly like the corresponding non-__isoc99_
functions, just %a followed by s, S or [ always writes float, thus
there is no ambiguity.
Jakub
--- sanitizer_common_interceptors_scanf.inc.jj 2013-02-08 13:21:51.000000000
+0100
+++ sanitizer_common_interceptors_scanf.inc 2013-02-11 12:15:27.575871424
+0100
@@ -18,11 +18,12 @@
struct ScanfDirective {
int argIdx; // argument index, or -1 of not specified ("%n$")
- bool suppressed; // suppress assignment ("*")
int fieldWidth;
+ bool suppressed; // suppress assignment ("*")
bool allocate; // allocate space ("m")
char lengthModifier[2];
char convSpecifier;
+ bool maybeGnuMalloc;
};
static const char *parse_number(const char *p, int *out) {
@@ -119,6 +120,31 @@ static const char *scanf_parse_next(cons
// Consume the closing ']'.
++p;
}
+ // This is unfortunately ambiguous between old GNU extension
+ // of %as, %aS and %a[...] and newer POSIX %a followed by
+ // letters s, S or [.
+ if (dir->convSpecifier == 'a' && !dir->lengthModifier[0]) {
+ if (*p == 's' || *p == 'S') {
+ dir->maybeGnuMalloc = true;
+ ++p;
+ } else if (*p == '[') {
+ // Watch for %a[h-j%d], if % appears in the
+ // [...] range, then we need to give up, we don't know
+ // if scanf will parse it as POSIX %a [h-j %d ] or
+ // GNU allocation of string with range dh-j plus %.
+ const char *q = p + 1;
+ if (*q == '^')
+ ++q;
+ if (*q == ']')
+ ++q;
+ while (*q && *q != ']' && *q != '%')
+ ++q;
+ if (*q == 0 || *q == '%')
+ return 0;
+ p = q + 1; // Consume the closing ']'.
+ dir->maybeGnuMalloc = true;
+ }
+ }
break;
}
return p;
@@ -131,9 +157,7 @@ static bool scanf_is_integer_conv(char c
// Returns true if the character is an floating point conversion specifier.
static bool scanf_is_float_conv(char c) {
- return char_is_one_of(c, "AeEfFgG");
- // NOTE: c == 'a' is ambiguous between POSIX and GNU and, therefore,
- // unsupported.
+ return char_is_one_of(c, "aAeEfFgG");
}
// Returns string output character size for string-like conversions,
@@ -168,6 +192,21 @@ enum ScanfStoreSize {
// Returns the store size of a scanf directive (if >0), or a value of
// ScanfStoreSize.
static int scanf_get_store_size(ScanfDirective *dir) {
+ if (dir->allocate) {
+ if (!char_is_one_of(dir->convSpecifier, "cCsS["))
+ return SSS_INVALID;
+ return sizeof(char *);
+ }
+
+ if (dir->maybeGnuMalloc) {
+ if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
+ return SSS_INVALID;
+ // This is ambiguous, so check the smaller size of char * (if it is
+ // a GNU extension of %as, %aS or %a[...]) and float (if it is
+ // POSIX %a followed by s, S or [ letters).
+ return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
+ }
+
if (scanf_is_integer_conv(dir->convSpecifier)) {
switch (dir->lengthModifier[0]) {
case 'h':
@@ -256,10 +295,6 @@ static void scanf_common(void *ctx, cons
}
if (dir.suppressed)
continue;
- if (dir.allocate) {
- // Unsupported;
- continue;
- }
int size = scanf_get_store_size(&dir);
if (size == SSS_INVALID)