llvmbot wrote: @llvm/pr-subscribers-clang-static-analyzer-1
<details> <summary>Changes</summary> See the motivation here: https://discourse.llvm.org/t/patches-inspired-by-the-juliet-benchmark/73106 I've checked all these 4 commits on a large set of projects, and they - surprisingly - don't show any report differences besides noise. I plan to land these 4 commits individually (aka. I don't plan to squash them). -- Full diff: https://github.com/llvm/llvm-project/pull/66074.diff 4 Files Affected: - (modified) clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (+15-7) - (modified) clang/test/Analysis/taint-diagnostic-visitor.c (+1-1) - (modified) clang/test/Analysis/taint-generic.c (+203-7) - (modified) clang/test/Analysis/taint-generic.cpp (+12) <pre> diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index 3dcb45c0b110383..e6f0b7354b168bf 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -105,7 +105,8 @@ bool isStdin(SVal Val, const ASTContext &ACtx) { if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { D = D->getCanonicalDecl(); // FIXME: This should look for an exact match. - if (D->getName().contains("stdin") && D->isExternC()) { + if (D->getName().contains("stdin") && D->hasExternalStorage() && + D->isExternC()) { const QualType FILETy = ACtx.getFILEType().getCanonicalType(); const QualType Ty = D->getType().getCanonicalType(); @@ -622,12 +623,14 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { {{{"getlogin_r"}}, TR::Source({{0}})}, // Props + {{{"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, + {{{"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, {{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})}, {{{"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})}, {{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})}, @@ -695,6 +698,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{{"wcslen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, @@ -731,6 +735,8 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { TR::Prop({{1}}, {{0, ReturnValueIndex}})}, {{CDF_MaybeBuiltin, {{"strcat"}}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{CDF_MaybeBuiltin, {{"wcsncat"}}}, + TR::Prop({{1}}, {{0, ReturnValueIndex}})}, {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{CDF_MaybeBuiltin, {{"strdupa"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, @@ -739,12 +745,14 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { // Sinks {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, {{{"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, - {{{"execl"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, - {{{"execle"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, - {{{"execlp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, - {{{"execvp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, - {{{"execvP"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, - {{{"execve"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, + {{{"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, + {{{"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, + {{{"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, + {{{"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, + {{{"execve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, + {{{"fexecve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, + {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, + {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c index 663836836d3db67..f1b9ceebdd9a6b8 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor.c +++ b/clang/test/Analysis/taint-diagnostic-visitor.c @@ -13,7 +13,7 @@ size_t strlen( const char* str ); void *malloc(size_t size ); void free( void *ptr ); char *fgets(char *str, int n, FILE *stream); -FILE *stdin; +extern FILE *stdin; void taintDiagnostic(void) { diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c index b7906d201e4fad3..f3c56b6ff930034 100644 --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -56,10 +56,14 @@ // CHECK-INVALID-ARG-SAME: rules greater or equal to -1 typedef long long rsize_t; +typedef __WCHAR_TYPE__ wchar_t; void clang_analyzer_isTainted_char(char); +void clang_analyzer_isTainted_wchar(wchar_t); void clang_analyzer_isTainted_charp(char*); void clang_analyzer_isTainted_int(int); +int coin(); + int scanf(const char *restrict format, ...); char *gets(char *str); char *gets_s(char *str, rsize_t n); @@ -75,6 +79,17 @@ extern FILE *stdin; #define bool _Bool #define NULL (void*)0 +wchar_t *fgetws(wchar_t *ws, int n, FILE *stream); +wchar_t *wmemset(wchar_t *wcs, wchar_t wc, unsigned long n); +wchar_t *wmemcpy(wchar_t *dest, const wchar_t *src, size_t n); +wchar_t *wmemmove(wchar_t *dest, const wchar_t *src, size_t n); +size_t wcslen(const wchar_t *s); +wchar_t *wcscpy(wchar_t * dest, const wchar_t * src); +wchar_t *wcsncpy(wchar_t *dest, const wchar_t *src, size_t n); +wchar_t *wcscat(wchar_t *dest, const wchar_t *src); +wchar_t *wcsncat(wchar_t *dest,const wchar_t *src, size_t n); +int swprintf(wchar_t *wcs, size_t maxlen, const wchar_t *format, ...); + char *getenv(const char *name); FILE *fopen(const char *name, const char *mode); @@ -105,6 +120,41 @@ void *malloc(size_t); void *calloc(size_t nmemb, size_t size); void bcopy(void *s1, void *s2, size_t n); + +// function | pathname | filename | fd | arglist | argv[] | envp[] +// =============================================================== +// 1 execl | X | | | X | | +// 2 execle | X | | | X | | X +// 3 execlp | | X | | X | | +// 4 execv | X | | | | X | +// 5 execve | X | | | | X | X +// 6 execvp | | X | | | X | +// 7 execvpe | | X | | | X | X +// 8 fexecve | | | X | | X | X +// =============================================================== +// letter | | p | f | l | v | e +// +// legend: +// - pathname: rel/abs path to the binary +// - filename: file name searched in PATH to execute the binary +// - fd: accepts a file descriptor +// - arglist: accepts variadic arguments +// - argv: accepts a pointer to array, denoting the new argv +// - envp: accepts a pointer to array, denoting the new envp + +int execl(const char *path, const char *arg, ...); +int execle(const char *path, const char *arg, ...); +int execlp(const char *file, const char *arg, ...); +int execv(const char *path, char *const argv[]); +int execve(const char *path, char *const argv[], char *const envp[]); +int execvp(const char *file, char *const argv[]); +int execvpe(const char *file, char *const argv[], char *const envp[]); +int fexecve(int fd, char *const argv[], char *const envp[]); +FILE *popen(const char *command, const char *type); +int pclose(FILE *stream); +int system(const char *command); + + typedef size_t socklen_t; struct sockaddr { @@ -211,7 +261,6 @@ void testUncontrolledFormatString(char **p) { } -int system(const char *command); void testTaintSystemCall(void) { char buffer[156]; char addr[128]; @@ -274,7 +323,6 @@ void testTaintedBufferSize(void) { #define SOCK_STREAM 1 int socket(int, int, int); size_t read(int, void *, size_t); -int execl(const char *, const char *, ...); void testSocket(void) { int sock; @@ -430,6 +478,24 @@ int testSprintf_propagates_taint(char *buf, char *msg) { return 1 / x; // expected-warning {{Division by a tainted value, possibly zero}} } +void test_wchar_apis_propagates(const char *path) { + FILE *f = fopen(path, "r"); + clang_analyzer_isTainted_charp((char*)f); // expected-warning {{YES}} + wchar_t wbuf[10]; + fgetws(wbuf, sizeof(wbuf)/sizeof(*wbuf), f); + clang_analyzer_isTainted_wchar(*wbuf); // expected-warning {{YES}} + int n = wcslen(wbuf); + clang_analyzer_isTainted_int(n); // expected-warning {{YES}} + + wchar_t dst[100] = L"ABC"; + clang_analyzer_isTainted_wchar(*dst); // expected-warning {{NO}} + wcsncat(dst, wbuf, sizeof(wbuf)/sizeof(*wbuf)); + clang_analyzer_isTainted_wchar(*dst); // expected-warning {{YES}} + + int m = wcslen(dst); + clang_analyzer_isTainted_int(m); // expected-warning {{YES}} +} + int scanf_s(const char *format, ...); int testScanf_s_(int *out) { scanf_s("%d", out); @@ -544,6 +610,10 @@ void testFread(const char *fname, int *buffer, size_t size, size_t count) { } ssize_t recv(int sockfd, void *buf, size_t len, int flags); +int accept(int fd, struct sockaddr *addr, socklen_t *addrlen); +int bind(int fd, const struct sockaddr *addr, socklen_t addrlen); +int listen(int fd, int backlog); + void testRecv(int *buf, size_t len, int flags) { int fd; scanf("%d", &fd); // fake a tainted a file descriptor @@ -640,7 +710,6 @@ void testRawmemchr(int c) { clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} } -typedef char wchar_t; int mbtowc(wchar_t *pwc, const char *s, size_t n); void testMbtowc(wchar_t *pwc, size_t n) { char buf[10]; @@ -653,8 +722,7 @@ void testMbtowc(wchar_t *pwc, size_t n) { int wctomb(char *s, wchar_t wc); void testWctomb(char *buf) { - wchar_t wc; - scanf("%c", &wc); + wchar_t wc = getchar(); int result = wctomb(buf, wc); clang_analyzer_isTainted_char(*buf); // expected-warning {{YES}} @@ -663,8 +731,7 @@ void testWctomb(char *buf) { int wcwidth(wchar_t c); void testWcwidth() { - wchar_t wc; - scanf("%c", &wc); + wchar_t wc = getchar(); int width = wcwidth(wc); clang_analyzer_isTainted_int(width); // expected-warning {{YES}} @@ -1087,6 +1154,128 @@ void testConfigurationSinks(void) { // expected-warning@-1 {{Untrusted data is passed to a user-defined sink}} } +int test_exec_like_functions() { + char buf[100] = {0}; + scanf("%99s", buf); + clang_analyzer_isTainted_char(buf[0]); // expected-warning {{YES}} + + char *cleanArray[] = {"ENV1=V1", "ENV2=V2", NULL}; + char *taintedArray[] = {buf, "ENV2=V2", NULL}; + clang_analyzer_isTainted_char(taintedArray[0][0]); // expected-warning {{YES}} + clang_analyzer_isTainted_char(*(char*)taintedArray[0]); // expected-warning {{YES}} + clang_analyzer_isTainted_char(*(char*)taintedArray); // expected-warning {{NO}} We should have YES here. + // FIXME: Above the triple pointer indirection will confuse the checker, + // as we only check two levels. The results would be worse, if the tainted + // subobject ("buf") would not be at the beginning of the enclosing object, + // for the same reason. + + switch (coin()) { + default: break; + // Execute `path` with all arguments after `path` until a NULL pointer + // and environment from `environ'. + case 0: return execl("path", "arg0", "arg1", "arg2", NULL); // no-warning + case 1: return execl(buf, "arg0", "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}} + case 2: return execl("path", buf, "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}} + case 3: return execl("path", "arg0", buf, "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}} + case 4: return execl("path", "arg0", "arg1", buf, NULL); // expected-warning {{Untrusted data is passed to a system call}} + } + + switch (coin()) { + default: break; + // Execute `path` with all arguments after `PATH` until a NULL pointer, + // and the argument after that for environment. + case 0: return execle("path", "arg0", "arg1", NULL, cleanArray); // no-warning + case 1: return execle( buf, "arg0", "arg1", NULL, cleanArray); // expected-warning {{Untrusted data is passed to a system call}} + case 2: return execle("path", buf, "arg1", NULL, cleanArray); // expected-warning {{Untrusted data is passed to a system call}} + case 3: return execle("path", "arg0", buf, NULL, cleanArray); // expected-warning {{Untrusted data is passed to a system call}} + case 4: return execle("path", "arg0", "arg1", NULL, buf); // expected-warning {{Untrusted data is passed to a system call}} + case 5: return execle("path", "arg0", "arg1", NULL, taintedArray); // FIXME: We might wanna have a report here. + } + + switch (coin()) { + default: break; + // Execute `file`, searching in the `PATH' environment variable if it + // contains no slashes, with all arguments after `file` until a NULL + // pointer and environment from `environ'. + case 0: return execlp("file", "arg0", "arg1", "arg2", NULL); // no-warning + case 1: return execlp( buf, "arg0", "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}} + case 2: return execlp("file", buf, "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}} + case 3: return execlp("file", "arg0", buf, "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}} + case 4: return execlp("file", "arg0", "arg1", buf, NULL); // expected-warning {{Untrusted data is passed to a system call}} + } + + switch (coin()) { + default: break; + // Execute `path` with arguments `ARGV` and environment from `environ'. + case 0: return execv("path", /*argv=*/ cleanArray); // no-warning + case 1: return execv( buf, /*argv=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}} + case 2: return execv("path", /*argv=*/taintedArray); // FIXME: We might wanna have a report here. + } + + switch (coin()) { + default: break; + // Replace the current process, executing `path` with arguments `ARGV` + // and environment `ENVP`. `ARGV` and `ENVP` are terminated by NULL pointers. + case 0: return execve("path", /*argv=*/ cleanArray, /*envp=*/cleanArray); // no-warning + case 1: return execve( buf, /*argv=*/ cleanArray, /*envp=*/cleanArray); // expected-warning {{Untrusted data is passed to a system call}} + case 2: return execve("path", /*argv=*/taintedArray, /*envp=*/cleanArray); // FIXME: We might wanna have a report here. + case 3: return execve("path", /*argv=*/cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here. + } + + switch (coin()) { + default: break; + // Execute `file`, searching in the `PATH' environment variable if it + // contains no slashes, with arguments `ARGV` and environment from `environ'. + case 0: return execvp("file", /*argv=*/ cleanArray); // no-warning + case 1: return execvp( buf, /*argv=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}} + case 2: return execvp("file", /*argv=*/taintedArray); // FIXME: We might wanna have a report here. + } + + // execvpe + switch (coin()) { + default: break; + // Execute `file`, searching in the `PATH' environment variable if it + // contains no slashes, with arguments `ARGV` and environment `ENVP`. + // `ARGV` and `ENVP` are terminated by NULL pointers. + case 0: return execvpe("file", /*argv=*/ cleanArray, /*envp=*/ cleanArray); // no-warning + case 1: return execvpe( buf, /*argv=*/ cleanArray, /*envp=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}} + case 2: return execvpe("file", /*argv=*/taintedArray, /*envp=*/ cleanArray); // FIXME: We might wanna have a report here. + case 3: return execvpe("file", /*argv=*/ cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here. + } + + int cleanFD = coin(); + int taintedFD; + scanf("%d", &taintedFD); + clang_analyzer_isTainted_int(taintedFD); // expected-warning {{YES}} + + switch (coin()) { + default: break; + // Execute the file `FD` refers to, overlaying the running program image. + // `ARGV` and `ENVP` are passed to the new program, as for `execve'. + case 0: return fexecve( cleanFD, /*argv=*/ cleanArray, /*envp=*/ cleanArray); // no-warning + case 1: return fexecve(taintedFD, /*argv=*/ cleanArray, /*envp=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}} + case 2: return fexecve( cleanFD, /*argv=*/taintedArray, /*envp=*/ cleanArray); // FIXME: We might wanna have a report here. + case 3: return fexecve( cleanFD, /*argv=*/ cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here. + } + + switch (coin()) { + default: break; + // Create a new stream connected to a pipe running the given `command`. + case 0: return pclose(popen("command", /*mode=*/"r")); // no-warning + case 1: return pclose(popen( buf, /*mode=*/"r")); // expected-warning {{Untrusted data is passed to a system call}} + case 2: return pclose(popen("command", /*mode=*/buf)); // 'mode' is not a taint sink. + } + + switch (coin()) { + default: break; + // Execute the given line as a shell command. + case 0: return system("command"); // no-warning + case 1: return system( buf); // expected-warning {{Untrusted data is passed to a system call}} + } + + return 0; +} + void testUnknownFunction(void (*foo)(void)) { foo(); // no-crash } @@ -1107,3 +1296,10 @@ void testProctitle2(char *real_argv[]) { setproctitle_init(1, argv, 0); // expected-warning {{Untrusted data is passed to a user-defined sink}} setproctitle_init(1, real_argv, argv); // expected-warning {{Untrusted data is passed to a user-defined sink}} } + +void testAcceptPropagates() { + int listenSocket = socket(2, 1, 6); + clang_analyzer_isTainted_int(listenSocket); // expected-warning {{YES}} + int acceptSocket = accept(listenSocket, 0, 0); + clang_analyzer_isTainted_int(acceptSocket); // expected-warning {{YES}} +} diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp index 09cd54471948e1a..c907c8f5eeb958b 100644 --- a/clang/test/Analysis/taint-generic.cpp +++ b/clang/test/Analysis/taint-generic.cpp @@ -7,6 +7,12 @@ int scanf(const char*, ...); int mySource1(); int mySource3(); +typedef struct _FILE FILE; +extern "C" { +extern FILE *stdin; +} +int fscanf(FILE *stream, const char *format, ...); + bool isOutOfRange2(const int*); void mySink2(int); @@ -124,3 +130,9 @@ void testConfigurationMemberFunc() { foo.myMemberScanf("%d", &x); Buffer[x] = 1; // expected-warning {{Out of bound memory access }} } + +void testReadingFromStdin(char **p) { + int n; + fscanf(stdin, "%d", &n); + Buffer[n] = 1; // expected-warning {{Out of bound memory access (index is tainted)}} +} </pre> </details> https://github.com/llvm/llvm-project/pull/66074 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits