Author: Donát Nagy Date: 2023-05-03T18:52:27+02:00 New Revision: 8c22cbea87beb74da3dc5891c40cdf574cd5fe56
URL: https://github.com/llvm/llvm-project/commit/8c22cbea87beb74da3dc5891c40cdf574cd5fe56 DIFF: https://github.com/llvm/llvm-project/commit/8c22cbea87beb74da3dc5891c40cdf574cd5fe56.diff LOG: [analyzer] ArrayBoundCheckerV2: suppress false positives from ctype macros The checker alpha.security.ArrayBoundV2 created bug reports in situations when the (tainted) result of fgetc() or getchar() was passed to one of the isXXXXX() macros from ctype.h. This is a common input handling pattern (within the limited toolbox of the C language) and several open source projects contained code where it led to false positive reports; so this commit suppresses ArrayBoundV2 reports generated within the isXXXXX() macros. Note that here even true positive reports would be difficult to understand, as they'd refer to the implementation details of these macros. Differential Revision: https://reviews.llvm.org/D149460 Added: Modified: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp clang/test/Analysis/taint-generic.c Removed: ################################################################################ diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index a476613b6dcc7..6d0cc505756b8 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -42,8 +42,10 @@ class ArrayBoundCheckerV2 : void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState, SVal TaintedSVal) const; + static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); + public: - void checkLocation(SVal l, bool isLoad, const Stmt*S, + void checkLocation(SVal l, bool isLoad, const Stmt *S, CheckerContext &C) const; }; @@ -155,6 +157,15 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, // memory access is within the extent of the base region. Since we // have some flexibility in defining the base region, we can achieve // various levels of conservatism in our buffer overflow checking. + + // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as + // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX) + // and incomplete analysis of these leads to false positives. As even + // accurate reports would be confusing for the users, just disable reports + // from these macros: + if (isFromCtypeMacro(LoadS, checkerContext.getASTContext())) + return; + ProgramStateRef state = checkerContext.getState(); SValBuilder &svalBuilder = checkerContext.getSValBuilder(); @@ -267,6 +278,25 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext, checkerContext.emitReport(std::move(BR)); } +bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { + SourceLocation Loc = S->getBeginLoc(); + if (!Loc.isMacroID()) + return false; + + StringRef MacroName = Lexer::getImmediateMacroName( + Loc, ACtx.getSourceManager(), ACtx.getLangOpts()); + + if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's') + return false; + + return ((MacroName == "isalnum") || (MacroName == "isalpha") || + (MacroName == "isblank") || (MacroName == "isdigit") || + (MacroName == "isgraph") || (MacroName == "islower") || + (MacroName == "isnctrl") || (MacroName == "isprint") || + (MacroName == "ispunct") || (MacroName == "isspace") || + (MacroName == "isupper") || (MacroName == "isxdigit")); +} + #ifndef NDEBUG LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const { dumpToStream(llvm::errs()); diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c index 626e01e39d158..62e1f570b6622 100644 --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -156,6 +156,28 @@ void bufferGetchar(int x) { Buffer[m] = 1; //expected-warning {{Out of bound memory access (index is tainted)}} } +extern const unsigned short int **__ctype_b_loc (void); +enum { _ISdigit = 2048 }; +# define isdigit(c) ((*__ctype_b_loc ())[(int) (c)] & (unsigned short int) _ISdigit) + +int isdigitImplFalsePositive(void) { + // If this code no longer produces a bug report, then consider removing the + // special case that disables buffer overflow reports coming from the isXXXXX + // macros in ctypes.h. + int c = getchar(); + return ((*__ctype_b_loc ())[(int) (c)] & (unsigned short int) _ISdigit); + //expected-warning@-1 {{Out of bound memory access (index is tainted)}} +} + +int isdigitSuppressed(void) { + // Same code as above, but reports are suppressed based on macro name: + int c = getchar(); + return isdigit(c); //no-warning +} + +// Some later tests use isdigit as a function, so we need to undef it: +#undef isdigit + void testUncontrolledFormatString(char **p) { char s[80]; fscanf(stdin, "%s", s); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
