================ @@ -572,196 +570,236 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { std::vector<std::pair<CallDescription, GenericTaintRule>>; using TR = GenericTaintRule; - const Builtin::Context &BI = C.getASTContext().BuiltinInfo; - RulesConstructionTy GlobalCRules{ // Sources - {{{"fdopen"}}, TR::Source({{ReturnValueIndex}})}, - {{{"fopen"}}, TR::Source({{ReturnValueIndex}})}, - {{{"freopen"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getch"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getchar"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})}, - {{{"gets"}}, TR::Source({{0}, ReturnValueIndex})}, - {{{"gets_s"}}, TR::Source({{0}, ReturnValueIndex})}, - {{{"scanf"}}, TR::Source({{}, 1})}, - {{{"scanf_s"}}, TR::Source({{}, {1}})}, - {{{"wgetch"}}, TR::Source({{}, ReturnValueIndex})}, + {{CDM::CLibrary, {"fdopen"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"fopen"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"freopen"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getch"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getchar"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"gets"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"gets_s"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"scanf"}}, TR::Source({{}, 1})}, + {{CDM::CLibrary, {"scanf_s"}}, TR::Source({{}, 1})}, + {{CDM::CLibrary, {"wgetch"}}, TR::Source({{ReturnValueIndex}})}, // Sometimes the line between taint sources and propagators is blurry. // _IO_getc is choosen to be a source, but could also be a propagator. // This way it is simpler, as modeling it as a propagator would require // to model the possible sources of _IO_FILE * values, which the _IO_getc // function takes as parameters. 
- {{{"_IO_getc"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getcwd"}}, TR::Source({{0, ReturnValueIndex}})}, - {{{"getwd"}}, TR::Source({{0, ReturnValueIndex}})}, - {{{"readlink"}}, TR::Source({{1, ReturnValueIndex}})}, - {{{"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})}, - {{{"get_current_dir_name"}}, TR::Source({{ReturnValueIndex}})}, - {{{"gethostname"}}, TR::Source({{0}})}, - {{{"getnameinfo"}}, TR::Source({{2, 4}})}, - {{{"getseuserbyname"}}, TR::Source({{1, 2}})}, - {{{"getgroups"}}, TR::Source({{1, ReturnValueIndex}})}, - {{{"getlogin"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getlogin_r"}}, TR::Source({{0}})}, + {{CDM::CLibrary, {"_IO_getc"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getcwd"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"getwd"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"readlink"}}, TR::Source({{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})}, + {{CDM::CLibrary, {"get_current_dir_name"}}, + TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"gethostname"}}, TR::Source({{0}})}, + {{CDM::CLibrary, {"getnameinfo"}}, TR::Source({{2, 4}})}, + {{CDM::CLibrary, {"getseuserbyname"}}, TR::Source({{1, 2}})}, + {{CDM::CLibrary, {"getgroups"}}, TR::Source({{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"getlogin"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getlogin_r"}}, TR::Source({{0}})}, // Props - {{{"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, - {{{"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, - {{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})}, - {{{"fscanf_s"}}, TR::Prop({{0}}, {{}, 
{2}})}, - {{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})}, - - {{{"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"getc_unlocked"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"getdelim"}}, TR::Prop({{3}}, {{0}})}, - {{{"getline"}}, TR::Prop({{2}}, {{0}})}, - {{{"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"pread"}}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})}, - {{{"read"}}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})}, - {{{"strchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fread"}}, TR::Prop({{3}}, {{0, ReturnValueIndex}})}, - {{{"recv"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - {{{"recvfrom"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - - {{{"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"ttyname_r"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - - {{{"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})}, - {{{"memchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"memrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{{"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{{"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"memcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{{"memcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{{"memmove"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - // If memmem was called with a tainted needle and the search was - // successful, that would mean that the value pointed by the return value - // has the same content as the needle. 
If we choose to go by the policy of - // content equivalence implies taintedness equivalence, that would mean - // haystack should be considered a propagation source argument. - {{{"memmem"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - // The comment for memmem above also applies to strstr. - {{{"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"strchrnul"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"fgets"}}, + TR::Prop({{2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"fgetws"}}, + TR::Prop({{2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"fscanf"}}, TR::Prop({{0}}, {{}, 2})}, + {{CDM::CLibrary, {"fscanf_s"}}, TR::Prop({{0}}, {{}, 2})}, + {{CDM::CLibrary, {"sscanf"}}, TR::Prop({{0}}, {{}, 2})}, + {{CDM::CLibrary, {"sscanf_s"}}, TR::Prop({{0}}, {{}, 2})}, + + {{CDM::CLibrary, {"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getc_unlocked"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getdelim"}}, TR::Prop({{3}}, {{0}})}, + // TODO: this intends to match the C function `getline()`, but the call + // description also matches the C++ function `std::getline()`; it should + // be ruled out by some additional logic. + {{CDM::CLibrary, {"getline"}}, TR::Prop({{2}}, {{0}})}, ---------------- NagyDonat wrote:
Right now the propagation rule `TR::Prop({{2}}, {{0}})`, which propagates taint from the third argument (index 2) to the first argument (index 0), is applied to both the C-style plain `getline` and the C++ `std::getline`. However, these are completely different functions:

```
ssize_t getline(char **restrict lineptr, size_t *restrict n,
                FILE *restrict stream);
```

and

```
template< class CharT, class Traits, class Allocator >
std::basic_istream<CharT, Traits>&
    getline( std::basic_istream<CharT, Traits>&& input,
             std::basic_string<CharT, Traits, Allocator>& str,
             CharT delim );
// there is also a 2-argument overload without 'delim'
```

so the propagation behavior is nonsense for the C++ function `std::getline`.

Note that the matching mode `CDM::CLibrary` accepts both functions declared at TU scope and functions declared within the namespace `std`, because there are many functions for which this is the right thing to do (under C they appear at TU scope, but the C++ headers declare the same function within `std`). The functions named `getline` are an unfortunate exception, where the separate standard revisions of C and C++ used the same name with very different semantics.

https://github.com/llvm/llvm-project/pull/91635

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits