boga95 created this revision. boga95 added reviewers: Szelethus, xazax.hun, dkrupp, NoQ. Herald added subscribers: cfe-commits, Charusso, donat.nagy, mikhail.ramalho, a.sidorin, rnkovacs, szepet, baloghadamsoftware, whisperity. Herald added a project: clang.
Parse the YAML configuration file and store it in static variables. The user can define taint propagation rules, custom sink and filter functions. E.g.: # A list of source/propagation functions Propagations: # int x = mySource1(); // x is tainted - Name: mySource1 DstArgs: [4294967294] # Index for return value # int x; # mySource2(&x); // x is tainted - Name: mySource2 DstArgs: [0] # int x, y; # myScanf("%d %d", &x, &y); // x and y are tainted - Name: myScanf VarType: Dst VarIndex: 1 # int x; // x is tainted # int y; # myPropagator(x, &y); // y is tainted - Name: myPropagator SrcArgs: [0] DstArgs: [1] # const unsigned size = 100; # char buf[size]; # int x, y; # int n = mySnprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted # // the return value and the buf will be tainted - Name: mySnprintf SrcArgs: [1] DstArgs: [0, 4294967294] VarType: Src VarIndex: 3 # A list of filter functions Filters: # int x; // x is tainted # myFilter(&x); // x is not tainted anymore - Name: myFilter Args: [0] # A list of sink functions Sinks: # int x, y; // x and y are tainted # mySink(x, 0, 1); // It will warn # mySink(0, 1, y); // It will warn # mySink(0, x, 1); // It won't warn - Name: mySink Args: [0, 2] Repository: rC Clang https://reviews.llvm.org/D59555 Files: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -13,14 +13,16 @@ // aggressively, even if the involved symbols are under constrained. // //===----------------------------------------------------------------------===// -#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/AST/Attr.h" #include "clang/Basic/Builtins.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/YAMLTraits.h" #include <climits> #include <initializer_list> #include <utility> @@ -41,11 +43,38 @@ void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; -private: + using ArgVector = SmallVector<unsigned, 2>; + + enum class VariadicType { None, Src, Dst }; + + /// The ``TaintConfiguration`` is used to parse configuration file. + struct TaintConfiguration { + using NameArgsPair = std::pair<std::string, ArgVector>; + + struct Propagation { + std::string Name; + ArgVector SrcArgs; + ArgVector DstArgs; + VariadicType VarType; + unsigned VarIndex; + }; + + std::vector<Propagation> Propagations; + std::vector<NameArgsPair> Filters; + std::vector<NameArgsPair> Sinks; + }; + + /// Get and read the config file. + static void getConfiguration(StringRef ConfigFile); + + /// Parse the config. + static void parseConfiguration(TaintConfiguration &&Config); + static const unsigned InvalidArgIndex = UINT_MAX; /// Denotes the return vale. 
static const unsigned ReturnValueIndex = UINT_MAX - 1; +private: mutable std::unique_ptr<BugType> BT; void initBugType() const { if (!BT) @@ -91,8 +120,6 @@ bool generateReportIfTainted(const Expr *E, const char Msg[], CheckerContext &C) const; - using ArgVector = SmallVector<unsigned, 2>; - /// A struct used to specify taint propagation rules for a function. /// /// If any of the possible taint source arguments is tainted, all of the @@ -103,8 +130,6 @@ /// ReturnValueIndex is added to the dst list, the return value will be /// tainted. struct TaintPropagationRule { - enum class VariadicType { None, Src, Dst }; - using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *, CheckerContext &C); @@ -125,8 +150,7 @@ : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), PropagationFunc(nullptr) {} - TaintPropagationRule(std::initializer_list<unsigned> &&Src, - std::initializer_list<unsigned> &&Dst, + TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, VariadicType Var = VariadicType::None, unsigned VarIndex = InvalidArgIndex, PropagationFuncType Func = nullptr) @@ -170,6 +194,19 @@ static bool postSocket(bool IsTainted, const CallExpr *CE, CheckerContext &C); }; + + using NameRuleMap = llvm::StringMap<TaintPropagationRule>; + using NameArgMap = llvm::StringMap<ArgVector>; + + /// Defines a map between the propagation function's name and + /// TaintPropagationRule. + static NameRuleMap CustomPropagations; + + /// Defines a map between the filter function's name and filtering args. + static NameArgMap CustomFilters; + + /// Defines a map between the sink function's name and sinking args. + static NameArgMap CustomSinks; }; const unsigned GenericTaintChecker::ReturnValueIndex; @@ -188,14 +225,105 @@ "(CERT/STR31-C. 
Guarantee that storage for strings has sufficient space " "for character data and the null terminator)"; +GenericTaintChecker::NameRuleMap GenericTaintChecker::CustomPropagations; + +GenericTaintChecker::NameArgMap GenericTaintChecker::CustomFilters; + +GenericTaintChecker::NameArgMap GenericTaintChecker::CustomSinks; } // end of anonymous namespace +using TaintConfig = GenericTaintChecker::TaintConfiguration; + +LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) +LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair) + +namespace llvm { +namespace yaml { +template <> struct MappingTraits<TaintConfig> { + static void mapping(IO &IO, TaintConfig &Config) { + IO.mapOptional("Propagations", Config.Propagations); + IO.mapOptional("Filters", Config.Filters); + IO.mapOptional("Sinks", Config.Sinks); + } +}; + +template <> struct MappingTraits<TaintConfig::Propagation> { + static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { + IO.mapRequired("Name", Propagation.Name); + IO.mapOptional("SrcArgs", Propagation.SrcArgs); + IO.mapRequired("DstArgs", Propagation.DstArgs); + IO.mapOptional("VarType", Propagation.VarType, + GenericTaintChecker::VariadicType::None); + IO.mapOptional("VarIndex", Propagation.VarIndex, + GenericTaintChecker::InvalidArgIndex); + } +}; + +template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { + static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { + IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); + IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); + IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); + } +}; + +template <> struct MappingTraits<TaintConfig::NameArgsPair> { + static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) { + IO.mapRequired("Name", NameArg.first); + IO.mapRequired("Args", NameArg.second); + } +}; +} // namespace yaml +} // namespace llvm + /// A set which is used to pass information from call pre-visit 
instruction /// to the call post-visit. The values are unsigned integers, which are either /// ReturnValueIndex, or indexes of the pointer/reference argument, which /// points to data, which should be tainted on return. REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) +void GenericTaintChecker::getConfiguration(StringRef ConfigFile) { + if (ConfigFile.trim().empty()) + return; + + llvm::vfs::FileSystem *FS = llvm::vfs::getRealFileSystem().get(); + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buffer = + FS->getBufferForFile(ConfigFile.str()); + + if (std::error_code ec = Buffer.getError()) { + llvm::errs() << "Error when getting TaintPropagation's config file '" + << ConfigFile << "': " << ec.message() << '\n'; + return; + } + + llvm::yaml::Input Input(Buffer.get()->getBuffer()); + TaintConfiguration Config; + Input >> Config; + + if (std::error_code ec = Input.error()) { + return; + } + + parseConfiguration(std::move(Config)); +} + +void GenericTaintChecker::parseConfiguration(TaintConfiguration &&Config) { + for (auto &P : Config.Propagations) { + GenericTaintChecker::CustomPropagations.try_emplace( + P.Name, std::move(P.SrcArgs), std::move(P.DstArgs), P.VarType, + P.VarIndex); + } + + for (auto &F : Config.Filters) { + GenericTaintChecker::CustomFilters.try_emplace(F.first, + std::move(F.second)); + } + + for (auto &S : Config.Sinks) { + GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second)); + } +} + GenericTaintChecker::TaintPropagationRule GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) { @@ -212,7 +340,8 @@ .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex})) .Case("getch", TaintPropagationRule({}, {ReturnValueIndex})) .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex})) - .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("getchar_unlocked", + TaintPropagationRule({}, {ReturnValueIndex})) 
.Case("getenv", TaintPropagationRule({}, {ReturnValueIndex})) .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1)) @@ -443,7 +572,7 @@ // Check for taint in variadic arguments. if (!IsTainted && VariadicType::Src == VarType) { // Check if any of the arguments is tainted - for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { + for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) break; } @@ -474,7 +603,7 @@ // If they are not pointing to const data, mark data as tainted. // TODO: So far we are just going one level down; ideally we'd need to // recurse here. - for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { + for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { const Expr *Arg = CE->getArg(i); // Process pointer argument. const Type *ArgTy = Arg->getType().getTypePtr(); @@ -539,7 +668,7 @@ static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, - unsigned int &ArgNum) { + unsigned &ArgNum) { // Find if the function contains a format string argument. // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, // vsnprintf, syslog, custom annotated functions. @@ -592,7 +721,7 @@ bool GenericTaintChecker::checkUncontrolledFormatString( const CallExpr *CE, CheckerContext &C) const { // Check if the function contains a format string argument. 
- unsigned int ArgNum = 0; + unsigned ArgNum = 0; if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) return false; @@ -666,7 +795,10 @@ } void ento::registerGenericTaintChecker(CheckerManager &mgr) { - mgr.registerChecker<GenericTaintChecker>(); + const auto *Checker = mgr.registerChecker<GenericTaintChecker>(); + StringRef ConfigFile = + mgr.getAnalyzerOptions().getCheckerStringOption(Checker, "Config", ""); + GenericTaintChecker::getConfiguration(ConfigFile); } bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits