================ @@ -0,0 +1,295 @@ +//=== MissingTerminatingZeroChecker.cpp -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Check for string arguments passed to C library functions where the +// terminating zero is missing. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" +#include "llvm/ADT/BitVector.h" +#include <sstream> + +using namespace clang; +using namespace ento; + +namespace { + +struct StringData { + const MemRegion *StrRegion; + int64_t StrLength; + unsigned int Offset; + const llvm::BitVector *NonNullData; +}; + +class MissingTerminatingZeroChecker + : public Checker<check::Bind, check::PreCall> { +public: + void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + + void initOptions(bool NoDefaultIgnore, StringRef IgnoreList); + +private: + const BugType BT{this, "Missing terminating zero"}; + + using IgnoreEntry = std::pair<int, int>; + /// Functions (identified by name only) to ignore. + /// The entry stores a parameter index, or -1. + llvm::StringMap<IgnoreEntry> FunctionsToIgnore = { + {"stpncpy", {1, -1}}, {"strncat", {1, -1}}, {"strncmp", {0, 1}}, + {"strncpy", {1, -1}}, {"strndup", {0, -1}}, {"strnlen", {0, -1}}, + }; + + bool checkArg(unsigned int ArgI, CheckerContext &C, + const CallEvent &Call) const; + bool getStringData(StringData &DataOut, ProgramStateRef State, + SValBuilder &SVB, const MemRegion *StrReg) const; + ProgramStateRef setStringData(ProgramStateRef State, Loc L, + const llvm::BitVector &NonNullData) const; + void reportBug(ExplodedNode *N, const Expr *E, CheckerContext &C, + const char Msg[]) const; +}; + +} // namespace + +namespace llvm { +template <> struct FoldingSetTrait<llvm::BitVector> { + static inline void Profile(llvm::BitVector X, FoldingSetNodeID &ID) { + ID.AddInteger(X.size()); + for (unsigned int I = 0; I < X.size(); ++I) + ID.AddBoolean(X[I]); + } +}; +} // end namespace llvm + +// Contains for a "string" (character array) region if elements are known to be +// non-zero. The bit vector is indexed by the array element index and is true +// if the element is known to be non-zero. Size of the vector does not +// correspond to the extent of the memory region (can be smaller), the missing +// elements are considered to be false. +// (A value of 'false' means that the string element is zero or unknown.) +REGISTER_MAP_WITH_PROGRAMSTATE(NonNullnessData, const MemRegion *, + llvm::BitVector) ---------------- NagyDonat wrote:
I discussed this architectural question with @dkrupp and @gamesh411 and we concluded that it would be better to improve and use the standard `RegionStore` instead of introducing a new custom state component. We understand that improving `RegionStore` is more difficult than implementing this fresh specialized data structure, but on a long term it would be a nightmare to maintain two (or more if other checkers follow this example) overlapping memory models that each have their own advantages and drawbacks. > If I remember my first idea was to only check the affected (element) regions > and it would be a simple checker. But somehow I did not find a way to iterate > over the array elements (or subregions). I also recall that this limitation exists, but it could and should be addressed by improving the interface of `RegionStore`. > Probably array initialization did not appear in the element regions in all > cases too. Yes, there is a performance optimization that array initialization only initializes the first few elements, but strings initialized by array initialization should be very rare in real-world code, so this shouldn't be a serious limitation. If the current limitation is too restrictive, you can increase it a bit to get a few more initialized elements. > Additionally string copy functions can be more difficult to handle in that > way. But modeling the string and memory copy functions is also needed by the other checkers (that get data from `RegionStore`), so we should model them by updating `RegionStore` even if that's more difficult to implement. (By the way @steakhal do you have any plans/ideas about modeling `strcpy` / `memcpy` etc.? How difficult would it be to model them with a default binding that places a suitable LazyCompoundVal?) https://github.com/llvm/llvm-project/pull/146664 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits