================
@@ -0,0 +1,295 @@
+//=== MissingTerminatingZeroChecker.cpp -------------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Check for string arguments passed to C library functions where the
+// terminating zero is missing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+#include "llvm/ADT/BitVector.h"
+#include <sstream>
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+
+struct StringData {
+  const MemRegion *StrRegion;
+  int64_t StrLength;
+  unsigned int Offset;
+  const llvm::BitVector *NonNullData;
+};
+
+class MissingTerminatingZeroChecker
+    : public Checker<check::Bind, check::PreCall> {
+public:
+  void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const;
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+
+  void initOptions(bool NoDefaultIgnore, StringRef IgnoreList);
+
+private:
+  const BugType BT{this, "Missing terminating zero"};
+
+  using IgnoreEntry = std::pair<int, int>;
+  /// Functions (identified by name only) to ignore.
+  /// The entry stores a parameter index, or -1.
+  llvm::StringMap<IgnoreEntry> FunctionsToIgnore = {
+      {"stpncpy", {1, -1}}, {"strncat", {1, -1}}, {"strncmp", {0, 1}},
+      {"strncpy", {1, -1}}, {"strndup", {0, -1}}, {"strnlen", {0, -1}},
+  };
+
+  bool checkArg(unsigned int ArgI, CheckerContext &C,
+                const CallEvent &Call) const;
+  bool getStringData(StringData &DataOut, ProgramStateRef State,
+                     SValBuilder &SVB, const MemRegion *StrReg) const;
+  ProgramStateRef setStringData(ProgramStateRef State, Loc L,
+                                const llvm::BitVector &NonNullData) const;
+  void reportBug(ExplodedNode *N, const Expr *E, CheckerContext &C,
+                 const char Msg[]) const;
+};
+
+} // namespace
+
+namespace llvm {
+template <> struct FoldingSetTrait<llvm::BitVector> {
+  static inline void Profile(llvm::BitVector X, FoldingSetNodeID &ID) {
+    ID.AddInteger(X.size());
+    for (unsigned int I = 0; I < X.size(); ++I)
+      ID.AddBoolean(X[I]);
+  }
+};
+} // end namespace llvm
+
+// Contains for a "string" (character array) region if elements are known to be
+// non-zero. The bit vector is indexed by the array element index and is true
+// if the element is known to be non-zero. Size of the vector does not
+// correspond to the extent of the memory region (can be smaller), the missing
+// elements are considered to be false.
+// (A value of 'false' means that the string element is zero or unknown.)
+REGISTER_MAP_WITH_PROGRAMSTATE(NonNullnessData, const MemRegion *,
+                               llvm::BitVector)
----------------
NagyDonat wrote:

I discussed this architectural question with @dkrupp and @gamesh411 and we 
concluded that it would be better to improve and use the standard `RegionStore` 
instead of introducing a new custom state component.

We understand that improving `RegionStore` is more difficult than implementing 
this fresh specialized data structure, but on a long term it would be a 
nightmare to maintain two (or more if other checkers follow this example) 
overlapping memory models that each have their own advantages and drawbacks.

> If I remember my first idea was to only check the affected (element) regions 
> and it would be a simple checker. But somehow I did not find a way to iterate 
> over the array elements (or subregions).

I also recall that this limitation exists, but it could and should be addressed 
by improving the interface of `RegionStore`.

> Probably array initialization did not appear in the element regions in all 
> cases too.

Yes, there is a performance optimization that array initialization only 
initializes the first few elements, but strings initialized by array 
initialization should be very rare in real-world code, so this shouldn't be a 
serious limitation. If the current limitation is too restrictive, you can 
increase it a bit to get a few more initialized elements.

> Additionally string copy functions can be more difficult to handle in that 
> way.

But modeling the string and memory copy functions is also needed by the other 
checkers (that get data from `RegionStore`), so we should model them by 
updating `RegionStore` even if that's more difficult to implement. (By the way 
@steakhal do you have any plans/ideas about modeling `strcpy` / `memcpy` etc.? 
How difficult would it be to model them with a default binding that places a 
suitable LazyCompoundVal?)

https://github.com/llvm/llvm-project/pull/146664
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to