Author: flovent
Date: 2025-07-07T13:46:30+02:00
New Revision: 22357fe33a8a8cc221632e32cb443676f1feeda9

URL: 
https://github.com/llvm/llvm-project/commit/22357fe33a8a8cc221632e32cb443676f1feeda9
DIFF: 
https://github.com/llvm/llvm-project/commit/22357fe33a8a8cc221632e32cb443676f1feeda9.diff

LOG: [analyzer] Avoid unnecessary super region invalidation in `CStringChecker` 
(#146212)

Bounded string functions takes smallest of two values as it's copy size
(`amountCopied` variable in `evalStrcpyCommon`), and it's used to
decided whether this operation will cause out-of-bound access and
invalidate it's super region if it does.

for `strlcat`: `amountCopied = min (size - dstLen - 1 , srcLen)`
for others: `amountCopied = min (srcLen, size)`

Currently when one of two values is unknown or `SValBuilder` can't
decide which one is smaller, `amountCopied` will remain `UnknownVal`,
which will invalidate copy destination's super region unconditionally.

This patch add check to see if one of these two values is definitely
in-bound, if so `amountCopied` has to be in-bound too, because it‘s less
than or equal to them, we can avoid the invalidation of super region and
some related false positives in this situation.

Note: This patch uses `size` as an approximation of `size - dstLen - 1`
in `strlcat` case because currently analyzer doesn't handle complex
expressions like this very well.

Closes #143807.

Added: 
    clang/test/Analysis/cstring-should-not-invalidate.cpp

Modified: 
    clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 4d12fdcec1f1a..31cb150892a5d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -2223,6 +2223,44 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, 
const CallEvent &Call,
         Result = lastElement;
     }
 
+    // For bounded method, amountCopied take the minimum of two values,
+    // for ConcatFnKind::strlcat:
+    // amountCopied = min (size - dstLen - 1 , srcLen)
+    // for others:
+    // amountCopied = min (srcLen, size)
+    // So even if we don't know about amountCopied, as long as one of them will
+    // not cause an out-of-bound access, the whole function's operation will 
not
+    // too, that will avoid invalidating the superRegion of data member in that
+    // situation.
+    bool CouldAccessOutOfBound = true;
+    if (IsBounded && amountCopied.isUnknown()) {
+      auto CouldAccessOutOfBoundForSVal =
+          [&](std::optional<NonLoc> Val) -> bool {
+        if (!Val)
+          return true;
+        return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression),
+                                  Dst.Expression->getType(), *Val,
+                                  C.getASTContext().getSizeType());
+      };
+
+      CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(strLengthNL);
+
+      if (CouldAccessOutOfBound) {
+        // Get the max number of characters to copy.
+        const Expr *LenExpr = Call.getArgExpr(2);
+        SVal LenVal = state->getSVal(LenExpr, LCtx);
+
+        // Protect against misdeclared strncpy().
+        LenVal = svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType());
+
+        // Because analyzer doesn't handle expressions like `size -
+        // dstLen - 1` very well, we roughly use `size` for
+        // ConcatFnKind::strlcat here, same with other concat kinds.
+        CouldAccessOutOfBound =
+            CouldAccessOutOfBoundForSVal(LenVal.getAs<NonLoc>());
+      }
+    }
+
     // Invalidate the destination (regular invalidation without 
pointer-escaping
     // the address of the top-level region). This must happen before we set the
     // C string length because invalidation will clear the length.
@@ -2230,9 +2268,13 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, 
const CallEvent &Call,
     // can use LazyCompoundVals to copy the source values into the destination.
     // This would probably remove any existing bindings past the end of the
     // string, but that's still an improvement over blank invalidation.
-    state = invalidateDestinationBufferBySize(
-        C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
-        amountCopied, C.getASTContext().getSizeType());
+    if (CouldAccessOutOfBound)
+      state = invalidateDestinationBufferBySize(
+          C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
+          amountCopied, C.getASTContext().getSizeType());
+    else
+      state = invalidateDestinationBufferNeverOverflows(
+          C, state, Call.getCFGElementRef(), *dstRegVal);
 
     // Invalidate the source (const-invalidation without const-pointer-escaping
     // the address of the top-level region).

diff  --git a/clang/test/Analysis/cstring-should-not-invalidate.cpp 
b/clang/test/Analysis/cstring-should-not-invalidate.cpp
new file mode 100644
index 0000000000000..abd4689b2f9b9
--- /dev/null
+++ b/clang/test/Analysis/cstring-should-not-invalidate.cpp
@@ -0,0 +1,132 @@
+// RUN: %clang_analyze_cc1 
-analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection 
-verify %s
+
+typedef decltype(sizeof(int)) size_t;
+void clang_analyzer_eval(bool);
+
+char *strncpy(char *dest, const char *src, size_t x);
+
+constexpr int initB = 100;
+struct Base {
+  int b;
+  Base(): b(initB) {}
+};
+
+// issue 143807
+struct strncpyTestClass: public Base {
+  int *m_ptr;
+  char m_buff[1000];
+
+  void KnownLen(char *src) {
+    m_ptr = new int;
+    strncpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size
+    delete m_ptr;                         // no warning
+  }
+
+  void KnownSrcLen(size_t n) {
+    m_ptr = new int;
+    strncpy(m_buff, "xyz", n); // known src size but unknown len
+    delete m_ptr;              // no warning
+  }
+};
+
+void strncpyTest(char *src, size_t n) {
+  strncpyTestClass rep;
+  rep.KnownLen(src);
+  rep.KnownSrcLen(n);
+  clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
+}
+
+size_t strlcpy(char *dest, const char *src, size_t size);
+
+struct strlcpyTestClass: public Base {
+  int *m_ptr;
+  char m_buff[1000];
+
+  void KnownLen(char *src) {
+    m_ptr = new int;
+    strlcpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size
+    delete m_ptr;                         // no warning
+  }
+
+  void KnownSrcLen(size_t n) {
+    m_ptr = new int;
+    strlcpy(m_buff, "xyz", n); // known src size but unknown len
+    delete m_ptr;              // no warning
+  }
+};
+
+void strlcpyTest(char *src, size_t n) {
+  strlcpyTestClass rep;
+  rep.KnownLen(src);
+  rep.KnownSrcLen(n);
+  clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
+}
+
+char *strncat(char *s1, const char *s2, size_t n);
+
+struct strncatTestClass: public Base {
+  int *m_ptr;
+  char m_buff[1000];
+
+  void KnownLen(char *src) {
+    m_ptr = new int;
+    strncat(m_buff, src, sizeof(m_buff) - 1); // known len but unknown src size
+    delete m_ptr;                         // no warning
+  }
+
+  void KnownSrcLen(size_t n) {
+    m_ptr = new int;
+    strncat(m_buff, "xyz", n); // known src size but unknown len
+    delete m_ptr;              // no warning
+  }
+};
+
+void strncatTest(char *src, size_t n) {
+  strncatTestClass rep;
+  rep.KnownLen(src);
+  rep.KnownSrcLen(n);
+  clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
+}
+
+struct strncatReportOutOfBoundTestClass {
+  int *m_ptr;
+  char m_buff[1000];
+
+  void KnownLen(char *src) {
+    m_ptr = new int;
+    // expected-warning@+1{{String concatenation function overflows the 
destination buffer}}
+    strncat(m_buff, src, sizeof(m_buff)); // known len but unknown src size
+    delete m_ptr;                         // no warning
+  }
+};
+
+void strncatReportOutOfBoundTest(char *src, size_t n) {
+  strncatReportOutOfBoundTestClass rep;
+  rep.KnownLen(src);
+}
+
+size_t strlcat(char *dst, const char *src, size_t size);
+
+struct strlcatTestClass: public Base {
+  int *m_ptr;
+  char m_buff[1000];
+
+  void KnownLen(char *src) {
+    m_ptr = new int;
+    strlcat(m_buff, src, sizeof(m_buff)); // known len but unknown src size
+    delete m_ptr;                         // no warning
+  }
+
+  void KnownSrcLen(size_t n) {
+    m_ptr = new int;
+    strlcat(m_buff, "xyz", n); // known src size but unknown len
+    delete m_ptr;              // no warning
+  }
+};
+
+void strlcatTest(char *src, size_t n) {
+  strlcatTestClass rep;
+  rep.KnownLen(src);
+  rep.KnownSrcLen(n);
+  clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
+}


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to