Author: flovent Date: 2025-07-07T13:46:30+02:00 New Revision: 22357fe33a8a8cc221632e32cb443676f1feeda9
URL: https://github.com/llvm/llvm-project/commit/22357fe33a8a8cc221632e32cb443676f1feeda9 DIFF: https://github.com/llvm/llvm-project/commit/22357fe33a8a8cc221632e32cb443676f1feeda9.diff LOG: [analyzer] Avoid unnecessary super region invalidation in `CStringChecker` (#146212) Bounded string functions takes smallest of two values as it's copy size (`amountCopied` variable in `evalStrcpyCommon`), and it's used to decided whether this operation will cause out-of-bound access and invalidate it's super region if it does. for `strlcat`: `amountCopied = min (size - dstLen - 1 , srcLen)` for others: `amountCopied = min (srcLen, size)` Currently when one of two values is unknown or `SValBuilder` can't decide which one is smaller, `amountCopied` will remain `UnknownVal`, which will invalidate copy destination's super region unconditionally. This patch add check to see if one of these two values is definitely in-bound, if so `amountCopied` has to be in-bound too, because it‘s less than or equal to them, we can avoid the invalidation of super region and some related false positives in this situation. Note: This patch uses `size` as an approximation of `size - dstLen - 1` in `strlcat` case because currently analyzer doesn't handle complex expressions like this very well. Closes #143807. Added: clang/test/Analysis/cstring-should-not-invalidate.cpp Modified: clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp Removed: ################################################################################ diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 4d12fdcec1f1a..31cb150892a5d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -2223,6 +2223,44 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, Result = lastElement; } + // For bounded method, amountCopied take the minimum of two values, + // for ConcatFnKind::strlcat: + // amountCopied = min (size - dstLen - 1 , srcLen) + // for others: + // amountCopied = min (srcLen, size) + // So even if we don't know about amountCopied, as long as one of them will + // not cause an out-of-bound access, the whole function's operation will not + // too, that will avoid invalidating the superRegion of data member in that + // situation. + bool CouldAccessOutOfBound = true; + if (IsBounded && amountCopied.isUnknown()) { + auto CouldAccessOutOfBoundForSVal = + [&](std::optional<NonLoc> Val) -> bool { + if (!Val) + return true; + return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression), + Dst.Expression->getType(), *Val, + C.getASTContext().getSizeType()); + }; + + CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(strLengthNL); + + if (CouldAccessOutOfBound) { + // Get the max number of characters to copy. + const Expr *LenExpr = Call.getArgExpr(2); + SVal LenVal = state->getSVal(LenExpr, LCtx); + + // Protect against misdeclared strncpy(). + LenVal = svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType()); + + // Because analyzer doesn't handle expressions like `size - + // dstLen - 1` very well, we roughly use `size` for + // ConcatFnKind::strlcat here, same with other concat kinds. + CouldAccessOutOfBound = + CouldAccessOutOfBoundForSVal(LenVal.getAs<NonLoc>()); + } + } + // Invalidate the destination (regular invalidation without pointer-escaping // the address of the top-level region). This must happen before we set the // C string length because invalidation will clear the length. @@ -2230,9 +2268,13 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, // can use LazyCompoundVals to copy the source values into the destination. // This would probably remove any existing bindings past the end of the // string, but that's still an improvement over blank invalidation. - state = invalidateDestinationBufferBySize( - C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal, - amountCopied, C.getASTContext().getSizeType()); + if (CouldAccessOutOfBound) + state = invalidateDestinationBufferBySize( + C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal, + amountCopied, C.getASTContext().getSizeType()); + else + state = invalidateDestinationBufferNeverOverflows( + C, state, Call.getCFGElementRef(), *dstRegVal); // Invalidate the source (const-invalidation without const-pointer-escaping // the address of the top-level region). diff --git a/clang/test/Analysis/cstring-should-not-invalidate.cpp b/clang/test/Analysis/cstring-should-not-invalidate.cpp new file mode 100644 index 0000000000000..abd4689b2f9b9 --- /dev/null +++ b/clang/test/Analysis/cstring-should-not-invalidate.cpp @@ -0,0 +1,132 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection -verify %s + +typedef decltype(sizeof(int)) size_t; +void clang_analyzer_eval(bool); + +char *strncpy(char *dest, const char *src, size_t x); + +constexpr int initB = 100; +struct Base { + int b; + Base(): b(initB) {} +}; + +// issue 143807 +struct strncpyTestClass: public Base { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strncpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strncpy(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strncpyTest(char *src, size_t n) { + strncpyTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); + clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}} +} + +size_t strlcpy(char *dest, const char *src, size_t size); + +struct strlcpyTestClass: public Base { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strlcpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strlcpy(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strlcpyTest(char *src, size_t n) { + strlcpyTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); + clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}} +} + +char *strncat(char *s1, const char *s2, size_t n); + +struct strncatTestClass: public Base { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strncat(m_buff, src, sizeof(m_buff) - 1); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strncat(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strncatTest(char *src, size_t n) { + strncatTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); + clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}} +} + +struct strncatReportOutOfBoundTestClass { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + // expected-warning@+1{{String concatenation function overflows the destination buffer}} + strncat(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } +}; + +void strncatReportOutOfBoundTest(char *src, size_t n) { + strncatReportOutOfBoundTestClass rep; + rep.KnownLen(src); +} + +size_t strlcat(char *dst, const char *src, size_t size); + +struct strlcatTestClass: public Base { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strlcat(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strlcat(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strlcatTest(char *src, size_t n) { + strlcatTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); + clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}} +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits