https://github.com/flovent updated https://github.com/llvm/llvm-project/pull/146212
>From 9da53c788fc01cd3fc2dd4c178b836035b5d380b Mon Sep 17 00:00:00 2001 From: flovent <flb...@protonmail.com> Date: Sat, 28 Jun 2025 20:58:43 +0800 Subject: [PATCH 1/5] [analyzer] Avoid unnecessary super region invalidation in `CStringChecker` --- .../Checkers/CStringChecker.cpp | 77 ++++++++++++- .../cstring-should-not-invalidate.cpp | 107 ++++++++++++++++++ 2 files changed, 178 insertions(+), 6 deletions(-) create mode 100644 clang/test/Analysis/cstring-should-not-invalidate.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 4d12fdcec1f1a..433fd2ce5f292 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -272,7 +272,8 @@ class CStringChecker : public Checker< eval::Call, static ProgramStateRef invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S, const Expr *BufE, ConstCFGElementRef Elem, - SVal BufV, SVal SizeV, QualType SizeTy); + SVal BufV, SVal SizeV, QualType SizeTy, + bool CouldAccessOutOfBound = true); /// Operation never overflows, do not invalidate the super region. static ProgramStateRef invalidateDestinationBufferNeverOverflows( @@ -1211,14 +1212,17 @@ bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State, ProgramStateRef CStringChecker::invalidateDestinationBufferBySize( CheckerContext &C, ProgramStateRef S, const Expr *BufE, - ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy) { + ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy, + bool CouldAccessOutOfBound) { auto InvalidationTraitOperations = - [&C, S, BufTy = BufE->getType(), BufV, SizeV, - SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { + [&C, S, BufTy = BufE->getType(), BufV, SizeV, SizeTy, + CouldAccessOutOfBound](RegionAndSymbolInvalidationTraits &ITraits, + const MemRegion *R) { // If destination buffer is a field region and access is in bound, do // not invalidate its super region. if (MemRegion::FieldRegionKind == R->getKind() && - isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) { + (!CouldAccessOutOfBound || + isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy))) { ITraits.setTrait( R, RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); @@ -2223,6 +2227,67 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, Result = lastElement; } + // For bounded method, amountCopied take the minimum of two values, + // for ConcatFnKind::strlcat: + // amountCopied = min (size - dstLen - 1 , srcLen) + // for others: + // amountCopied = min (srcLen, size) + // So even if we don't know about amountCopied, as long as one of them will + // not cause an out-of-bound access, the whole function's operation will not + // too, that will avoid invalidating the superRegion of data member in that + // situation. + bool CouldAccessOutOfBound = true; + if (IsBounded && amountCopied.isUnknown()) { + // Get the max number of characters to copy. + SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}}; + SVal lenVal = state->getSVal(lenExpr.Expression, LCtx); + + // Protect against misdeclared strncpy(). + lenVal = + svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType()); + + std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>(); + + auto CouldAccessOutOfBoundForSVal = [&](NonLoc Val) -> bool { + return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression), + Dst.Expression->getType(), Val, + C.getASTContext().getSizeType()); + }; + + if (strLengthNL) { + CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*strLengthNL); + } + + if (CouldAccessOutOfBound && lenValNL) { + switch (appendK) { + case ConcatFnKind::none: + case ConcatFnKind::strcat: { + CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*lenValNL); + break; + } + case ConcatFnKind::strlcat: { + if (!dstStrLengthNL) + break; + + SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, + *dstStrLengthNL, sizeTy); + if (!isa<NonLoc>(freeSpace)) + break; + + freeSpace = + svalBuilder.evalBinOp(state, BO_Sub, freeSpace, + svalBuilder.makeIntVal(1, sizeTy), sizeTy); + std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>(); + if (!freeSpaceNL) + break; + + CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*freeSpaceNL); + break; + } + } + } + } + // Invalidate the destination (regular invalidation without pointer-escaping // the address of the top-level region). This must happen before we set the // C string length because invalidation will clear the length. @@ -2232,7 +2297,7 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, // string, but that's still an improvement over blank invalidation. state = invalidateDestinationBufferBySize( C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal, - amountCopied, C.getASTContext().getSizeType()); + amountCopied, C.getASTContext().getSizeType(), CouldAccessOutOfBound); // Invalidate the source (const-invalidation without const-pointer-escaping // the address of the top-level region). diff --git a/clang/test/Analysis/cstring-should-not-invalidate.cpp b/clang/test/Analysis/cstring-should-not-invalidate.cpp new file mode 100644 index 0000000000000..14c92447c52ca --- /dev/null +++ b/clang/test/Analysis/cstring-should-not-invalidate.cpp @@ -0,0 +1,107 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection +// -analyzer-config c++-inlining=constructors -verify %s + +// expected-no-diagnostics + +typedef unsigned int size_t; + +char *strncpy(char *dest, const char *src, size_t x); + +// issue 143807 +struct strncpyTestClass { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strncpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strncpy(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strncpyTest(char *src, size_t n) { + strncpyTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); +} + +size_t strlcpy(char *dest, const char *src, size_t size); + +struct strlcpyTestClass { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strlcpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strlcpy(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strlcpyTest(char *src, size_t n) { + strlcpyTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); +} + +char *strncat(char *s1, const char *s2, size_t n); + +struct strncatTestClass { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strncat(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strncat(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strncatTest(char *src, size_t n) { + strncatTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); +} + +size_t strlcat(char *dst, const char *src, size_t size); + +struct strlcatTestClass { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strlcat(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strlcat(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strlcatTest(char *src, size_t n) { + strlcatTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); +} >From 60e75d062e4753be437bb923a063c2c85efe9cba Mon Sep 17 00:00:00 2001 From: flovent <flb...@protonmail.com> Date: Mon, 30 Jun 2025 22:54:25 +0800 Subject: [PATCH 2/5] address comment for testcase --- clang/test/Analysis/cstring-should-not-invalidate.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/test/Analysis/cstring-should-not-invalidate.cpp b/clang/test/Analysis/cstring-should-not-invalidate.cpp index 14c92447c52ca..f0c5e5fc3c490 100644 --- a/clang/test/Analysis/cstring-should-not-invalidate.cpp +++ b/clang/test/Analysis/cstring-should-not-invalidate.cpp @@ -1,9 +1,8 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -// -analyzer-config c++-inlining=constructors -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s // expected-no-diagnostics -typedef unsigned int size_t; +typedef decltype(sizeof(int)) size_t; char *strncpy(char *dest, const char *src, size_t x); >From 22d662b0eab60de987b3c12ae6475dfb46bc9efb Mon Sep 17 00:00:00 2001 From: flovent <flb...@protonmail.com> Date: Mon, 30 Jun 2025 23:20:14 +0800 Subject: [PATCH 3/5] refactor: use `invalidateDestinationBufferNeverOverflows` --- .../Checkers/CStringChecker.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 433fd2ce5f292..04c9a5957bf93 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -272,8 +272,7 @@ class CStringChecker : public Checker< eval::Call, static ProgramStateRef invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S, const Expr *BufE, ConstCFGElementRef Elem, - SVal BufV, SVal SizeV, QualType SizeTy, - bool CouldAccessOutOfBound = true); + SVal BufV, SVal SizeV, QualType SizeTy); /// Operation never overflows, do not invalidate the super region. static ProgramStateRef invalidateDestinationBufferNeverOverflows( @@ -1212,17 +1211,14 @@ bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State, ProgramStateRef CStringChecker::invalidateDestinationBufferBySize( CheckerContext &C, ProgramStateRef S, const Expr *BufE, - ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy, - bool CouldAccessOutOfBound) { + ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy) { auto InvalidationTraitOperations = - [&C, S, BufTy = BufE->getType(), BufV, SizeV, SizeTy, - CouldAccessOutOfBound](RegionAndSymbolInvalidationTraits &ITraits, - const MemRegion *R) { + [&C, S, BufTy = BufE->getType(), BufV, SizeV, + SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { // If destination buffer is a field region and access is in bound, do // not invalidate its super region. if (MemRegion::FieldRegionKind == R->getKind() && - (!CouldAccessOutOfBound || - isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy))) { + isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) { ITraits.setTrait( R, RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); @@ -2295,9 +2291,13 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, // can use LazyCompoundVals to copy the source values into the destination. // This would probably remove any existing bindings past the end of the // string, but that's still an improvement over blank invalidation. - state = invalidateDestinationBufferBySize( - C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal, - amountCopied, C.getASTContext().getSizeType(), CouldAccessOutOfBound); + if (CouldAccessOutOfBound) + state = invalidateDestinationBufferBySize( + C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal, + amountCopied, C.getASTContext().getSizeType()); + else + state = invalidateDestinationBufferNeverOverflows( + C, state, Call.getCFGElementRef(), *dstRegVal); // Invalidate the source (const-invalidation without const-pointer-escaping // the address of the top-level region). >From 1926e7ece429a28c3fa0a8b301582184b3d792f9 Mon Sep 17 00:00:00 2001 From: flovent <flb...@protonmail.com> Date: Wed, 2 Jul 2025 23:18:17 +0800 Subject: [PATCH 4/5] use UpperCamelCase for variables and remove SizeArgExpr usage --- .../Checkers/CStringChecker.cpp | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 04c9a5957bf93..649fb6014ef6d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -2235,14 +2235,14 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, bool CouldAccessOutOfBound = true; if (IsBounded && amountCopied.isUnknown()) { // Get the max number of characters to copy. - SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}}; - SVal lenVal = state->getSVal(lenExpr.Expression, LCtx); + const Expr *LenExpr = Call.getArgExpr(2); + SVal LenVal = state->getSVal(LenExpr, LCtx); // Protect against misdeclared strncpy(). - lenVal = - svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType()); + LenVal = + svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType()); - std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>(); + std::optional<NonLoc> LenValNL = LenVal.getAs<NonLoc>(); auto CouldAccessOutOfBoundForSVal = [&](NonLoc Val) -> bool { return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression), @@ -2254,30 +2254,30 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*strLengthNL); } - if (CouldAccessOutOfBound && lenValNL) { + if (CouldAccessOutOfBound && LenValNL) { switch (appendK) { case ConcatFnKind::none: case ConcatFnKind::strcat: { - CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*lenValNL); + CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*LenValNL); break; } case ConcatFnKind::strlcat: { if (!dstStrLengthNL) break; - SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, + SVal FreeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *LenValNL, *dstStrLengthNL, sizeTy); - if (!isa<NonLoc>(freeSpace)) + if (!isa<NonLoc>(FreeSpace)) break; - freeSpace = - svalBuilder.evalBinOp(state, BO_Sub, freeSpace, + FreeSpace = + svalBuilder.evalBinOp(state, BO_Sub, FreeSpace, svalBuilder.makeIntVal(1, sizeTy), sizeTy); - std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>(); - if (!freeSpaceNL) + std::optional<NonLoc> FreeSpaceNL = FreeSpace.getAs<NonLoc>(); + if (!FreeSpaceNL) break; - CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*freeSpaceNL); + CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*FreeSpaceNL); break; } } >From 89f195c4852a01d354dec032991f4551a21c6ec0 Mon Sep 17 00:00:00 2001 From: flovent <flb...@protonmail.com> Date: Thu, 3 Jul 2025 21:14:51 +0800 Subject: [PATCH 5/5] use `size` instead of `size - dstLen - 1` for strlcat kind --- .../Checkers/CStringChecker.cpp | 50 ++++++------------- 1 file changed, 14 insertions(+), 36 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 649fb6014ef6d..29c0dc95929ac 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -2234,16 +2234,6 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, // situation. bool CouldAccessOutOfBound = true; if (IsBounded && amountCopied.isUnknown()) { - // Get the max number of characters to copy. - const Expr *LenExpr = Call.getArgExpr(2); - SVal LenVal = state->getSVal(LenExpr, LCtx); - - // Protect against misdeclared strncpy(). - LenVal = - svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType()); - - std::optional<NonLoc> LenValNL = LenVal.getAs<NonLoc>(); - auto CouldAccessOutOfBoundForSVal = [&](NonLoc Val) -> bool { return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression), Dst.Expression->getType(), Val, @@ -2254,33 +2244,21 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*strLengthNL); } - if (CouldAccessOutOfBound && LenValNL) { - switch (appendK) { - case ConcatFnKind::none: - case ConcatFnKind::strcat: { + if (CouldAccessOutOfBound) { + // Get the max number of characters to copy. + const Expr *LenExpr = Call.getArgExpr(2); + SVal LenVal = state->getSVal(LenExpr, LCtx); + + // Protect against misdeclared strncpy(). + LenVal = svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType()); + + std::optional<NonLoc> LenValNL = LenVal.getAs<NonLoc>(); + + // Because analyzer doesn't handle expressions like `size - + // dstLen - 1` very well, we roughly use `size` for + // ConcatFnKind::strlcat here, same with other concat kinds. + if (LenValNL) CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*LenValNL); - break; - } - case ConcatFnKind::strlcat: { - if (!dstStrLengthNL) - break; - - SVal FreeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *LenValNL, - *dstStrLengthNL, sizeTy); - if (!isa<NonLoc>(FreeSpace)) - break; - - FreeSpace = - svalBuilder.evalBinOp(state, BO_Sub, FreeSpace, - svalBuilder.makeIntVal(1, sizeTy), sizeTy); - std::optional<NonLoc> FreeSpaceNL = FreeSpace.getAs<NonLoc>(); - if (!FreeSpaceNL) - break; - - CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*FreeSpaceNL); - break; - } - } } } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits