Author: DonĂ¡t Nagy Date: 2025-09-17T13:18:13+02:00 New Revision: 5cbaf5552e9cab25dc3eb760ffb5ab7d3dd59249
URL: https://github.com/llvm/llvm-project/commit/5cbaf5552e9cab25dc3eb760ffb5ab7d3dd59249 DIFF: https://github.com/llvm/llvm-project/commit/5cbaf5552e9cab25dc3eb760ffb5ab7d3dd59249.diff LOG: [analyzer] Show element count in ArrayBound underflow reports (#158639) The underflow reports of checker security.ArrayBound previously displayed the (negative) byte offset of the accessed location; but those numbers were a bit hard to decipher (especially when the elements were large structs), so this commit replaces the byte offset with an index value (if the offset is an integer multiple of the size of the accessed element). This change only affects the content of the messages; the checker will find the same issues before and after this commit. Added: Modified: clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp clang/test/Analysis/ArrayBound/assumption-reporting.c clang/test/Analysis/ArrayBound/verbose-tests.c Removed: ################################################################################ diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp index d35031b5c22df..6ad5acd4e76f2 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp @@ -130,6 +130,20 @@ struct Messages { std::string Short, Full; }; +enum class BadOffsetKind { Negative, Overflowing, Indeterminate }; + +constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing", + "a negative or overflowing"}; +static StringRef asAdjective(BadOffsetKind Problem) { + return Adjectives[static_cast<int>(Problem)]; +} + +constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end of", + "around"}; +static StringRef asPreposition(BadOffsetKind Problem) { + return Prepositions[static_cast<int>(Problem)]; +} + // NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt` // instead of `PreStmt` because the current implementation passes the whole // expression to `CheckerContext::getSVal()` which only works after the @@ -388,18 +402,6 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) { return SV ? getConcreteValue(*SV) : std::nullopt; } -static Messages getPrecedesMsgs(const MemSpaceRegion *Space, - const SubRegion *Region, NonLoc Offset) { - std::string RegName = getRegionName(Space, Region), OffsetStr = ""; - - if (auto ConcreteOffset = getConcreteValue(Offset)) - OffsetStr = formatv(" {0}", ConcreteOffset); - - return { - formatv("Out of bound access to memory preceding {0}", RegName), - formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)}; -} - /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if /// it can be performed (`Divisor` is nonzero and there is no remainder). The /// values `Val1` and `Val2` may be nullopt and in that case the corresponding @@ -419,10 +421,11 @@ static bool tryDividePair(std::optional<int64_t> &Val1, return true; } -static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space, - const SubRegion *Region, NonLoc Offset, - NonLoc Extent, SVal Location, - bool AlsoMentionUnderflow) { +static Messages getNonTaintMsgs(const ASTContext &ACtx, + const MemSpaceRegion *Space, + const SubRegion *Region, NonLoc Offset, + std::optional<NonLoc> Extent, SVal Location, + BadOffsetKind Problem) { std::string RegName = getRegionName(Space, Region); const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>(); assert(EReg && "this checker only handles element access"); @@ -439,15 +442,21 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space, SmallString<256> Buf; llvm::raw_svector_ostream Out(Buf); Out << "Access of "; - if (!ExtentN && !UseByteOffsets) + if (OffsetN && !ExtentN && !UseByteOffsets) { + // If the offset is reported as an index, then the report must mention the + // element type (because it is not always clear from the code). It's more + // natural to mention the element type later where the extent is described, + // but if the extent is unknown/irrelevant, then the element type can be + // inserted into the message at this point. Out << "'" << ElemType.getAsString() << "' element in "; + } Out << RegName << " at "; - if (AlsoMentionUnderflow) { - Out << "a negative or overflowing " << OffsetOrIndex; - } else if (OffsetN) { + if (OffsetN) { + if (Problem == BadOffsetKind::Negative) + Out << "negative "; Out << OffsetOrIndex << " " << *OffsetN; } else { - Out << "an overflowing " << OffsetOrIndex; + Out << asAdjective(Problem) << " " << OffsetOrIndex; } if (ExtentN) { Out << ", while it holds only "; @@ -465,8 +474,7 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space, } return {formatv("Out of bound access to memory {0} {1}", - AlsoMentionUnderflow ? "around" : "after the end of", - RegName), + asPreposition(Problem), RegName), std::string(Buf)}; } @@ -635,7 +643,9 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { } else { if (!WithinLowerBound) { // ...and it cannot be valid (>= 0), so report an error. - Messages Msgs = getPrecedesMsgs(Space, Reg, ByteOffset); + Messages Msgs = getNonTaintMsgs(C.getASTContext(), Space, Reg, + ByteOffset, /*Extent=*/std::nullopt, + Location, BadOffsetKind::Negative); reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt); return; } @@ -677,9 +687,12 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { return; } + BadOffsetKind Problem = AlsoMentionUnderflow + ? BadOffsetKind::Indeterminate + : BadOffsetKind::Overflowing; Messages Msgs = - getExceedsMsgs(C.getASTContext(), Space, Reg, ByteOffset, - *KnownSize, Location, AlsoMentionUnderflow); + getNonTaintMsgs(C.getASTContext(), Space, Reg, ByteOffset, + *KnownSize, Location, Problem); reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize); return; } diff --git a/clang/test/Analysis/ArrayBound/assumption-reporting.c b/clang/test/Analysis/ArrayBound/assumption-reporting.c index 535e623baa815..bffd5d9bc35b5 100644 --- a/clang/test/Analysis/ArrayBound/assumption-reporting.c +++ b/clang/test/Analysis/ArrayBound/assumption-reporting.c @@ -70,7 +70,7 @@ int assumingUpper(int arg) { // expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}} int b = TenElements[arg - 10]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b; } @@ -99,7 +99,7 @@ int assumingUpperUnsigned(unsigned arg) { // expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}} int b = TenElements[(int)arg - 10]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b; } @@ -111,7 +111,7 @@ int assumingNothing(unsigned arg) { int a = TenElements[arg]; // no note here, we already know that 'arg' is in bounds int b = TenElements[(int)arg - 10]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b; } @@ -145,7 +145,7 @@ int assumingConvertedToIntP(struct foo f, int arg) { // expected-note@-1 {{Assuming byte offset is less than 5, the extent of 'f.b'}} int c = TenElements[arg-2]; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at a negative index}} return a + b + c; } diff --git a/clang/test/Analysis/ArrayBound/verbose-tests.c b/clang/test/Analysis/ArrayBound/verbose-tests.c index 84d238ed1a2a4..e3416886d13e5 100644 --- a/clang/test/Analysis/ArrayBound/verbose-tests.c +++ b/clang/test/Analysis/ArrayBound/verbose-tests.c @@ -11,7 +11,7 @@ int TenElements[10]; void arrayUnderflow(void) { TenElements[-3] = 5; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}} + // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -3}} } int underflowWithDeref(void) { @@ -19,9 +19,39 @@ int underflowWithDeref(void) { --p; return *p; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}} + // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -1}} +} + +char underflowReportedAsChar(void) { + // Underflow is reported with the type of the accessed element (here 'char'), + // not the type that appears in the declaration of the original array (which + // would be 'int'). + return ((char *)TenElements)[-1]; + // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} + // expected-note@-2 {{Access of 'char' element in 'TenElements' at negative index -1}} } +struct TwoInts { + int a, b; +}; + +struct TwoInts underflowReportedAsStruct(void) { + // Another case where the accessed type is used for reporting the offset. + return *(struct TwoInts*)(TenElements - 4); + // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} + // expected-note@-2 {{Access of 'struct TwoInts' element in 'TenElements' at negative index -2}} +} + +struct TwoInts underflowOnlyByteOffset(void) { + // In this case the negative byte offset is not a multiple of the size of the + // accessed element, so the part "= -... * sizeof(type)" is omitted at the + // end of the message. + return *(struct TwoInts*)(TenElements - 3); + // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}} +} + + int rng(void); int getIndex(void) { switch (rng()) { @@ -40,10 +70,10 @@ void gh86959(void) { while (rng()) TenElements[getIndex()] = 10; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}} + // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -172}} } -int scanf(const char *restrict fmt, ...); +int scanf(const char *fmt, ...); void taintedIndex(void) { int index; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
