Author: NagyDonat Date: 2024-02-22T14:19:20+01:00 New Revision: fa8a21144ec9a6836e9bf1e3bf5cd0b2f058209e
URL: https://github.com/llvm/llvm-project/commit/fa8a21144ec9a6836e9bf1e3bf5cd0b2f058209e DIFF: https://github.com/llvm/llvm-project/commit/fa8a21144ec9a6836e9bf1e3bf5cd0b2f058209e.diff LOG: [analyzer] Improve handling of unsigned values in ArrayBoundCheckerV2 (#81034) A memory access is an out of bounds error if the offset is < the extent of the memory region. Notice that here "<" is a _mathematical_ comparison between two numbers and NOT a C/C++ operator that compares two typed C++ values: for example -1 < 1000 is true in mathematics, but if the `-1` is an `int` and the `1000` is a `size_t` value, then evaluating the C/C++ operator `<` will return false because the `-1` will be converted to `SIZE_MAX` by the automatic type conversions. This means that it's incorrect to perform a bounds check with `evalBinOpNN(State, BO_LT, ...)` which performs automatic conversions and can produce wildly incorrect results. ArrayBoundsCheckerV2 already had a special case where it avoided calling `evalBinOpNN` in a situation where it would have performed an automatic conversion; this commit replaces that code with a more general one that covers more situations. (It's still not perfect, but it's better than the previous version and I think it will cover practically all real-world code.) Note that this is not a limitation/bug of the simplification algorithm defined in `getSimplifedOffsets()`: the simplification is not applied in the test case `test_comparison_with_extent_symbol` (because the `Extent` is not a concrete int), but without the new code it would still run into a `-1 < UNSIGNED` comparison that evaluates to false because `evalBinOpNN` performs an automatic type conversion. Added: Modified: clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp clang/test/Analysis/out-of-bounds.c Removed: ################################################################################ diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h index d7cff49036cb81..a560f274c43ccd 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h @@ -110,12 +110,16 @@ class SValBuilder { /// that value is returned. Otherwise, returns NULL. virtual const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal val) = 0; - /// Tries to get the minimal possible (integer) value of a given SVal. If the - /// constraint manager cannot provide an useful answer, this returns NULL. + /// Tries to get the minimal possible (integer) value of a given SVal. This + /// always returns the value of a ConcreteInt, but may return NULL if the + /// value is symbolic and the constraint manager cannot provide a useful + /// answer. virtual const llvm::APSInt *getMinValue(ProgramStateRef state, SVal val) = 0; - /// Tries to get the maximal possible (integer) value of a given SVal. If the - /// constraint manager cannot provide an useful answer, this returns NULL. + /// Tries to get the maximal possible (integer) value of a given SVal. This + /// always returns the value of a ConcreteInt, but may return NULL if the + /// value is symbolic and the constraint manager cannot provide a useful + /// answer. virtual const llvm::APSInt *getMaxValue(ProgramStateRef state, SVal val) = 0; /// Simplify symbolic expressions within a given SVal. Return an SVal diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 05fc00a990d524..fdcc46e58580b4 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -268,6 +268,16 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); } +static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value) { + const llvm::APSInt *MaxV = SVB.getMaxValue(State, Value); + return MaxV && MaxV->isNegative(); +} + +static bool isUnsigned(SValBuilder &SVB, NonLoc Value) { + QualType T = Value.getType(SVB.getContext()); + return T->isUnsignedIntegerType(); +} + // Evaluate the comparison Value < Threshold with the help of the custom // simplification algorithm defined for this checker. Return a pair of states, // where the first one corresponds to "value below threshold" and the second @@ -281,18 +291,32 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB); } - if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { - QualType T = Value.getType(SVB.getContext()); - if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()) { - // In this case we reduced the bound check to a comparison of the form - // (symbol or value with unsigned type) < (negative number) - // which is always false. We are handling these cases separately because - // evalBinOpNN can perform a signed->unsigned conversion that turns the - // negative number into a huge positive value and leads to wildly - // inaccurate conclusions. + + // We want to perform a _mathematical_ comparison between the numbers `Value` + // and `Threshold`; but `evalBinOpNN` evaluates a C/C++ operator that may + // perform automatic conversions. For example the number -1 is less than the + // number 1000, but -1 < `1000ull` will evaluate to `false` because the `int` + // -1 is converted to ULONGLONG_MAX. + // To avoid automatic conversions, we evaluate the "obvious" cases without + // calling `evalBinOpNN`: + if (isNegative(SVB, State, Value) && isUnsigned(SVB, Threshold)) { + if (CheckEquality) { + // negative_value == unsigned_value is always false return {nullptr, State}; } + // negative_value < unsigned_value is always false + return {State, nullptr}; } + if (isUnsigned(SVB, Value) && isNegative(SVB, State, Threshold)) { + // unsigned_value == negative_value and unsigned_value < negative_value are + // both always false + return {nullptr, State}; + } + // FIXME: these special cases are sufficient for handling real-world + // comparisons, but in theory there could be contrived situations where + // automatic conversion of a symbolic value (which can be negative and can be + // positive) leads to incorrect results. + const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT; auto BelowThreshold = SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType()) diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c index ed457e86960063..1f771c2b3bd138 100644 --- a/clang/test/Analysis/out-of-bounds.c +++ b/clang/test/Analysis/out-of-bounds.c @@ -186,3 +186,11 @@ void test_assume_after_access2(unsigned long x) { clang_analyzer_eval(x <= 99); // expected-warning{{TRUE}} } +struct incomplete; +char test_comparison_with_extent_symbol(struct incomplete *p) { + // Previously this was reported as a (false positive) overflow error because + // the extent symbol of the area pointed by `p` was an unsigned and the '-1' + // was converted to its type by `evalBinOpNN`. + return ((char *)p)[-1]; // no-warning +} + _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits