Author: Balazs Benics Date: 2024-10-31T11:01:47+01:00 New Revision: e67e03a22c27b26125247efeae1b2d36edeb3617
URL: https://github.com/llvm/llvm-project/commit/e67e03a22c27b26125247efeae1b2d36edeb3617 DIFF: https://github.com/llvm/llvm-project/commit/e67e03a22c27b26125247efeae1b2d36edeb3617.diff LOG: [analyzer] EvalBinOpLL should return Unknown less often (#114222) SValBuilder::getKnownValue, getMinValue, getMaxValue use SValBuilder::simplifySVal. simplifySVal does repeated simplification until a fixed-point is reached. A single step is done by SimpleSValBuilder::simplifySValOnce, using a Simplifier visitor. That will basically decompose SymSymExprs, and apply constant folding using the constraints we have in the State. Once it decomposes a SymSymExpr, it simplifies both sides and then uses the SValBuilder::evalBinOp to reconstruct the same - but now simpler - SymSymExpr, while applying some caching to remain performant. This decomposition, and then the subsequent re-composition poses new challenges to the SValBuilder::evalBinOp, which is built to handle expressions coming from real C/C++ code, thus applying some implicit assumptions. One previous assumption was that nobody would form an expression like "((int*)0) - q" (where q is an int pointer), because it doesn't really makes sense to write code like that. However, during simplification, we may end up with a call to evalBinOp similar to this. To me, simplifying a SymbolRef should never result in Unknown or Undef, unless it was Unknown or Undef initially or, during simplification we realized that it's a division by zero once we did the constant folding, etc. In the following case the simplified SVal should not become UnknownVal: ```c++ void top(char *p, char *q) { int diff = p - q; // diff: reg<p> - reg<q> if (!p) // p: NULL simplify(diff); // diff after simplification should be: 0(loc) - reg<q> } ``` Returning Unknown from the simplifySVal can weaken analysis precision in other places too, such as in SValBuilder::getKnownValue, getMinValue, or getMaxValue because we call simplifySVal before doing anything else. For nonloc::SymbolVals, this loss of precision is critical, because for those the SymbolRef carries an accurate type of the encoded computation, thus we should at least have a conservative upper or lower bound that we could return from getMinValue or getMaxValue - yet we would just return nullptr. ```c++ const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state, SVal V) { return getConstValue(state, simplifySVal(state, V)); } const llvm::APSInt *SimpleSValBuilder::getMinValue(ProgramStateRef state, SVal V) { V = simplifySVal(state, V); if (const llvm::APSInt *Res = getConcreteValue(V)) return Res; if (SymbolRef Sym = V.getAsSymbol()) return state->getConstraintManager().getSymMinVal(state, Sym); return nullptr; } ``` For now, I don't plan to make the simplification bullet-proof, I'm just explaining why I made this change and what you need to look out for in the future if you see a similar issue. CPP-5750 Added: clang/unittests/StaticAnalyzer/SValSimplifyerTest.cpp Modified: clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp clang/unittests/StaticAnalyzer/CMakeLists.txt Removed: ################################################################################ diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 45e48d435aca6a..229169f848e228 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -860,11 +860,12 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, // If one of the operands is a symbol and the other is a constant, // build an expression for use by the constraint manager. if (SymbolRef rSym = rhs.getAsLocSymbol()) { - // We can only build expressions with symbols on the left, - // so we need a reversible operator. - if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp) + if (op == BO_Cmp) return UnknownVal(); + if (!BinaryOperator::isComparisonOp(op)) + return makeNonLoc(L.getValue(), op, rSym, resultTy); + op = BinaryOperator::reverseComparisonOp(op); return makeNonLoc(rSym, op, L.getValue(), resultTy); } diff --git a/clang/unittests/StaticAnalyzer/CMakeLists.txt b/clang/unittests/StaticAnalyzer/CMakeLists.txt index 5ef72cfaea4011..f5da86e5456030 100644 --- a/clang/unittests/StaticAnalyzer/CMakeLists.txt +++ b/clang/unittests/StaticAnalyzer/CMakeLists.txt @@ -20,6 +20,7 @@ add_clang_unittest(StaticAnalysisTests RegisterCustomCheckersTest.cpp StoreTest.cpp SymbolReaperTest.cpp + SValSimplifyerTest.cpp SValTest.cpp TestReturnValueUnderConstruction.cpp Z3CrosscheckOracleTest.cpp diff --git a/clang/unittests/StaticAnalyzer/SValSimplifyerTest.cpp b/clang/unittests/StaticAnalyzer/SValSimplifyerTest.cpp new file mode 100644 index 00000000000000..85cfe2c1965ac4 --- /dev/null +++ b/clang/unittests/StaticAnalyzer/SValSimplifyerTest.cpp @@ -0,0 +1,103 @@ +//===- unittests/StaticAnalyzer/SValSimplifyerTest.cpp --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Frontend/AnalysisConsumer.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" + +using namespace clang; +using namespace ento; + +static std::string toString(SVal V) { + std::string Result; + llvm::raw_string_ostream Stream(Result); + V.dumpToStream(Stream); + return Result; +} + +static void replace(std::string &Content, StringRef Substr, + StringRef Replacement) { + std::size_t Pos = 0; + while ((Pos = Content.find(Substr, Pos)) != std::string::npos) { + Content.replace(Pos, Substr.size(), Replacement); + Pos += Replacement.size(); + } +} + +namespace { + +class SimplifyChecker : public Checker<check::PreCall> { + const BugType Bug{this, "SimplifyChecker"}; + const CallDescription SimplifyCall{CDM::SimpleFunc, {"simplify"}, 1}; + + void report(CheckerContext &C, const Expr *E, StringRef Description) const { + PathDiagnosticLocation Loc(E->getExprLoc(), C.getSourceManager()); + auto Report = std::make_unique<BasicBugReport>(Bug, Description, Loc); + C.emitReport(std::move(Report)); + } + +public: + void checkPreCall(const CallEvent &Call, CheckerContext &C) const { + if (!SimplifyCall.matches(Call)) + return; + const Expr *Arg = Call.getArgExpr(0); + SVal Val = C.getSVal(Arg); + SVal SimplifiedVal = C.getSValBuilder().simplifySVal(C.getState(), Val); + std::string Subject = toString(Val); + std::string Simplified = toString(SimplifiedVal); + std::string Message = (llvm::Twine{Subject} + " -> " + Simplified).str(); + report(C, Arg, Message); + } +}; +} // namespace + +static void addSimplifyChecker(AnalysisASTConsumer &AnalysisConsumer, + AnalyzerOptions &AnOpts) { + AnOpts.CheckersAndPackages = {{"SimplifyChecker", true}}; + AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) { + Registry.addChecker<SimplifyChecker>("SimplifyChecker", "EmptyDescription", + "EmptyDocsUri"); + }); +} + +static void runThisCheckerOnCode(const std::string &Code, std::string &Diags) { + ASSERT_TRUE(runCheckerOnCode<addSimplifyChecker>(Code, Diags, + /*OnlyEmitWarnings=*/true)); + ASSERT_FALSE(Diags.empty()); + ASSERT_EQ(Diags.back(), '\n'); + Diags.pop_back(); +} + +namespace { + +TEST(SValSimplifyerTest, LHSConstrainedNullPtrDiff) { + constexpr auto Code = R"cpp( +template <class T> void simplify(T); +void LHSConstrainedNullPtrDiff(char *p, char *q) { + int diff = p - q; + if (!p) + simplify( diff ); +})cpp"; + + std::string Diags; + runThisCheckerOnCode(Code, Diags); + replace(Diags, "(reg_$0<char * p>)", "reg_p"); + replace(Diags, "(reg_$1<char * q>)", "reg_q"); + // This should not be simplified to "Unknown". + EXPECT_EQ(Diags, "SimplifyChecker: reg_p - reg_q -> 0U - reg_q"); +} + +} // namespace _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits