https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/111258
This is an alternative implementation of the idea present in #109804. In this implementation, the suppression is purely implemented by a `BugReportVisitor`, to avoid coupling the suppression with the engine itself. The idea is to visit the bug-path, and along the way record each time we entered the basic-block that has a comparison in the end that drives the decision of entering or avoiding a loop construct. We also record expressions that caused a state-split. These are basically the expressions that eagerly assume would force a split on - but this approach works even if eager-assumptions are disabled. Once we collected all the data, we go over the loops that we saw, and check if the condition of that loop construct was evaluated N times. This then triggers a heuristic to decide if we should suppress this, e.g. because we "assumed" not to enter the loop, or "assumed" to iterate more than 2 times in the loop. --- This is WIP, because I just scratched the idea, but it seems to work good enough to have some discussion about this, while refining it if I have some time later. >From c429df63af6cdeec7e721e8f9c322f1ac6ba3b3d Mon Sep 17 00:00:00 2001 From: Balazs Benics <benicsbal...@gmail.com> Date: Sat, 5 Oct 2024 15:14:37 +0200 Subject: [PATCH 1/5] Use the eager assumption tags only if the decision was ambiguous --- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 15 ++++++++---- clang/test/Analysis/assuming-unsigned-ge-0.c | 5 +--- clang/test/Analysis/cast-value-notes.cpp | 20 +++++++--------- clang/test/Analysis/cast-value-state-dump.cpp | 5 ++-- .../std-c-library-functions-arg-constraints.c | 24 +++++++------------ 5 files changed, 29 insertions(+), 40 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index b1919d7027cf4d..62066632db87d0 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -3771,24 +3771,29 @@ void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst, SVal V = state->getSVal(Ex, Pred->getLocationContext()); std::optional<nonloc::SymbolVal> SEV = V.getAs<nonloc::SymbolVal>(); if (SEV && SEV->isExpression()) { - const std::pair<const ProgramPointTag *, const ProgramPointTag*> &tags = - geteagerlyAssumeBinOpBifurcationTags(); - ProgramStateRef StateTrue, StateFalse; std::tie(StateTrue, StateFalse) = state->assume(*SEV); + const ProgramPointTag *EagerTrueTag = nullptr; + const ProgramPointTag *EagerFalseTag = nullptr; + + // Use the eager assumption tags if the choice was ambiguous. + if (StateTrue && StateFalse) + std::tie(EagerTrueTag, EagerFalseTag) = + geteagerlyAssumeBinOpBifurcationTags(); + // First assume that the condition is true. if (StateTrue) { SVal Val = svalBuilder.makeIntVal(1U, Ex->getType()); StateTrue = StateTrue->BindExpr(Ex, Pred->getLocationContext(), Val); - Bldr.generateNode(Ex, Pred, StateTrue, tags.first); + Bldr.generateNode(Ex, Pred, StateTrue, EagerTrueTag); } // Next, assume that the condition is false. if (StateFalse) { SVal Val = svalBuilder.makeIntVal(0U, Ex->getType()); StateFalse = StateFalse->BindExpr(Ex, Pred->getLocationContext(), Val); - Bldr.generateNode(Ex, Pred, StateFalse, tags.second); + Bldr.generateNode(Ex, Pred, StateFalse, EagerFalseTag); } } } diff --git a/clang/test/Analysis/assuming-unsigned-ge-0.c b/clang/test/Analysis/assuming-unsigned-ge-0.c index 553e68cb96c6bd..18e637a6e5310e 100644 --- a/clang/test/Analysis/assuming-unsigned-ge-0.c +++ b/clang/test/Analysis/assuming-unsigned-ge-0.c @@ -2,10 +2,7 @@ // RUN: -analyzer-checker=core -verify %s int assuming_unsigned_ge_0(unsigned arg) { - // TODO This testcase demonstrates the current incorrect behavior of Clang - // Static Analyzer: here 'arg' is unsigned, so "arg >= 0" is not a fresh - // assumption, but it still appears in the diagnostics as if it's fresh: - // expected-note@+2 {{Assuming 'arg' is >= 0}} + // expected-note@+2 {{'arg' is >= 0}} // expected-note@+1 {{Taking false branch}} if (arg < 0) return 0; diff --git a/clang/test/Analysis/cast-value-notes.cpp b/clang/test/Analysis/cast-value-notes.cpp index 7ee224dc6e5d8f..0235404e73adf6 100644 --- a/clang/test/Analysis/cast-value-notes.cpp +++ b/clang/test/Analysis/cast-value-notes.cpp @@ -168,9 +168,8 @@ void evalNonNullParamNonNullReturnReference(const Shape &S) { // expected-note@-2 {{Taking true branch}} (void)(1 / !C); - // expected-note@-1 {{'C' is non-null}} - // expected-note@-2 {{Division by zero}} - // expected-warning@-3 {{Division by zero}} + // expected-note@-1 {{Division by zero}} + // expected-warning@-2 {{Division by zero}} } } @@ -226,9 +225,8 @@ void evalNonNullParamNonNullReturn(const Shape *S) { // expected-note@-2 {{Taking true branch}} (void)(1 / !C); - // expected-note@-1 {{'C' is non-null}} - // expected-note@-2 {{Division by zero}} - // expected-warning@-3 {{Division by zero}} + // expected-note@-1 {{Division by zero}} + // expected-warning@-2 {{Division by zero}} } } @@ -243,9 +241,8 @@ void evalNonNullParamNullReturn(const Shape *S) { // expected-note@-4 {{Taking true branch}} (void)(1 / !T); - // expected-note@-1 {{'T' is non-null}} - // expected-note@-2 {{Division by zero}} - // expected-warning@-3 {{Division by zero}} + // expected-note@-1 {{Division by zero}} + // expected-warning@-2 {{Division by zero}} } } @@ -265,9 +262,8 @@ void evalZeroParamNonNullReturnPointer(const Shape *S) { // expected-note@-2 {{'C' initialized here}} (void)(1 / !C); - // expected-note@-1 {{'C' is non-null}} - // expected-note@-2 {{Division by zero}} - // expected-warning@-3 {{Division by zero}} + // expected-note@-1 {{Division by zero}} + // expected-warning@-2 {{Division by zero}} } void evalZeroParamNonNullReturn(const Shape &S) { diff --git a/clang/test/Analysis/cast-value-state-dump.cpp b/clang/test/Analysis/cast-value-state-dump.cpp index 07fd7abd848ab7..97f3b2236db087 100644 --- a/clang/test/Analysis/cast-value-state-dump.cpp +++ b/clang/test/Analysis/cast-value-state-dump.cpp @@ -40,8 +40,7 @@ void evalNonNullParamNonNullReturn(const Shape *S) { // CHECK-NEXT: ] } (void)(1 / !C); - // expected-note@-1 {{'C' is non-null}} - // expected-note@-2 {{Division by zero}} - // expected-warning@-3 {{Division by zero}} + // expected-note@-1 {{Division by zero}} + // expected-warning@-2 {{Division by zero}} } diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints.c b/clang/test/Analysis/std-c-library-functions-arg-constraints.c index 0b817dda98c727..a6e0ad15129f60 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-constraints.c +++ b/clang/test/Analysis/std-c-library-functions-arg-constraints.c @@ -42,8 +42,7 @@ void test_alnum_symbolic(int x) { // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ // bugpath-note{{TRUE}} \ - // bugpath-note{{Left side of '&&' is true}} \ - // bugpath-note{{'x' is <= 255}} + // bugpath-note{{Left side of '&&' is true}} } void test_alnum_symbolic2(int x) { @@ -76,8 +75,7 @@ void test_toupper_symbolic(int x) { // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ // bugpath-note{{TRUE}} \ - // bugpath-note{{Left side of '&&' is true}} \ - // bugpath-note{{'x' is <= 255}} + // bugpath-note{{Left side of '&&' is true}} } void test_toupper_symbolic2(int x) { @@ -110,8 +108,7 @@ void test_tolower_symbolic(int x) { // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ // bugpath-note{{TRUE}} \ - // bugpath-note{{Left side of '&&' is true}} \ - // bugpath-note{{'x' is <= 255}} + // bugpath-note{{Left side of '&&' is true}} } void test_tolower_symbolic2(int x) { @@ -144,8 +141,7 @@ void test_toascii_symbolic(int x) { // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ // bugpath-note{{TRUE}} \ - // bugpath-note{{Left side of '&&' is true}} \ - // bugpath-note{{'x' is <= 255}} + // bugpath-note{{Left side of '&&' is true}} } void test_toascii_symbolic2(int x) { @@ -173,8 +169,7 @@ void test_notnull_symbolic(FILE *fp, int *buf) { clang_analyzer_eval(buf != 0); // \ // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ - // bugpath-note{{TRUE}} \ - // bugpath-note{{'buf' is not equal to null}} + // bugpath-note{{TRUE}} } void test_notnull_symbolic2(FILE *fp, int *buf) { if (!buf) // bugpath-note{{Assuming 'buf' is null}} \ @@ -218,8 +213,7 @@ void test_notnull_buffer_3(void *buf) { clang_analyzer_eval(buf != 0); // \ // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ - // bugpath-note{{TRUE}} \ - // bugpath-note{{'buf' is not equal to null}} + // bugpath-note{{TRUE}} } void test_no_node_after_bug(FILE *fp, size_t size, size_t n, void *buf) { @@ -299,8 +293,7 @@ void test_buf_size_symbolic(int s) { clang_analyzer_eval(s <= 3); // \ // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ - // bugpath-note{{TRUE}} \ - // bugpath-note{{'s' is <= 3}} + // bugpath-note{{TRUE}} } void test_buf_size_symbolic_and_offset(int s) { char buf[3]; @@ -308,8 +301,7 @@ void test_buf_size_symbolic_and_offset(int s) { clang_analyzer_eval(s <= 2); // \ // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ - // bugpath-note{{TRUE}} \ - // bugpath-note{{'s' is <= 2}} + // bugpath-note{{TRUE}} } int __buf_size_arg_constraint_mul(const void *, size_t, size_t); >From c240537d15f1275a4c587ace596682edf77ff5d6 Mon Sep 17 00:00:00 2001 From: Balazs Benics <benicsbal...@gmail.com> Date: Sat, 5 Oct 2024 15:15:46 +0200 Subject: [PATCH 2/5] Add test from PR #109804 --- clang/test/Analysis/out-of-bounds.c | 150 +++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 1 deletion(-) diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c index 1f771c2b3bd138..fde53d148ba442 100644 --- a/clang/test/Analysis/out-of-bounds.c +++ b/clang/test/Analysis/out-of-bounds.c @@ -1,4 +1,9 @@ -// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -verify %s +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -verify=expected,eagerlyassume %s +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -analyzer-config eagerly-assume=false -verify %s + +// Note that eagerly-assume=false is tested separately because the +// WeakLoopAssumption suppression heuristic uses different code paths to +// achieve the same result with and without eagerly-assume. void clang_analyzer_eval(int); @@ -194,3 +199,146 @@ char test_comparison_with_extent_symbol(struct incomplete *p) { return ((char *)p)[-1]; // no-warning } +// WeakLoopAssumption suppression +/////////////////////////////////////////////////////////////////////// + +int GlobalArray[100]; +int loop_suppress_after_zero_iterations(unsigned len) { + for (unsigned i = 0; i < len; i++) + if (GlobalArray[i] > 0) + return GlobalArray[i]; + // Previously this would have produced an overflow warning because splitting + // the state on the loop condition introduced an execution path where the + // analyzer thinks that len == 0. + // There are many situations where the programmer knows that an argument is + // positive, but this is not indicated in the source code, so we must avoid + // reporting errors (especially out of bounds errors) on these branches, + // because otherwise we'd get prohibitively many false positives. + return GlobalArray[len - 1]; // no-warning +} + +void loop_report_in_second_iteration(int len) { + int buf[1] = {0}; + for (int i = 0; i < len; i++) { + // When a programmer writes a loop, we may assume that they intended at + // least two iterations. + buf[i] = 1; // expected-warning{{Out of bound access to memory}} + } +} + +void loop_suppress_in_third_iteration(int len) { + int buf[2] = {0}; + for (int i = 0; i < len; i++) { + // We should suppress array bounds errors on the third and later iterations + // of loops, because sometimes programmers write a loop in sitiuations + // where they know that there will be at most two iterations, but the + // analyzer cannot deduce this property. + buf[i] = 1; // no-warning + } +} + +int no_suppression_when_no_assumption_zero_iterations(unsigned len) { + if (len != 0) { + // This 'if' introduces a state split between len == 0 and len != 0. + } + + // On the branch where we assumed that len is zero, this loop will be + // skipped. We (intentionally) don't suppress this execution path becasue + // here the analyzer doesn't assume anything new when it evaluates the loop + // condition. + for (unsigned i = 0; i < len; i++) + if (GlobalArray[i] > 0) + return GlobalArray[i]; + + return GlobalArray[len - 1]; // expected-warning{{Out of bound access to memory}} +} + +void no_suppression_when_no_assumption_third_iteration(int len) { + if (len < 20) { + // This 'if' introduces a state split with len >= 20 on one branch. + } + + int buf[2] = {0}; + for (int i = 0; i < len; i++) { + // As in no_suppression_when_no_assumption_zero_iterations, the suppression + // only activates when the analyzer assumes something new in the loop + // condition. On the branch where `len >= 20` entering the third iteration + // doesn't involve a new assumption, so this warning is not suppressed: + buf[i] = 1; // expected-warning{{Out of bound access to memory}} + } +} + +void loop_suppress_in_third_iteration_logical_and(int len, int flag) { + int buf[2] = {0}; + for (int i = 0; i < len && flag; i++) { + // FIXME: In this case the checker should suppress the warning the same way + // as it's suppressed in loop_suppress_in_third_iteration, but the + // suppression is not activated because the terminator statement associated + // with the loop is just the expression 'flag', while 'i < len' is a + // separate terminator statement that's associated with the + // short-circuiting operator '&&'. + // I have seen a real-world FP that looks like this, but it is much rarer + // than the basic setup. + buf[i] = 1; // expected-warning{{Out of bound access to memory}} + } +} + +void loop_suppress_in_third_iteration_logical_and_2(int len, int flag) { + int buf[2] = {0}; + for (int i = 0; flag && i < len; i++) { + // If the two operands of '&&' are flipped, the suppression works, because + // then 'flag' is the terminator statement associated with '&&' (which + // determines whether the short-circuiting happens or not) and 'i < len' is + // the terminator statement of the loop itself. + buf[i] = 1; // no-warning + } +} + +void loop_suppress_in_third_iteration_cast(int len) { + int buf[2] = {0}; + for (int i = 0; (unsigned)(i < len); i++) { + // The behavior of this suppression is slightly different under + // eagerly-assume=true (the default) and eagerly-assume=false: + // * When eager assumptions are disabled, it's enough to look for cases + // where we get two non-null states from splitting the state over the + // 'SVal' that represents the full loop condition. + // * When eager assumptions are enabled, we also accept situations when the + // loop condition expression triggers an eager state split and therefore + // we won't see a state split at the "normal" point because it's executed + // on two already separated execution paths. + // However, for the sake of simplicity we don't activate the suppression in + // cases when _a subexpression_ of the loop condition triggers an eager + // assumption. There are already many differences between analysis with and + // without eager assumptions, so it would be pointless to write more + // complicated code to eliminate these rare differences. + buf[i] = 1; // eagerlyassume-warning{{Out of bound access to memory}} + } +} + +int coinflip(void); +int do_while_report_after_one_iteration(void) { + int i = 0; + do { + i++; + } while (coinflip()); + // Unlike `loop_suppress_after_zero_iterations`, running just one iteration + // in a do-while is not a corner case that would produce too many false + // positives, so don't suppress bounds errors in these situations. + return GlobalArray[i-2]; // expected-warning{{Out of bound access to memory}} +} + +void do_while_report_in_second_iteration(int len) { + int buf[1] = {0}; + int i = 0; + do { + buf[i] = 1; // expected-warning{{Out of bound access to memory}} + } while (i++ < len); +} + +void do_while_suppress_in_third_iteration(int len) { + int buf[2] = {0}; + int i = 0; + do { + buf[i] = 1; // no-warning + } while (i++ < len); +} >From 310bb3a6aa2894ab7303582ab0bd775fbf8405ed Mon Sep 17 00:00:00 2001 From: Balazs Benics <benicsbal...@gmail.com> Date: Sat, 5 Oct 2024 15:16:33 +0200 Subject: [PATCH 3/5] Add SuppressReportsHavingWeakLoopAssumption visitor --- .../Checkers/ArrayBoundCheckerV2.cpp | 135 +++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 3f837564cf47c4..3fcdbc6de6a068 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -11,10 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" +#include "clang/AST/Expr.h" #include "clang/AST/ParentMapContext.h" +#include "clang/AST/Stmt.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/ProgramPoint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Checkers/Taint.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" @@ -22,7 +29,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" -#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include <optional> @@ -32,6 +39,131 @@ using namespace ento; using namespace taint; using llvm::formatv; +namespace { + +class SuppressReportsHavingWeakLoopAssumption : public BugReporterVisitor { +private: + friend class BugReporterVisitor; + llvm::DenseSet<const Stmt *> LoopsSeen; + llvm::DenseMap<const Expr *, unsigned> EagerlyAssumedTrue; + llvm::DenseMap<const Expr *, unsigned> EagerlyAssumedFalse; + + void Profile(llvm::FoldingSetNodeID &ID) const final { + static const bool Tag = 0; + ID.AddPointer(&Tag); + } + + static bool isLoopAndNonNull(const Stmt *S) { + return isa_and_nonnull<ForStmt, WhileStmt, CXXForRangeStmt, DoStmt>(S); + } + + void registerLoopStatements(const ExplodedNode *N) { + if (auto Entrance = N->getLocation().getAs<BlockEntrance>()) { + const Stmt *TermStmt = Entrance->getBlock()->getTerminatorStmt(); + if (isLoopAndNonNull(TermStmt)) + LoopsSeen.insert(TermStmt); + } + } + + void registerOccurrence(llvm::DenseMap<const Expr *, unsigned> &Map, + const Expr *Key) { + if (auto [Place, Inserted] = Map.try_emplace(Key, 1); !Inserted) + ++Place->second; + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *Succ, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) final { + registerLoopStatements(Succ); + + auto AsPostStmt = Succ->getLocationAs<PostStmt>(); + const Expr *CurrExpr = + AsPostStmt ? dyn_cast<Expr>(AsPostStmt->getStmt()) : nullptr; + + const auto *Tag = Succ->getLocation().getTag(); + if (!Tag) + return nullptr; + + StringRef TagDesc = Tag->getTagDescription(); + if (TagDesc == "ExprEngine : Eagerly Assume True") + registerOccurrence(EagerlyAssumedTrue, CurrExpr); + if (TagDesc == "ExprEngine : Eagerly Assume False") + registerOccurrence(EagerlyAssumedFalse, CurrExpr); + return nullptr; + } + + static const Expr *getCond(const Stmt *S) { + assert(isLoopAndNonNull(S)); + switch (S->getStmtClass()) { + case Stmt::StmtClass::ForStmtClass: + return cast<ForStmt>(S)->getCond(); + case Stmt::StmtClass::WhileStmtClass: + return cast<WhileStmt>(S)->getCond(); + case Stmt::StmtClass::CXXForRangeStmtClass: + return cast<CXXForRangeStmt>(S)->getCond(); + case Stmt::StmtClass::DoStmtClass: + return cast<DoStmt>(S)->getCond(); + default: + return nullptr; + } + } + + void trySuppressReport(PathSensitiveBugReport &R, const Stmt *Loop, + const Expr *CondSubExpr) const { + auto NumEagerlyTrue = EagerlyAssumedTrue.lookup(CondSubExpr); + auto NumEagerlyFalse = EagerlyAssumedFalse.lookup(CondSubExpr); + + // Suppress the report if it avoided a loop with an eager assumption. + if (NumEagerlyTrue == 0 && NumEagerlyFalse == 1) { + R.markInvalid("eagerly decided never taking the loop body", CondSubExpr); + return; + } + + // Account for do-while loops where the body is already "taken" + // unconditionally for the first round. + if (isa<DoStmt>(Loop)) { + // Suppress the report if we have taken the loop body for the second time + // with an eager assumption. + if (NumEagerlyTrue > 1 && NumEagerlyFalse == 0) { + R.markInvalid("eagerly taken the do-while body at least 2 times", + CondSubExpr); + } + return; + } + + // Suppress the report if it iterates with eager assumptions 3 or more + // times. + if (NumEagerlyTrue > 2 && NumEagerlyFalse == 0) { + R.markInvalid("eagerly taken the loop body at least 2 times", + CondSubExpr); + } + } + + void finalizeVisitor(BugReporterContext &BRC, const ExplodedNode *, + PathSensitiveBugReport &R) final { + ASTContext &Ctx = BRC.getASTContext(); + + // Go over all the loops we have seen (either avoided or entered), and check + // if the condition of such loops were eagerly decided. + for (const Stmt *Loop : LoopsSeen) { + if (const Expr *Cond = getCond(Loop)) { + // Let's try all sub-exprs to cover cases that use short-circuiting. + // We need to check if any sub-expression of the condition was eagerly + // decided, because the '&&' and '||' logical operators could + // short-circuit, thus the expression on which we recorded the + // "eager decision" was a sub-expression of the Loop condition. + using namespace ast_matchers; + for (auto Binding : match(findAll(expr().bind("e")), *Cond, Ctx)) { + trySuppressReport(R, Loop, Binding.getNodeAs<Expr>("e")); + if (!R.isValid()) + return; + } + } + } + } +}; +} // namespace + namespace { /// If `E` is a "clean" array subscript expression, return the type of the /// accessed element. If the base of the subscript expression is modified by @@ -722,6 +854,7 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, if (Extent) markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug); + BR->addVisitor<SuppressReportsHavingWeakLoopAssumption>(); C.emitReport(std::move(BR)); } >From 6e6ed6fabe7326f2429f84554c2a95e99127d944 Mon Sep 17 00:00:00 2001 From: Balazs Benics <benicsbal...@gmail.com> Date: Sat, 5 Oct 2024 16:59:45 +0200 Subject: [PATCH 4/5] Implement without using eagerly-assume tags --- .../Checkers/ArrayBoundCheckerV2.cpp | 82 +++++++++++++------ clang/test/Analysis/out-of-bounds.c | 4 +- 2 files changed, 58 insertions(+), 28 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 3fcdbc6de6a068..aafd44d037273d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -28,8 +28,11 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include <optional> @@ -42,17 +45,36 @@ using llvm::formatv; namespace { class SuppressReportsHavingWeakLoopAssumption : public BugReporterVisitor { +public: + SuppressReportsHavingWeakLoopAssumption(const PathSensitiveBugReport &R) + : OrigCursor(R.getErrorNode()) {} + private: friend class BugReporterVisitor; + + // The exploded node we are currently visiting, but in the original exploded + // graph. This graph is not trimmed, unlike the one we usually visit. + const ExplodedNode *OrigCursor; + llvm::DenseSet<const Stmt *> LoopsSeen; - llvm::DenseMap<const Expr *, unsigned> EagerlyAssumedTrue; - llvm::DenseMap<const Expr *, unsigned> EagerlyAssumedFalse; + llvm::DenseMap<const Expr *, unsigned> NumTimesTakenTrueDecision; + llvm::DenseMap<const Expr *, unsigned> NumTimesTakenFalseDecision; void Profile(llvm::FoldingSetNodeID &ID) const final { static const bool Tag = 0; ID.AddPointer(&Tag); } + static const ExplodedNode *selectMatchingPred(const ExplodedNode *OrigSucc, + unsigned SoughtPredId) { + auto MatchesSoughtId = [SoughtPredId](const ExplodedNode *N) { + return N->getID() == SoughtPredId; + }; + auto It = llvm::find_if(OrigSucc->preds(), MatchesSoughtId); + assert(It != OrigSucc->pred_end()); + return *It; + } + static bool isLoopAndNonNull(const Stmt *S) { return isa_and_nonnull<ForStmt, WhileStmt, CXXForRangeStmt, DoStmt>(S); } @@ -74,21 +96,33 @@ class SuppressReportsHavingWeakLoopAssumption : public BugReporterVisitor { PathDiagnosticPieceRef VisitNode(const ExplodedNode *Succ, BugReporterContext &BRC, PathSensitiveBugReport &BR) final { + OrigCursor = selectMatchingPred(OrigCursor, Succ->getID()); + assert(OrigCursor->getID() == Succ->getID()); + registerLoopStatements(Succ); auto AsPostStmt = Succ->getLocationAs<PostStmt>(); const Expr *CurrExpr = AsPostStmt ? dyn_cast<Expr>(AsPostStmt->getStmt()) : nullptr; - const auto *Tag = Succ->getLocation().getTag(); - if (!Tag) + bool IsSplittingTwoWays = OrigCursor->succ_size() == 2; + + if (!CurrExpr || !IsSplittingTwoWays) return nullptr; - StringRef TagDesc = Tag->getTagDescription(); - if (TagDesc == "ExprEngine : Eagerly Assume True") - registerOccurrence(EagerlyAssumedTrue, CurrExpr); - if (TagDesc == "ExprEngine : Eagerly Assume False") - registerOccurrence(EagerlyAssumedFalse, CurrExpr); + const ExplodedNode *Next = Succ->getFirstSucc(); + SValBuilder &SVB = Succ->getState()->getStateManager().getSValBuilder(); + auto Query = SVB.evalCast(Succ->getSVal(CurrExpr), SVB.getConditionType(), + CurrExpr->getType()) + .getAs<DefinedOrUnknownSVal>(); + if (!Query) + return nullptr; + + ProgramStateRef TakenTrueBranch = Next->getState()->assume(*Query, true); + registerOccurrence(TakenTrueBranch ? NumTimesTakenTrueDecision + : NumTimesTakenFalseDecision, + CurrExpr); + return nullptr; } @@ -110,30 +144,26 @@ class SuppressReportsHavingWeakLoopAssumption : public BugReporterVisitor { void trySuppressReport(PathSensitiveBugReport &R, const Stmt *Loop, const Expr *CondSubExpr) const { - auto NumEagerlyTrue = EagerlyAssumedTrue.lookup(CondSubExpr); - auto NumEagerlyFalse = EagerlyAssumedFalse.lookup(CondSubExpr); + unsigned NumTimesTakenLoopBody = + NumTimesTakenTrueDecision.lookup(CondSubExpr); + unsigned NumTimesNotTakenLoopBody = + NumTimesTakenFalseDecision.lookup(CondSubExpr); + + // Adjust for do-while loops, where the loop body is always taken. + if (isa<DoStmt>(Loop)) + NumTimesTakenLoopBody += 1; // Suppress the report if it avoided a loop with an eager assumption. - if (NumEagerlyTrue == 0 && NumEagerlyFalse == 1) { + if (NumTimesTakenLoopBody == 0 && NumTimesNotTakenLoopBody == 1) { + // llvm::errs() << "eagerly decided never taking the loop body\n"; R.markInvalid("eagerly decided never taking the loop body", CondSubExpr); return; } - // Account for do-while loops where the body is already "taken" - // unconditionally for the first round. - if (isa<DoStmt>(Loop)) { - // Suppress the report if we have taken the loop body for the second time - // with an eager assumption. - if (NumEagerlyTrue > 1 && NumEagerlyFalse == 0) { - R.markInvalid("eagerly taken the do-while body at least 2 times", - CondSubExpr); - } - return; - } - // Suppress the report if it iterates with eager assumptions 3 or more // times. - if (NumEagerlyTrue > 2 && NumEagerlyFalse == 0) { + if (NumTimesTakenLoopBody > 2 && NumTimesNotTakenLoopBody == 0) { + // llvm::errs() << "eagerly taken the loop body at least 2 times\n"; R.markInvalid("eagerly taken the loop body at least 2 times", CondSubExpr); } @@ -854,7 +884,7 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, if (Extent) markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug); - BR->addVisitor<SuppressReportsHavingWeakLoopAssumption>(); + BR->addVisitor<SuppressReportsHavingWeakLoopAssumption>(*BR); C.emitReport(std::move(BR)); } diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c index fde53d148ba442..1b62b2eed4ee84 100644 --- a/clang/test/Analysis/out-of-bounds.c +++ b/clang/test/Analysis/out-of-bounds.c @@ -279,7 +279,7 @@ void loop_suppress_in_third_iteration_logical_and(int len, int flag) { // short-circuiting operator '&&'. // I have seen a real-world FP that looks like this, but it is much rarer // than the basic setup. - buf[i] = 1; // expected-warning{{Out of bound access to memory}} + buf[i] = 1; // no-warning } } @@ -311,7 +311,7 @@ void loop_suppress_in_third_iteration_cast(int len) { // assumption. There are already many differences between analysis with and // without eager assumptions, so it would be pointless to write more // complicated code to eliminate these rare differences. - buf[i] = 1; // eagerlyassume-warning{{Out of bound access to memory}} + buf[i] = 1; // no-warning } } >From 26f02f3f5824d0c78132e46ace2b3d41f8dd68e2 Mon Sep 17 00:00:00 2001 From: Balazs Benics <benicsbal...@gmail.com> Date: Sat, 5 Oct 2024 16:59:55 +0200 Subject: [PATCH 5/5] Revert "Use the eager assumption tags only if the decision was ambiguous" This reverts commit c429df63af6cdeec7e721e8f9c322f1ac6ba3b3d. --- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 15 ++++-------- clang/test/Analysis/assuming-unsigned-ge-0.c | 5 +++- clang/test/Analysis/cast-value-notes.cpp | 20 +++++++++------- clang/test/Analysis/cast-value-state-dump.cpp | 5 ++-- .../std-c-library-functions-arg-constraints.c | 24 ++++++++++++------- 5 files changed, 40 insertions(+), 29 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 62066632db87d0..b1919d7027cf4d 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -3771,29 +3771,24 @@ void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst, SVal V = state->getSVal(Ex, Pred->getLocationContext()); std::optional<nonloc::SymbolVal> SEV = V.getAs<nonloc::SymbolVal>(); if (SEV && SEV->isExpression()) { + const std::pair<const ProgramPointTag *, const ProgramPointTag*> &tags = + geteagerlyAssumeBinOpBifurcationTags(); + ProgramStateRef StateTrue, StateFalse; std::tie(StateTrue, StateFalse) = state->assume(*SEV); - const ProgramPointTag *EagerTrueTag = nullptr; - const ProgramPointTag *EagerFalseTag = nullptr; - - // Use the eager assumption tags if the choice was ambiguous. - if (StateTrue && StateFalse) - std::tie(EagerTrueTag, EagerFalseTag) = - geteagerlyAssumeBinOpBifurcationTags(); - // First assume that the condition is true. if (StateTrue) { SVal Val = svalBuilder.makeIntVal(1U, Ex->getType()); StateTrue = StateTrue->BindExpr(Ex, Pred->getLocationContext(), Val); - Bldr.generateNode(Ex, Pred, StateTrue, EagerTrueTag); + Bldr.generateNode(Ex, Pred, StateTrue, tags.first); } // Next, assume that the condition is false. if (StateFalse) { SVal Val = svalBuilder.makeIntVal(0U, Ex->getType()); StateFalse = StateFalse->BindExpr(Ex, Pred->getLocationContext(), Val); - Bldr.generateNode(Ex, Pred, StateFalse, EagerFalseTag); + Bldr.generateNode(Ex, Pred, StateFalse, tags.second); } } } diff --git a/clang/test/Analysis/assuming-unsigned-ge-0.c b/clang/test/Analysis/assuming-unsigned-ge-0.c index 18e637a6e5310e..553e68cb96c6bd 100644 --- a/clang/test/Analysis/assuming-unsigned-ge-0.c +++ b/clang/test/Analysis/assuming-unsigned-ge-0.c @@ -2,7 +2,10 @@ // RUN: -analyzer-checker=core -verify %s int assuming_unsigned_ge_0(unsigned arg) { - // expected-note@+2 {{'arg' is >= 0}} + // TODO This testcase demonstrates the current incorrect behavior of Clang + // Static Analyzer: here 'arg' is unsigned, so "arg >= 0" is not a fresh + // assumption, but it still appears in the diagnostics as if it's fresh: + // expected-note@+2 {{Assuming 'arg' is >= 0}} // expected-note@+1 {{Taking false branch}} if (arg < 0) return 0; diff --git a/clang/test/Analysis/cast-value-notes.cpp b/clang/test/Analysis/cast-value-notes.cpp index 0235404e73adf6..7ee224dc6e5d8f 100644 --- a/clang/test/Analysis/cast-value-notes.cpp +++ b/clang/test/Analysis/cast-value-notes.cpp @@ -168,8 +168,9 @@ void evalNonNullParamNonNullReturnReference(const Shape &S) { // expected-note@-2 {{Taking true branch}} (void)(1 / !C); - // expected-note@-1 {{Division by zero}} - // expected-warning@-2 {{Division by zero}} + // expected-note@-1 {{'C' is non-null}} + // expected-note@-2 {{Division by zero}} + // expected-warning@-3 {{Division by zero}} } } @@ -225,8 +226,9 @@ void evalNonNullParamNonNullReturn(const Shape *S) { // expected-note@-2 {{Taking true branch}} (void)(1 / !C); - // expected-note@-1 {{Division by zero}} - // expected-warning@-2 {{Division by zero}} + // expected-note@-1 {{'C' is non-null}} + // expected-note@-2 {{Division by zero}} + // expected-warning@-3 {{Division by zero}} } } @@ -241,8 +243,9 @@ void evalNonNullParamNullReturn(const Shape *S) { // expected-note@-4 {{Taking true branch}} (void)(1 / !T); - // expected-note@-1 {{Division by zero}} - // expected-warning@-2 {{Division by zero}} + // expected-note@-1 {{'T' is non-null}} + // expected-note@-2 {{Division by zero}} + // expected-warning@-3 {{Division by zero}} } } @@ -262,8 +265,9 @@ void evalZeroParamNonNullReturnPointer(const Shape *S) { // expected-note@-2 {{'C' initialized here}} (void)(1 / !C); - // expected-note@-1 {{Division by zero}} - // expected-warning@-2 {{Division by zero}} + // expected-note@-1 {{'C' is non-null}} + // expected-note@-2 {{Division by zero}} + // expected-warning@-3 {{Division by zero}} } void evalZeroParamNonNullReturn(const Shape &S) { diff --git a/clang/test/Analysis/cast-value-state-dump.cpp b/clang/test/Analysis/cast-value-state-dump.cpp index 97f3b2236db087..07fd7abd848ab7 100644 --- a/clang/test/Analysis/cast-value-state-dump.cpp +++ b/clang/test/Analysis/cast-value-state-dump.cpp @@ -40,7 +40,8 @@ void evalNonNullParamNonNullReturn(const Shape *S) { // CHECK-NEXT: ] } (void)(1 / !C); - // expected-note@-1 {{Division by zero}} - // expected-warning@-2 {{Division by zero}} + // expected-note@-1 {{'C' is non-null}} + // expected-note@-2 {{Division by zero}} + // expected-warning@-3 {{Division by zero}} } diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints.c b/clang/test/Analysis/std-c-library-functions-arg-constraints.c index a6e0ad15129f60..0b817dda98c727 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-constraints.c +++ b/clang/test/Analysis/std-c-library-functions-arg-constraints.c @@ -42,7 +42,8 @@ void test_alnum_symbolic(int x) { // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ // bugpath-note{{TRUE}} \ - // bugpath-note{{Left side of '&&' is true}} + // bugpath-note{{Left side of '&&' is true}} \ + // bugpath-note{{'x' is <= 255}} } void test_alnum_symbolic2(int x) { @@ -75,7 +76,8 @@ void test_toupper_symbolic(int x) { // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ // bugpath-note{{TRUE}} \ - // bugpath-note{{Left side of '&&' is true}} + // bugpath-note{{Left side of '&&' is true}} \ + // bugpath-note{{'x' is <= 255}} } void test_toupper_symbolic2(int x) { @@ -108,7 +110,8 @@ void test_tolower_symbolic(int x) { // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ // bugpath-note{{TRUE}} \ - // bugpath-note{{Left side of '&&' is true}} + // bugpath-note{{Left side of '&&' is true}} \ + // bugpath-note{{'x' is <= 255}} } void test_tolower_symbolic2(int x) { @@ -141,7 +144,8 @@ void test_toascii_symbolic(int x) { // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ // bugpath-note{{TRUE}} \ - // bugpath-note{{Left side of '&&' is true}} + // bugpath-note{{Left side of '&&' is true}} \ + // bugpath-note{{'x' is <= 255}} } void test_toascii_symbolic2(int x) { @@ -169,7 +173,8 @@ void test_notnull_symbolic(FILE *fp, int *buf) { clang_analyzer_eval(buf != 0); // \ // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ - // bugpath-note{{TRUE}} + // bugpath-note{{TRUE}} \ + // bugpath-note{{'buf' is not equal to null}} } void test_notnull_symbolic2(FILE *fp, int *buf) { if (!buf) // bugpath-note{{Assuming 'buf' is null}} \ @@ -213,7 +218,8 @@ void test_notnull_buffer_3(void *buf) { clang_analyzer_eval(buf != 0); // \ // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ - // bugpath-note{{TRUE}} + // bugpath-note{{TRUE}} \ + // bugpath-note{{'buf' is not equal to null}} } void test_no_node_after_bug(FILE *fp, size_t size, size_t n, void *buf) { @@ -293,7 +299,8 @@ void test_buf_size_symbolic(int s) { clang_analyzer_eval(s <= 3); // \ // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ - // bugpath-note{{TRUE}} + // bugpath-note{{TRUE}} \ + // bugpath-note{{'s' is <= 3}} } void test_buf_size_symbolic_and_offset(int s) { char buf[3]; @@ -301,7 +308,8 @@ void test_buf_size_symbolic_and_offset(int s) { clang_analyzer_eval(s <= 2); // \ // report-warning{{TRUE}} \ // bugpath-warning{{TRUE}} \ - // bugpath-note{{TRUE}} + // bugpath-note{{TRUE}} \ + // bugpath-note{{'s' is <= 2}} } int __buf_size_arg_constraint_mul(const void *, size_t, size_t); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits