https://github.com/NagyDonat updated 
https://github.com/llvm/llvm-project/pull/119388

From cb9b5ef4d8b0ed8e67276947525d327c547424fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Thu, 28 Nov 2024 17:22:58 +0100
Subject: [PATCH 1/2] [analyzer] Don't assume third iteration in loops

This commit ensures that if the loop condition is opaque (the analyzer
cannot determine whether it's true or false) and there were at least two
iterations, then the analyzer doesn't make the unjustified assumption
that it can enter yet another iteration.

Note that the presence of a loop suggests that the developer thought
that two iterations can happen (otherwise an `if` would've been
sufficient), but it does not imply that the developer expected three or
four iterations -- and in fact there are many false positives where a
loop iterates over a two-element (or three-element) data structure, but
the analyzer cannot understand the loop condition and blindly assumes
that there may be three or more iterations. (In particular, analyzing
the FFMPEG project produces 100+ such false positives.)

Moreover, this provides some performance improvements in the sense that
the analyzer won't waste time on traversing the execution paths with 3
or 4 iterations in a loop (which are very similar to the paths with 2
iterations) and therefore will be able to traverse more branches
elsewhere on the `ExplodedGraph`.

This logic is disabled if the user enables the widen-loops analyzer
option (which is disabled by default), because the "simulate one final
iteration after the invalidation" execution path would be suppressed by
the "exit the loop if the loop condition is opaque and there were at
least two iterations" logic. If we want to support loop widening, we
would need to create a follow-up commit which ensures that it "plays
nicely" with this logic.
---
 .../Core/PathSensitive/CoreEngine.h           |  8 +++
 .../Core/PathSensitive/ExprEngine.h           | 18 +++---
 clang/lib/StaticAnalyzer/Core/CoreEngine.cpp  | 27 ++++++++-
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  | 55 ++++++++++++++++---
 clang/test/Analysis/loop-unrolling.cpp        | 35 +++++++-----
 clang/test/Analysis/misc-ps-region-store.m    | 31 ++++++++---
 6 files changed, 133 insertions(+), 41 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h 
b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
index a6d05a3ac67b4e..80b79fd4e928f8 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
@@ -126,6 +126,14 @@ class CoreEngine {
   ExplodedNode *generateCallExitBeginNode(ExplodedNode *N,
                                           const ReturnStmt *RS);
 
+  /// Helper function called by `HandleBranch()`. If the currently handled
+  /// branch corresponds to a loop, this returns the number of already
+  /// completed iterations in that loop, otherwise the return value is
+  /// `std::nullopt`. Note that this counts _all_ earlier iterations, including
+  /// ones that were performed within an earlier iteration of an outer loop.
+  std::optional<unsigned> getCompletedIterationCount(const CFGBlock *B,
+                                                     ExplodedNode *Pred) const;
+
 public:
   /// Construct a CoreEngine object to analyze the provided CFG.
   CoreEngine(ExprEngine &exprengine,
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h 
b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index 8c7493e27fcaa6..20c446e33ef9a5 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -321,14 +321,14 @@ class ExprEngine {
                                NodeBuilderWithSinks &nodeBuilder,
                                ExplodedNode *Pred);
 
-  /// ProcessBranch - Called by CoreEngine.  Used to generate successor
-  ///  nodes by processing the 'effects' of a branch condition.
-  void processBranch(const Stmt *Condition,
-                     NodeBuilderContext& BuilderCtx,
-                     ExplodedNode *Pred,
-                     ExplodedNodeSet &Dst,
-                     const CFGBlock *DstT,
-                     const CFGBlock *DstF);
+  /// ProcessBranch - Called by CoreEngine. Used to generate successor nodes by
+  /// processing the 'effects' of a branch condition. If the branch condition
+  /// is a loop condition, IterationsCompletedInLoop is the number of completed
+  /// iterations (otherwise it's std::nullopt).
+  void processBranch(const Stmt *Condition, NodeBuilderContext &BuilderCtx,
+                     ExplodedNode *Pred, ExplodedNodeSet &Dst,
+                     const CFGBlock *DstT, const CFGBlock *DstF,
+                     std::optional<unsigned> IterationsCompletedInLoop);
 
   /// Called by CoreEngine.
   /// Used to generate successor nodes for temporary destructors depending
@@ -588,6 +588,8 @@ class ExprEngine {
   void evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst, ExplodedNodeSet &Src,
                                     const Expr *Ex);
 
+  bool didEagerlyAssumeBifurcateAt(ProgramStateRef State, const Expr *Ex) 
const;
+
   static std::pair<const ProgramPointTag *, const ProgramPointTag *>
   getEagerlyAssumeBifurcationTags();
 
diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp 
b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
index 67b7d30853d9de..775a22e18c6199 100644
--- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
@@ -444,7 +444,8 @@ void CoreEngine::HandleBranch(const Stmt *Cond, const Stmt 
*Term,
   NodeBuilderContext Ctx(*this, B, Pred);
   ExplodedNodeSet Dst;
   ExprEng.processBranch(Cond, Ctx, Pred, Dst, *(B->succ_begin()),
-                       *(B->succ_begin() + 1));
+                        *(B->succ_begin() + 1),
+                        getCompletedIterationCount(B, Pred));
   // Enqueue the new frontier onto the worklist.
   enqueue(Dst);
 }
@@ -591,6 +592,30 @@ ExplodedNode 
*CoreEngine::generateCallExitBeginNode(ExplodedNode *N,
   return isNew ? Node : nullptr;
 }
 
+std::optional<unsigned>
+CoreEngine::getCompletedIterationCount(const CFGBlock *B,
+                                       ExplodedNode *Pred) const {
+  const LocationContext *LC = Pred->getLocationContext();
+  BlockCounter Counter = WList->getBlockCounter();
+  unsigned BlockCount =
+      Counter.getNumVisited(LC->getStackFrame(), B->getBlockID());
+
+  const Stmt *Term = B->getTerminatorStmt();
+  if (isa<ForStmt, WhileStmt, CXXForRangeStmt>(Term)) {
+    assert(BlockCount >= 1 &&
+           "Block count of currently analyzed block must be >= 1");
+    return BlockCount - 1;
+  }
+  if (isa<DoStmt>(Term)) {
+    // In a do-while loop one iteration happens before the first evaluation of
+    // the loop condition, so we don't subtract one.
+    return BlockCount;
+  }
+  // ObjCForCollectionStmt is skipped intentionally because the current
+  // application of the iteration counts is not relevant for it.
+  return std::nullopt;
+}
+
 void CoreEngine::enqueue(ExplodedNodeSet &Set) {
   for (const auto I : Set)
     WList->enqueue(I);
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp 
b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index b46cd9fe86fc11..db31206a13c36b 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2753,12 +2753,10 @@ assumeCondition(const Stmt *Condition, ExplodedNode *N) 
{
   return State->assume(V);
 }
 
-void ExprEngine::processBranch(const Stmt *Condition,
-                               NodeBuilderContext& BldCtx,
-                               ExplodedNode *Pred,
-                               ExplodedNodeSet &Dst,
-                               const CFGBlock *DstT,
-                               const CFGBlock *DstF) {
+void ExprEngine::processBranch(
+    const Stmt *Condition, NodeBuilderContext &BldCtx, ExplodedNode *Pred,
+    ExplodedNodeSet &Dst, const CFGBlock *DstT, const CFGBlock *DstF,
+    std::optional<unsigned> IterationsCompletedInLoop) {
   assert((!Condition || !isa<CXXBindTemporaryExpr>(Condition)) &&
          "CXXBindTemporaryExprs are handled by processBindTemporary.");
   const LocationContext *LCtx = Pred->getLocationContext();
@@ -2801,8 +2799,35 @@ void ExprEngine::processBranch(const Stmt *Condition,
     if (StTrue && StFalse)
       assert(!isa<ObjCForCollectionStmt>(Condition));
 
-    if (StTrue)
-      Builder.generateNode(StTrue, true, PredN);
+    if (StTrue) {
+      // If we are processing a loop condition where two iterations have
+      // already been completed and the the false branch is also feasible, then
+      // don't assume a third iteration, because it is a redundant execution
+      // path (unlikely to be different from earlier loop exits) and can cause
+      // false positives if e.g. the loop iterates over a two-element structure
+      // with an opaque condition.
+      //
+      // The iteration count "2" is hardcoded because it's the natural limit:
+      // * the fact that the programmer wrote a loop (and not just an `if`)
+      //   implies that they thought that the loop body may be executed twice;
+      // * however, there are situations where the programmer knows that there
+      //   are at most two iterations, but writes a loop that appears to be
+      //   generic, because there is no special syntax for "loop with at most
+      //   two iterations". (This pattern is common in FFMPEG and appears in
+      //   many other projects as well.)
+      bool CompletedTwoIterations = IterationsCompletedInLoop.value_or(0) >= 2;
+      bool FalseAlsoFeasible =
+          StFalse ||
+          didEagerlyAssumeBifurcateAt(PrevState, dyn_cast<Expr>(Condition));
+      bool SkipTrueBranch = CompletedTwoIterations && FalseAlsoFeasible;
+
+      // FIXME: This "don't assume third iteration" heuristic partially
+      // conflicts with the widen-loop analysis option (which is off by
+      // default). If we intend to support and stabilize the loop widening,
+      // we'll need to ensure that it 'plays nicely' with this logic.
+      if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops)
+        Builder.generateNode(StTrue, true, PredN);
+    }
 
     if (StFalse)
       Builder.generateNode(StFalse, false, PredN);
@@ -3724,6 +3749,10 @@ ExprEngine::getEagerlyAssumeBifurcationTags() {
   return std::make_pair(&TrueTag, &FalseTag);
 }
 
+/// The last expression where EagerlyAssume produced two transitions (i.e. it
+/// activated and the true and false case were both feasible).
+REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeBifurcationAt, const Expr *)
+
 void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
                                               ExplodedNodeSet &Src,
                                               const Expr *Ex) {
@@ -3746,6 +3775,11 @@ void 
ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
 
       auto [StateTrue, StateFalse] = State->assume(*SEV);
 
+      if (StateTrue && StateFalse) {
+        StateTrue = StateTrue->set<LastEagerlyAssumeBifurcationAt>(Ex);
+        StateFalse = StateFalse->set<LastEagerlyAssumeBifurcationAt>(Ex);
+      }
+
       // First assume that the condition is true.
       if (StateTrue) {
         SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());
@@ -3763,6 +3797,11 @@ void 
ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
   }
 }
 
+bool ExprEngine::didEagerlyAssumeBifurcateAt(ProgramStateRef State,
+                                             const Expr *Ex) const {
+  return Ex && State->get<LastEagerlyAssumeBifurcationAt>() == Ex;
+}
+
 void ExprEngine::VisitGCCAsmStmt(const GCCAsmStmt *A, ExplodedNode *Pred,
                                  ExplodedNodeSet &Dst) {
   StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
diff --git a/clang/test/Analysis/loop-unrolling.cpp 
b/clang/test/Analysis/loop-unrolling.cpp
index 66a828abfb5133..bf05a7739ce48c 100644
--- a/clang/test/Analysis/loop-unrolling.cpp
+++ b/clang/test/Analysis/loop-unrolling.cpp
@@ -63,7 +63,7 @@ int simple_no_unroll1() {
   int a[9];
   int k = 42;
   for (int i = 0; i < 9; i++) {
-    clang_analyzer_numTimesReached(); // expected-warning {{4}}
+    clang_analyzer_numTimesReached(); // expected-warning {{2}}
     a[i] = 42;
     foo(i);
   }
@@ -76,7 +76,7 @@ int simple_no_unroll2() {
   int k = 42;
   int i;
   for (i = 0; i < 9; i++) {
-    clang_analyzer_numTimesReached(); // expected-warning {{4}}
+    clang_analyzer_numTimesReached(); // expected-warning {{2}}
     a[i] = 42;
     i += getNum();
   }
@@ -309,9 +309,9 @@ int nested_inner_unrolled() {
   int k = 42;
   int j = 0;
   for (int i = 0; i < getNum(); i++) {
-    clang_analyzer_numTimesReached(); // expected-warning {{4}}
+    clang_analyzer_numTimesReached(); // expected-warning {{2}}
     for (j = 0; j < 8; ++j) {
-      clang_analyzer_numTimesReached(); // expected-warning {{32}}
+      clang_analyzer_numTimesReached(); // expected-warning {{16}}
       a[j] = 22;
     }
     a[i] = 42;
@@ -346,11 +346,7 @@ int simple_known_bound_loop() {
 
 int simple_unknown_bound_loop() {
   for (int i = 2; i < getNum(); i++) {
-#ifdef DFS
-    clang_analyzer_numTimesReached(); // expected-warning {{16}}
-#else
     clang_analyzer_numTimesReached(); // expected-warning {{8}}
-#endif
   }
   return 0;
 }
@@ -368,11 +364,7 @@ int nested_inlined_unroll1() {
 int nested_inlined_no_unroll1() {
   int k;
   for (int i = 0; i < 9; i++) {
-#ifdef DFS
-    clang_analyzer_numTimesReached(); // expected-warning {{18}}
-#else
-    clang_analyzer_numTimesReached(); // expected-warning {{14}}
-#endif
+    clang_analyzer_numTimesReached(); // expected-warning {{10}}
     k = simple_unknown_bound_loop();  // reevaluation without inlining, splits 
the state as well
   }
   int a = 22 / k; // no-warning
@@ -475,9 +467,13 @@ int num_steps_over_limit2() {
 
 int num_steps_on_limit3() {
   for (int i = 0; i < getNum(); i++) {
-    clang_analyzer_numTimesReached(); // expected-warning {{4}}
+    clang_analyzer_numTimesReached(); // expected-warning {{2}}
     for (int j = 0; j < 32; j++) {
-      clang_analyzer_numTimesReached(); // expected-warning {{128}}
+      // Here the loop unrollig logic calculates with four potential iterations
+      // in the outer loop where it cannot determine the iteration count in
+      // advance; but after two loops the analyzer conservatively assumes that
+      // the (still opaque) loop condition is false.
+      clang_analyzer_numTimesReached(); // expected-warning {{64}}
     }
   }
   return 0;
@@ -493,6 +489,15 @@ int num_steps_over_limit3() {
   return 0;
 }
 
+int num_steps_on_limit4() {
+  for (int i = 0; i < 4; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{4}}
+    for (int j = 0; j < 32; j++) {
+      clang_analyzer_numTimesReached(); // expected-warning {{128}}
+    }
+  }
+  return 0;
+}
 
 void pr34943() {
   for (int i = 0; i < 6L; ++i) {
diff --git a/clang/test/Analysis/misc-ps-region-store.m 
b/clang/test/Analysis/misc-ps-region-store.m
index 668b5ffd7001a6..b0a04b836efb99 100644
--- a/clang/test/Analysis/misc-ps-region-store.m
+++ b/clang/test/Analysis/misc-ps-region-store.m
@@ -910,13 +910,13 @@ void pr6302(id x, Class y) {
 
 
//===----------------------------------------------------------------------===//
 // Specially handle global variables that are declared constant.  In the
-// example below, this forces the loop to take exactly 2 iterations.
+// example below, this forces the loop to take exactly 1 iterations.
 
//===----------------------------------------------------------------------===//
 
-const int pr6288_L_N = 2;
+const int pr6288_L_N = 1;
 void pr6288_(void) {
-  int x[2];
-  int *px[2];
+  int x[1];
+  int *px[1];
   int i;
   for (i = 0; i < pr6288_L_N; i++)
     px[i] = &x[i];
@@ -924,8 +924,8 @@ void pr6288_(void) {
 }
 
 void pr6288_pos(int z) {
-  int x[2];
-  int *px[2];
+  int x[1];
+  int *px[1];
   int i;
   for (i = 0; i < z; i++)
     px[i] = &x[i]; // expected-warning{{Access out-of-bound array element 
(buffer overflow)}}
@@ -933,15 +933,28 @@ void pr6288_pos(int z) {
 }
 
 void pr6288_b(void) {
-  const int L_N = 2;
-  int x[2];
-  int *px[2];
+  const int L_N = 1;
+  int x[1];
+  int *px[1];
   int i;
   for (i = 0; i < L_N; i++)
     px[i] = &x[i];
   *(px[0]) = 0; // no-warning
 }
 
+void pr6288_no_third_iter(int z) {
+  int x[2];
+  int *px[2];
+  int i;
+  // If the loop condition is opaque, we assume that there may be two
+  // iterations (becasuse otherwise the loop could be replaced by an if); but
+  // we do not assume that there may be a third iteration. Therefore,
+  // unlike 'pr6288_pos', this testcase does not produce an out-of-bounds 
error.
+  for (i = 0; i < z; i++)
+    px[i] = &x[i];
+  *(px[0]) = 0; // expected-warning{{Dereference of undefined pointer value}}
+}
+
 // A bug in RemoveDeadBindings was causing instance variable bindings to get
 // prematurely pruned from the state.
 @interface Rdar7817800 {

From db451194747293d5e4a32468cdaee6f244322ffb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Fri, 13 Dec 2024 17:03:02 +0100
Subject: [PATCH 2/2] Apply grammar suggestions

Co-authored-by: Balazs Benics <benicsbal...@gmail.com>
---
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp 
b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index db31206a13c36b..f8b13618c2e2dd 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2801,17 +2801,17 @@ void ExprEngine::processBranch(
 
     if (StTrue) {
       // If we are processing a loop condition where two iterations have
-      // already been completed and the the false branch is also feasible, then
-      // don't assume a third iteration, because it is a redundant execution
+      // already been completed and the false branch is also feasible, then
+      // don't assume a third iteration because it is a redundant execution
       // path (unlikely to be different from earlier loop exits) and can cause
       // false positives if e.g. the loop iterates over a two-element structure
       // with an opaque condition.
       //
       // The iteration count "2" is hardcoded because it's the natural limit:
       // * the fact that the programmer wrote a loop (and not just an `if`)
-      //   implies that they thought that the loop body may be executed twice;
+      //   implies that they thought that the loop body might be executed 
twice;
       // * however, there are situations where the programmer knows that there
-      //   are at most two iterations, but writes a loop that appears to be
+      //   are at most two iterations but writes a loop that appears to be
       //   generic, because there is no special syntax for "loop with at most
       //   two iterations". (This pattern is common in FFMPEG and appears in
       //   many other projects as well.)
@@ -2824,7 +2824,7 @@ void ExprEngine::processBranch(
       // FIXME: This "don't assume third iteration" heuristic partially
       // conflicts with the widen-loop analysis option (which is off by
       // default). If we intend to support and stabilize the loop widening,
-      // we'll need to ensure that it 'plays nicely' with this logic.
+      // we must ensure that it 'plays nicely' with this logic.
       if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops)
         Builder.generateNode(StTrue, true, PredN);
     }
@@ -3750,7 +3750,7 @@ ExprEngine::getEagerlyAssumeBifurcationTags() {
 }
 
 /// The last expression where EagerlyAssume produced two transitions (i.e. it
-/// activated and the true and false case were both feasible).
+/// activated and the true and false cases were both feasible).
 REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeBifurcationAt, const Expr *)
 
 void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to