https://github.com/rojer created https://github.com/llvm/llvm-project/pull/177449
None >From 045bdc71851d4d0d53edaf81806813c7658e95cb Mon Sep 17 00:00:00 2001 From: Deomid rojer Ryabkov <[email protected]> Date: Thu, 22 Jan 2026 21:32:43 +0200 Subject: [PATCH 1/4] Fix --- .../StaticAnalyzer/Core/SimpleSValBuilder.cpp | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 6108931f737d4..4cb2368cdb556 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -1070,6 +1070,57 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, // conjuring an expression instead. SymbolRef LHSSym = lhs.getAsLocSymbol(); SymbolRef RHSSym = rhs.getAsLocSymbol(); + + // Check if one symbol is derived from the other through a pointer field. + // In linked list traversal patterns like `ptr = ptr->next`, the new ptr + // cannot equal the old ptr (assuming acyclic lists). This helps avoid + // false positives in use-after-free detection for list traversal code. + if (LHSSym && RHSSym && (op == BO_EQ || op == BO_NE)) { + // Check if Sym's origin region contains a SymbolicRegion based on Target, + // accessed through a pointer-typed field. This detects patterns like: + // ptr2 = ptr1->next (where ptr2 cannot equal ptr1 for acyclic structures) + auto isDerivedThroughPointerField = [](SymbolRef Sym, + SymbolRef Target) -> bool { + // Get the origin region for this symbol + const MemRegion *SymRegion = Sym->getOriginRegion(); + if (!SymRegion) + return false; + + // Walk up the region hierarchy looking for: + // 1. A pointer-typed FieldRegion (the "next" pointer) + // 2. That is based on a SymbolicRegion of Target + bool foundPointerField = false; + const MemRegion *R = SymRegion; + while (R) { + if (const auto *FR = dyn_cast<FieldRegion>(R)) { + if (FR->getDecl()->getType()->isPointerType()) + foundPointerField = true; + } + if (const auto *SymR = dyn_cast<SymbolicRegion>(R)) { + if (foundPointerField && SymR->getSymbol() == Target) + return true; + // Stop at symbolic regions - don't continue past them + break; + } + if (const auto *SR = dyn_cast<SubRegion>(R)) + R = SR->getSuperRegion(); + else + break; + } + return false; + }; + + if (isDerivedThroughPointerField(LHSSym, RHSSym) || + isDerivedThroughPointerField(RHSSym, LHSSym)) { + // A symbol derived through a pointer field cannot equal its parent + // (assuming acyclic data structures, which is the common case). + if (op == BO_EQ) + return makeTruthVal(false, resultTy); + if (op == BO_NE) + return makeTruthVal(true, resultTy); + } + } + if (LHSSym && RHSSym) return makeNonLoc(LHSSym, op, RHSSym, resultTy); >From 204ced66cfe08d0f8ebe31af84ce3e4936310d17 Mon Sep 17 00:00:00 2001 From: Deomid rojer Ryabkov <[email protected]> Date: Thu, 22 Jan 2026 21:38:48 +0200 Subject: [PATCH 2/4] Test --- .../Analysis/ptr-iter-derived-compare.cpp | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 clang/test/Analysis/ptr-iter-derived-compare.cpp diff --git a/clang/test/Analysis/ptr-iter-derived-compare.cpp b/clang/test/Analysis/ptr-iter-derived-compare.cpp new file mode 100644 index 0000000000000..0e456bd342a59 --- /dev/null +++ b/clang/test/Analysis/ptr-iter-derived-compare.cpp @@ -0,0 +1,47 @@ +// RUN: %clang_analyze_cc1 -std=c++11 \ +// RUN: -analyzer-checker=core,cplusplus.NewDelete \ +// RUN: -verify %s +// expected-no-diagnostics + +// Test that the analyzer correctly determines that a pointer derived through +// a pointer field (like linked list traversal) cannot equal its original value. +// This prevents false positives in patterns like STAILQ_REMOVE. + +// Matches the structure in sys/queue.h STAILQ implementation +struct Foo { + int n; + struct { struct Foo *stqe_next; } next; +}; + +struct FooHead { struct Foo *stqh_first; struct Foo **stqh_last; } +foos = { nullptr, &(foos).stqh_first }; + +// This pattern is from STAILQ_FOREACH + STAILQ_REMOVE usage. +// Previously, the analyzer would falsely report a use-after-free because +// it could not determine that after `fi = fi->next.stqe_next`, the pointer +// `fi` cannot be equal to `foos.stqh_first`. +void test_stailq_foreach_remove() { + bool removed; + do { + removed = false; + for (Foo *fi = foos.stqh_first; fi; fi = fi->next.stqe_next) { + if (fi->n == 1) { + // STAILQ_REMOVE: if fi == head, remove head; else search and remove + if (foos.stqh_first == fi) { + foos.stqh_first = foos.stqh_first->next.stqe_next; + if (foos.stqh_first == nullptr) + foos.stqh_last = &foos.stqh_first; + } else { + Foo *curelm = foos.stqh_first; + while (curelm->next.stqe_next != fi) + curelm = curelm->next.stqe_next; + if ((curelm->next.stqe_next = curelm->next.stqe_next->next.stqe_next) == nullptr) + foos.stqh_last = &curelm->next.stqe_next; + } + delete fi; + removed = true; + break; + } + } + } while (removed); +} >From c24c94362e31f1369fb8d47a7b078ffd272cde60 Mon Sep 17 00:00:00 2001 From: Deomid rojer Ryabkov <[email protected]> Date: Thu, 22 Jan 2026 21:51:33 +0200 Subject: [PATCH 3/4] Repro cases --- test1/.clang-tidy | 2 ++ test1/test.cpp | 46 +++++++++++++++++++++++++++++++++++++++++++ test1/test2.cpp | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 test1/.clang-tidy create mode 100644 test1/test.cpp create mode 100644 test1/test2.cpp diff --git a/test1/.clang-tidy b/test1/.clang-tidy new file mode 100644 index 0000000000000..630b66b66e5c0 --- /dev/null +++ b/test1/.clang-tidy @@ -0,0 +1,2 @@ +Checks: clang-analyzer-cplusplus.NewDelete +WarningsAsErrors: '*' diff --git a/test1/test.cpp b/test1/test.cpp new file mode 100644 index 0000000000000..606673a339e4d --- /dev/null +++ b/test1/test.cpp @@ -0,0 +1,46 @@ +#include <stdio.h> + +struct Foo { + Foo(int _n) : n(_n) {} + + int n = 0; + struct { + struct Foo *stqe_next; + } next = {}; +}; + +struct foos { + struct Foo *stqh_first; + struct Foo **stqh_last; +} foos = {nullptr, &(foos).stqh_first}; + +int main() { + bool removed; + do { + removed = false; + for (Foo *fi = foos.stqh_first; fi; fi = fi->next.stqe_next) { + printf("%p\n", fi); // False UAF reported here + if (fi->n == 1) { + // STAILQ_REMOVE expanded - the if check below is key to the bug + if (foos.stqh_first == fi) { + // STAILQ_REMOVE_HEAD + foos.stqh_first = foos.stqh_first->next.stqe_next; + if (foos.stqh_first == nullptr) + foos.stqh_last = &foos.stqh_first; + } else { + // Remove from middle/end + Foo *curelm = foos.stqh_first; + while (curelm->next.stqe_next != fi) + curelm = curelm->next.stqe_next; + if ((curelm->next.stqe_next = + curelm->next.stqe_next->next.stqe_next) == nullptr) + foos.stqh_last = &curelm->next.stqe_next; + } + delete fi; + removed = true; + break; + } + } + } while (removed); + return 0; +} diff --git a/test1/test2.cpp b/test1/test2.cpp new file mode 100644 index 0000000000000..6b83761bc75ec --- /dev/null +++ b/test1/test2.cpp @@ -0,0 +1,50 @@ +#include <stdio.h> +#include <sys/queue.h> + +struct Foo { + Foo(int _n) : n(_n) {} + + int n = 0; + STAILQ_ENTRY(Foo) next = {}; +}; + +STAILQ_HEAD(foos, Foo) +foos = STAILQ_HEAD_INITIALIZER(foos); + +int main() { + Foo *fi; + + for (int i = 1; i <= 3; i++) { + fi = new Foo(i); + STAILQ_INSERT_TAIL(&foos, fi, next); + } + + STAILQ_FOREACH(fi, &foos, next) { + printf("%d\n", fi->n); + } + printf("\n"); + + bool removed = false; + do { + removed = false; + printf("start\n"); + STAILQ_FOREACH(fi, &foos, next) { + printf("%p\n", fi); // (1) False UAF reported here + if (fi->n == 1) { + STAILQ_REMOVE(&foos, fi, Foo, next); + printf(" delete %p\n", fi); + delete fi; + // fi->n = 2; // (2) THIS is UAF + removed = true; + break; + } + } + } while (removed); + printf("\n"); + + STAILQ_FOREACH(fi, &foos, next) { + printf("%d\n", fi->n); + } + + return 0; +} >From b08d59b158b244048d01cd8205389460c527bec3 Mon Sep 17 00:00:00 2001 From: Deomid rojer Ryabkov <[email protected]> Date: Thu, 22 Jan 2026 21:52:28 +0200 Subject: [PATCH 4/4] Doc --- fix_167586.md | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 fix_167586.md diff --git a/fix_167586.md b/fix_167586.md new file mode 100644 index 0000000000000..32b90b5818570 --- /dev/null +++ b/fix_167586.md @@ -0,0 +1,125 @@ +# Fix for False Positive Use-After-Free in Linked List Traversal + +## Problem Summary + +The Clang Static Analyzer reports false positive "Use of memory after it is released" warnings when analyzing code that traverses linked lists using patterns like BSD's `STAILQ_FOREACH` combined with `STAILQ_REMOVE`. + +## Root Cause + +When comparing two pointers in `SimpleSValBuilder::evalBinOpLL()`, the analyzer could not determine that a pointer derived through a field access (like `ptr->next`) cannot be equal to the original pointer (assuming acyclic data structures). + +### Example Code That Triggered False Positive + +```cpp +struct Foo { + int n; + struct { struct Foo *stqe_next; } next; +}; + +struct FooHead { struct Foo *stqh_first; } foos; + +void remove_from_list() { + bool removed; + do { + removed = false; + for (Foo *fi = foos.stqh_first; fi; fi = fi->next.stqe_next) { + if (fi->n == 1) { + // STAILQ_REMOVE pattern: check if fi is the head + if (foos.stqh_first == fi) { // <-- Analyzer incorrectly assumed this could be true + foos.stqh_first = foos.stqh_first->next.stqe_next; + } + delete fi; + removed = true; + break; + } + } + } while (removed); +} +``` + +### Analysis Path Leading to False Positive + +1. **1st for-iteration**: `fi = foos.stqh_first` (symbol S1), condition `n != 1` is false +2. **Loop increment**: `fi = fi->next.stqe_next` (fi now has derived symbol S2) +3. **2nd for-iteration**: `n == 1` is true +4. **STAILQ_REMOVE check**: Analyzer evaluated `foos.stqh_first == fi` + - LHS: `foos.stqh_first` still has symbol S1 + - RHS: `fi` has symbol S2 (derived from S1 through `->next.stqe_next`) + - **Bug**: Analyzer could not prove S1 ≠ S2, so it explored both branches +5. Taking the true branch incorrectly, the analyzer assumed `fi == foos.stqh_first` +6. After `delete fi` and loop re-entry, reading `foos.stqh_first` appeared to return the deleted pointer + +### Why The Analyzer Got Confused + +When comparing two `SymbolicRegion` pointers where both are based on symbolic values (not concrete heap allocations), the analyzer fell through to creating a `SymSymExpr` and letting the constraint manager handle it. The constraint manager had no information to prove the symbols were different, so `assumeDual()` explored both true and false branches as feasible. + +The key insight is that S2's **origin region** contains a path through a pointer field (`->next.stqe_next`) based on S1's `SymbolicRegion`. For acyclic data structures (the common case), traversing a pointer field yields a different pointer than the original. + +## The Fix + +Added logic in `SimpleSValBuilder::evalBinOpLL()` to detect when one symbol is derived from another through a pointer field access. When this pattern is detected, the comparison `ptr1 == ptr2` returns `false` (and `ptr1 != ptr2` returns `true`). + +### Implementation Details + +The fix adds a lambda `isDerivedThroughPointerField` that: + +1. Gets the origin region of the symbol being checked +2. Walks up the region hierarchy looking for: + - A `FieldRegion` with a pointer type (the "next" pointer) + - A `SymbolicRegion` based on the target symbol +3. If both are found (pointer field leading to target's symbolic region), returns true + +```cpp +auto isDerivedThroughPointerField = [](SymbolRef Sym, SymbolRef Target) -> bool { + const MemRegion *SymRegion = Sym->getOriginRegion(); + if (!SymRegion) + return false; + + bool foundPointerField = false; + const MemRegion *R = SymRegion; + while (R) { + if (const auto *FR = dyn_cast<FieldRegion>(R)) { + if (FR->getDecl()->getType()->isPointerType()) + foundPointerField = true; + } + if (const auto *SymR = dyn_cast<SymbolicRegion>(R)) { + if (foundPointerField && SymR->getSymbol() == Target) + return true; + break; + } + if (const auto *SR = dyn_cast<SubRegion>(R)) + R = SR->getSuperRegion(); + else + break; + } + return false; +}; +``` + +When a derivation through a pointer field is detected, the comparison returns a definite result: +- `BO_EQ` → `false` +- `BO_NE` → `true` + +### Files Changed + +- `clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp`: Added the derivation check in `evalBinOpLL()` +- `clang/test/Analysis/ptr-iter-derived-compare.cpp`: Added regression test + +## Assumptions and Limitations + +The fix assumes **acyclic data structures**, which is the common case for: +- Singly-linked lists (SLIST, STAILQ) +- Doubly-linked lists (LIST, TAILQ) +- Tree structures +- Most pointer-based data structures + +For **circular lists**, this assumption may not hold (a node's `next` pointer could eventually point back to itself). However: +- Circular lists are less common +- The false positives from non-circular lists are more harmful to usability +- The fix only applies to direct derivation (one level of `->next`), not deep chains + +## Testing + +- The fix eliminates false positives on the original test cases (`test1/test.cpp`, `test1/test2.cpp`) +- Added regression test `clang/test/Analysis/ptr-iter-derived-compare.cpp` +- All existing analyzer tests pass (`check-clang-analysis`) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
