https://github.com/bazuzi created https://github.com/llvm/llvm-project/pull/175980
…sing during (de)serialization. Stack overflows have been seen in practice for very large formulas. >From 93b0b1548f2d181b6e97f7f0f23d5c7fb3778e0d Mon Sep 17 00:00:00 2001 From: Samira Bakon <[email protected]> Date: Wed, 14 Jan 2026 11:08:19 -0500 Subject: [PATCH] [clang][dataflow] Manage stacks of formulas directly instead of recursing during (de)serialization. Stack overflows have been seen in practice for very large formulas. --- .../FlowSensitive/DataflowAnalysisContext.cpp | 47 ++-- .../FlowSensitive/FormulaSerialization.cpp | 209 +++++++++++------- .../Analysis/FlowSensitive/FormulaTest.cpp | 7 +- 3 files changed, 171 insertions(+), 92 deletions(-) diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index 4196d6821c184..6e3a270e6bed6 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -18,6 +18,8 @@ #include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" #include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/CommandLine.h" @@ -27,6 +29,7 @@ #include "llvm/Support/raw_ostream.h" #include <cassert> #include <memory> +#include <stack> #include <string> #include <utility> #include <vector> @@ -261,23 +264,33 @@ void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( static void getReferencedAtoms(const Formula &F, llvm::DenseSet<dataflow::Atom> &Refs) { - switch (F.kind()) { - case Formula::AtomRef: - Refs.insert(F.getAtom()); - break; - case Formula::Literal: - break; - case Formula::Not: - getReferencedAtoms(*F.operands()[0], Refs); - break; - case Formula::And: - case Formula::Or: - case Formula::Implies: - case Formula::Equal: - ArrayRef<const Formula *> Operands = F.operands(); - getReferencedAtoms(*Operands[0], Refs); - getReferencedAtoms(*Operands[1], Refs); - break; + // Avoid recursion to avoid stack overflows from very large formulas. + // The shape of the tree structure for very large formulas is such that there + // are at most 2 children from any node, but there may be many generations. + std::stack<const Formula *> WorkList; + WorkList.push(&F); + + while (!WorkList.empty()) { + const Formula *Current = WorkList.top(); + WorkList.pop(); + switch (Current->kind()) { + case Formula::AtomRef: + Refs.insert(Current->getAtom()); + break; + case Formula::Literal: + break; + case Formula::Not: + WorkList.push(Current->operands()[0]); + break; + case Formula::And: + case Formula::Or: + case Formula::Implies: + case Formula::Equal: + ArrayRef<const Formula *> Operands = Current->operands(); + WorkList.push(Operands[0]); + WorkList.push(Operands[1]); + break; + } } } diff --git a/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp b/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp index df15a1d6eaadb..d11129f2f413e 100644 --- a/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp +++ b/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp @@ -11,12 +11,13 @@ #include "clang/Analysis/FlowSensitive/Formula.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> +#include <cstddef> +#include <stack> +#include <vector> namespace clang::dataflow { @@ -42,104 +43,164 @@ static char compactSigil(Formula::Kind K) { llvm_unreachable("unhandled formula kind"); } +// Avoids recursion to avoid stack overflows from very large formulas. void serializeFormula(const Formula &F, llvm::raw_ostream &OS) { - switch (Formula::numOperands(F.kind())) { - case 0: - switch (F.kind()) { - case Formula::AtomRef: - OS << F.getAtom(); + std::stack<const Formula *> WorkList; + WorkList.push(&F); + while (!WorkList.empty()) { + const Formula *Current = WorkList.top(); + WorkList.pop(); + switch (Formula::numOperands(Current->kind())) { + case 0: + switch (Current->kind()) { + case Formula::AtomRef: + OS << Current->getAtom(); + break; + case Formula::Literal: + OS << (Current->literal() ? 'T' : 'F'); + break; + default: + llvm_unreachable("unhandled formula kind"); + } break; - case Formula::Literal: - OS << (F.literal() ? 'T' : 'F'); + case 1: + OS << compactSigil(Current->kind()); + WorkList.push(Current->operands()[0]); + break; + case 2: + OS << compactSigil(Current->kind()); + WorkList.push(Current->operands()[1]); + WorkList.push(Current->operands()[0]); break; default: - llvm_unreachable("unhandled formula kind"); + llvm_unreachable("unhandled formula arity"); } - break; - case 1: - OS << compactSigil(F.kind()); - serializeFormula(*F.operands()[0], OS); - break; - case 2: - OS << compactSigil(F.kind()); - serializeFormula(*F.operands()[0], OS); - serializeFormula(*F.operands()[1], OS); - break; - default: - llvm_unreachable("unhandled formula arity"); } } -static llvm::Expected<const Formula *> -parsePrefix(llvm::StringRef &Str, Arena &A, - llvm::DenseMap<unsigned, Atom> &AtomMap) { - if (Str.empty()) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "unexpected end of input"); - - char Prefix = Str[0]; - Str = Str.drop_front(); - - switch (Prefix) { - case 'T': - return &A.makeLiteral(true); - case 'F': - return &A.makeLiteral(false); - case 'V': { - unsigned AtomID; - if (Str.consumeInteger(10, AtomID)) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "expected atom id"); - auto [It, Inserted] = AtomMap.try_emplace(AtomID, Atom()); - if (Inserted) - It->second = A.makeAtom(); - return &A.makeAtomRef(It->second); - } - case '!': { - auto OperandOrErr = parsePrefix(Str, A, AtomMap); - if (!OperandOrErr) - return OperandOrErr.takeError(); - return &A.makeNot(**OperandOrErr); - } - case '&': - case '|': - case '>': - case '=': { - auto LeftOrErr = parsePrefix(Str, A, AtomMap); - if (!LeftOrErr) - return LeftOrErr.takeError(); +struct Operation { + Operation(Formula::Kind Kind) : Kind(Kind) {} + const Formula::Kind Kind; + const unsigned ExpectedNumOperands = Formula::numOperands(Kind); + std::vector<const Formula *> Operands; +}; - auto RightOrErr = parsePrefix(Str, A, AtomMap); - if (!RightOrErr) - return RightOrErr.takeError(); +// Avoids recursion to avoid stack overflows from very large formulas. +static llvm::Expected<const Formula *> +parseFormulaInternal(llvm::StringRef &Str, Arena &A, + llvm::DenseMap<unsigned, Atom> &AtomMap) { + std::stack<Operation> ActiveOperations; - const Formula &LHS = **LeftOrErr; - const Formula &RHS = **RightOrErr; + while (!Str.empty()) { + char Prefix = Str[0]; + Str = Str.drop_front(); switch (Prefix) { + // Terminals + case 'T': + case 'F': + case 'V': { + const Formula *TerminalFormula; + switch (Prefix) { + case 'T': + TerminalFormula = &A.makeLiteral(true); + break; + case 'F': + TerminalFormula = &A.makeLiteral(false); + break; + case 'V': { + unsigned AtomID; + if (Str.consumeInteger(10, AtomID)) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "expected atom id"); + auto [It, Inserted] = AtomMap.try_emplace(AtomID, Atom()); + if (Inserted) + It->second = A.makeAtom(); + TerminalFormula = &A.makeAtomRef(It->second); + break; + } + default: + llvm_unreachable("unexpected terminal character"); + } + + if (ActiveOperations.empty()) { + return TerminalFormula; + } + Operation *Op = &ActiveOperations.top(); + Op->Operands.push_back(TerminalFormula); + while (Op->Operands.size() == Op->ExpectedNumOperands) { + const Formula *OpFormula = nullptr; + switch (Op->Kind) { + case Formula::Kind::Not: + OpFormula = &A.makeNot(*Op->Operands[0]); + break; + case Formula::Kind::And: + OpFormula = &A.makeAnd(*Op->Operands[0], *Op->Operands[1]); + break; + case Formula::Kind::Or: + OpFormula = &A.makeOr(*Op->Operands[0], *Op->Operands[1]); + break; + case Formula::Kind::Implies: + OpFormula = &A.makeImplies(*Op->Operands[0], *Op->Operands[1]); + break; + case Formula::Kind::Equal: + OpFormula = &A.makeEquals(*Op->Operands[0], *Op->Operands[1]); + break; + default: + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "only unary and binary operations are " + "expected, but got Formula::Kind %u", + Op->Kind); + } + ActiveOperations.pop(); + if (ActiveOperations.empty()) + return OpFormula; + Op = &ActiveOperations.top(); + Op->Operands.push_back(OpFormula); + } + if (Op->Operands.size() > Op->ExpectedNumOperands) { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "too many operands"); + } + break; + } + case '!': + ActiveOperations.emplace(Formula::Kind::Not); + break; case '&': - return &A.makeAnd(LHS, RHS); + ActiveOperations.emplace(Formula::Kind::And); + break; case '|': - return &A.makeOr(LHS, RHS); + ActiveOperations.emplace(Formula::Kind::Or); + break; case '>': - return &A.makeImplies(LHS, RHS); + ActiveOperations.emplace(Formula::Kind::Implies); + break; case '=': - return &A.makeEquals(LHS, RHS); + ActiveOperations.emplace(Formula::Kind::Equal); + break; default: - llvm_unreachable("unexpected binary op"); + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "unexpected prefix character: %c", Prefix); } } - default: + + if (ActiveOperations.empty()) return llvm::createStringError(llvm::inconvertibleErrorCode(), - "unexpected prefix character: %c", Prefix); - } + "unexpected end of input"); + + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + "not enough operands: expected at least %u, but got %lu", + ActiveOperations.top().ExpectedNumOperands, + ActiveOperations.top().Operands.size()); } llvm::Expected<const Formula *> parseFormula(llvm::StringRef Str, Arena &A, llvm::DenseMap<unsigned, Atom> &AtomMap) { size_t OriginalSize = Str.size(); - llvm::Expected<const Formula *> F = parsePrefix(Str, A, AtomMap); + llvm::Expected<const Formula *> F = parseFormulaInternal(Str, A, AtomMap); if (!F) return F.takeError(); if (!Str.empty()) diff --git a/clang/unittests/Analysis/FlowSensitive/FormulaTest.cpp b/clang/unittests/Analysis/FlowSensitive/FormulaTest.cpp index db9c02854c7fa..d501a00edfe7f 100644 --- a/clang/unittests/Analysis/FlowSensitive/FormulaTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/FormulaTest.cpp @@ -9,10 +9,13 @@ #include "clang/Analysis/FlowSensitive/Formula.h" #include "clang/Analysis/FlowSensitive/Arena.h" #include "clang/Analysis/FlowSensitive/FormulaSerialization.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include <string> namespace { @@ -94,7 +97,7 @@ class ParseFormulaTest : public ::testing::Test { AtomMap[1] = Atom2; } - // Convenience wrapper for `testParseFormula`. + // Convenience wrapper for `parseFormula`. llvm::Expected<const Formula *> testParseFormula(llvm::StringRef Str) { return parseFormula(Str, A, AtomMap); } @@ -194,6 +197,8 @@ TEST_F(ParseFormulaTest, MalformedFormulaFails) { EXPECT_THAT_EXPECTED(testParseFormula("|V0"), Failed()); EXPECT_THAT_EXPECTED(testParseFormula(">V0"), Failed()); EXPECT_THAT_EXPECTED(testParseFormula("=V0"), Failed()); + // Too many operands + EXPECT_THAT_EXPECTED(testParseFormula("=V0V1V2"), Failed()); } } // namespace _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
