llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-analysis @llvm/pr-subscribers-clang Author: None (martinboehme) <details> <summary>Changes</summary> This component can be useful when creating implementations of `Solver`, as some SAT solvers require the input to be in 3-CNF. As part of making `CNFFormula` externally accessible, I have moved some member variables out of it that aren't really part of the representation of a 3-CNF formula and thus live better elsewhere: * `WatchedHead` and `NextWatched` have been moved to `WatchedLiteralsSolverImpl`, as they're part of the specific algorithm used by that SAT solver. * `Atomics` has become an output parameter of `buildCNF()` because it has to do with the relationship between a `CNFFormula` and the set of `Formula`s it is derived from rather than being an integral part of the representation of a 3-CNF formula. I have also made all member variables private and added appropriate accessors. --- Patch is 45.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92401.diff 7 Files Affected: - (modified) clang/docs/tools/clang-formatted-files.txt (+3) - (added) clang/include/clang/Analysis/FlowSensitive/CNFFormula.h (+180) - (modified) clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h (-1) - (modified) clang/lib/Analysis/FlowSensitive/CMakeLists.txt (+1) - (added) clang/lib/Analysis/FlowSensitive/CNFFormula.cpp (+305) - (modified) clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp (+52-416) - (modified) llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn (+1) ``````````diff diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt index eaeadf2656b0b..2b55c014791e1 100644 --- a/clang/docs/tools/clang-formatted-files.txt +++ b/clang/docs/tools/clang-formatted-files.txt @@ -124,6 +124,7 @@ clang/include/clang/Analysis/Analyses/CFGReachabilityAnalysis.h clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h clang/include/clang/Analysis/FlowSensitive/ASTOps.h +clang/include/clang/Analysis/FlowSensitive/CNFFormula.h clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -626,6 +627,7 @@ clang/tools/libclang/CXCursor.h clang/tools/scan-build-py/tests/functional/src/include/clean-one.h clang/unittests/Analysis/CFGBuildResult.h clang/unittests/Analysis/MacroExpansionContextTest.cpp +clang/unittests/Analysis/FlowSensitive/CNFFormula.cpp clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp clang/unittests/Analysis/FlowSensitive/MapLatticeTest.cpp @@ -637,6 +639,7 @@ clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp clang/unittests/Analysis/FlowSensitive/TestingSupport.h clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +clang/unittests/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp clang/unittests/Analysis/FlowSensitive/WatchedLiteralsSolverTest.cpp clang/unittests/AST/ASTImporterFixtures.cpp clang/unittests/AST/ASTImporterFixtures.h diff --git a/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h b/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h new file mode 100644 index 0000000000000..90b0c591b5df5 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h @@ -0,0 +1,180 @@ +//===- CNFFormula.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A representation of a boolean formula in 3-CNF. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CNFFORMULA_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CNFFORMULA_H + +#include <cstdint> +#include <vector> + +#include "clang/Analysis/FlowSensitive/Formula.h" + +namespace clang { +namespace dataflow { + +/// Boolean variables are represented as positive integers. +using Variable = uint32_t; + +/// A null boolean variable is used as a placeholder in various data structures +/// and algorithms. +constexpr Variable NullVar = 0; + +/// Literals are represented as positive integers. Specifically, for a boolean +/// variable `V` that is represented as the positive integer `I`, the positive +/// literal `V` is represented as the integer `2*I` and the negative literal +/// `!V` is represented as the integer `2*I+1`. +using Literal = uint32_t; + +/// A null literal is used as a placeholder in various data structures and +/// algorithms. +constexpr Literal NullLit = 0; + +/// Clause identifiers are represented as positive integers. +using ClauseID = uint32_t; + +/// A null clause identifier is used as a placeholder in various data structures +/// and algorithms. +constexpr ClauseID NullClause = 0; + +/// Returns the positive literal `V`. +inline constexpr Literal posLit(Variable V) { return 2 * V; } + +/// Returns whether `L` is a positive literal. +inline constexpr bool isPosLit(Literal L) { return 0 == (L & 1); } + +/// Returns whether `L` is a negative literal. +inline constexpr bool isNegLit(Literal L) { return 1 == (L & 1); } + +/// Returns the negative literal `!V`. +inline constexpr Literal negLit(Variable V) { return 2 * V + 1; } + +/// Returns the negated literal `!L`. +inline constexpr Literal notLit(Literal L) { return L ^ 1; } + +/// Returns the variable of `L`. +inline constexpr Variable var(Literal L) { return L >> 1; } + +/// A boolean formula in 3-CNF (conjunctive normal form with at most 3 literals +/// per clause). +class CNFFormula { + /// `LargestVar` is equal to the largest positive integer that represents a + /// variable in the formula. + const Variable LargestVar; + + /// Literals of all clauses in the formula. + /// + /// The element at index 0 stands for the literal in the null clause. It is + /// set to 0 and isn't used. Literals of clauses in the formula start from the + /// element at index 1. + /// + /// For example, for the formula `(L1 v L2) ^ (L2 v L3 v L4)` the elements of + /// `Clauses` will be `[0, L1, L2, L2, L3, L4]`. + std::vector<Literal> Clauses; + + /// Start indices of clauses of the formula in `Clauses`. + /// + /// The element at index 0 stands for the start index of the null clause. It + /// is set to 0 and isn't used. Start indices of clauses in the formula start + /// from the element at index 1. + /// + /// For example, for the formula `(L1 v L2) ^ (L2 v L3 v L4)` the elements of + /// `ClauseStarts` will be `[0, 1, 3]`. Note that the literals of the first + /// clause always start at index 1. The start index for the literals of the + /// second clause depends on the size of the first clause and so on. + std::vector<size_t> ClauseStarts; + + /// Indicates that we already know the formula is unsatisfiable. + /// During construction, we catch simple cases of conflicting unit-clauses. + bool KnownContradictory; + +public: + explicit CNFFormula(Variable LargestVar); + + /// Adds the `L1 v ... v Ln` clause to the formula. + /// Requirements: + /// + /// `Li` must not be `NullLit`. + /// + /// All literals in the input that are not `NullLit` must be distinct. + void addClause(ArrayRef<Literal> lits); + + /// Returns whether the formula is known to be contradictory. + /// This is the case if any of the clauses is empty. + bool knownContradictory() const { return KnownContradictory; } + + /// Returns the largest variable in the formula. + Variable largestVar() const { return LargestVar; } + + /// Returns the number of clauses in the formula. + /// Valid clause IDs are in the range [1, `numClauses()`]. + ClauseID numClauses() const { return ClauseStarts.size() - 1; } + + /// Returns the number of literals in clause `C`. + size_t clauseSize(ClauseID C) const { + return C == ClauseStarts.size() - 1 ? Clauses.size() - ClauseStarts[C] + : ClauseStarts[C + 1] - ClauseStarts[C]; + } + + /// Returns the literals of clause `C`. + /// If `knownContradictory()` is false, each clause has at least one literal. + llvm::ArrayRef<Literal> clauseLiterals(ClauseID C) const { + size_t S = clauseSize(C); + if (S == 0) + return llvm::ArrayRef<Literal>(); + return llvm::ArrayRef<Literal>(&Clauses[ClauseStarts[C]], S); + } + + /// An iterator over all literals of all clauses in the formula. + /// The iterator allows mutation of the literal through the `*` operator. + /// This is to support solvers that mutate the formula during solving. + class Iterator { + friend class CNFFormula; + CNFFormula *CNF; + size_t Idx; + Iterator(CNFFormula *CNF, size_t Idx) : CNF(CNF), Idx(Idx) {} + + public: + Iterator(const Iterator &) = default; + Iterator &operator=(const Iterator &) = default; + + Iterator &operator++() { + ++Idx; + assert(Idx < CNF->Clauses.size() && "Iterator out of bounds"); + return *this; + } + + Iterator next() const { + Iterator I = *this; + ++I; + return I; + } + + Literal &operator*() const { return CNF->Clauses[Idx]; } + }; + friend class Iterator; + + /// Returns an iterator to the first literal of clause `C`. + Iterator startOfClause(ClauseID C) { return Iterator(this, ClauseStarts[C]); } +}; + +/// Converts the conjunction of `Vals` into a formula in conjunctive normal +/// form where each clause has at least one and at most three literals. +/// `Atomics` is populated with a mapping from `Variables` to the corresponding +/// `Atom`s for atomic booleans in the input formulas. +CNFFormula +buildCNF(const llvm::ArrayRef<const Formula *> &Formulas, + llvm::DenseMap<Variable, Atom> &Atomics); + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CNFFORMULA_H diff --git a/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h b/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h index b5cd7aa10fd7d..c948aa21cec3b 100644 --- a/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h +++ b/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h @@ -17,7 +17,6 @@ #include "clang/Analysis/FlowSensitive/Formula.h" #include "clang/Analysis/FlowSensitive/Solver.h" #include "llvm/ADT/ArrayRef.h" -#include <limits> namespace clang { namespace dataflow { diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt index 6631fe27f3d90..f89d4e57e5819 100644 --- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt @@ -2,6 +2,7 @@ add_clang_library(clangAnalysisFlowSensitive AdornedCFG.cpp Arena.cpp ASTOps.cpp + CNFFormula.cpp DataflowAnalysisContext.cpp DataflowEnvironment.cpp Formula.cpp diff --git a/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp b/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp new file mode 100644 index 0000000000000..521b81fe78aa0 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp @@ -0,0 +1,305 @@ +//===- CNFFormula.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A representation of a boolean formula in 3-CNF. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/CNFFormula.h" +#include "llvm/ADT/DenseSet.h" + +#include <queue> + +namespace clang { +namespace dataflow { + +namespace { + +/// Applies simplifications while building up a BooleanFormula. +/// We keep track of unit clauses, which tell us variables that must be +/// true/false in any model that satisfies the overall formula. +/// Such variables can be dropped from subsequently-added clauses, which +/// may in turn yield more unit clauses or even a contradiction. +/// The total added complexity of this preprocessing is O(N) where we +/// for every clause, we do a lookup for each unit clauses. +/// The lookup is O(1) on average. This method won't catch all +/// contradictory formulas, more passes can in principle catch +/// more cases but we leave all these and the general case to the +/// proper SAT solver. +struct CNFFormulaBuilder { + // Formula should outlive CNFFormulaBuilder. + explicit CNFFormulaBuilder(CNFFormula &CNF) + : Formula(CNF) {} + + /// Adds the `L1 v ... v Ln` clause to the formula. Applies + /// simplifications, based on single-literal clauses. + /// + /// Requirements: + /// + /// `Li` must not be `NullLit`. + /// + /// All literals must be distinct. + void addClause(ArrayRef<Literal> Literals) { + // We generate clauses with up to 3 literals in this file. + assert(!Literals.empty() && Literals.size() <= 3); + // Contains literals of the simplified clause. + llvm::SmallVector<Literal> Simplified; + for (auto L : Literals) { + assert(L != NullLit && + llvm::all_of(Simplified, + [L](Literal S) { return S != L; })); + auto X = var(L); + if (trueVars.contains(X)) { // X must be true + if (isPosLit(L)) + return; // Omit clause `(... v X v ...)`, it is `true`. + else + continue; // Omit `!X` from `(... v !X v ...)`. + } + if (falseVars.contains(X)) { // X must be false + if (isNegLit(L)) + return; // Omit clause `(... v !X v ...)`, it is `true`. + else + continue; // Omit `X` from `(... v X v ...)`. + } + Simplified.push_back(L); + } + if (Simplified.empty()) { + // Simplification made the clause empty, which is equivalent to `false`. + // We already know that this formula is unsatisfiable. + Formula.addClause(Simplified); + return; + } + if (Simplified.size() == 1) { + // We have new unit clause. + const Literal lit = Simplified.front(); + const Variable v = var(lit); + if (isPosLit(lit)) + trueVars.insert(v); + else + falseVars.insert(v); + } + Formula.addClause(Simplified); + } + + /// Returns true if we observed a contradiction while adding clauses. + /// In this case then the formula is already known to be unsatisfiable. + bool isKnownContradictory() { return Formula.knownContradictory(); } + +private: + CNFFormula &Formula; + llvm::DenseSet<Variable> trueVars; + llvm::DenseSet<Variable> falseVars; +}; + +} // namespace + +CNFFormula::CNFFormula(Variable LargestVar) + : LargestVar(LargestVar), KnownContradictory(false) { + Clauses.push_back(0); + ClauseStarts.push_back(0); +} + +void CNFFormula::addClause(ArrayRef<Literal> lits) { + assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; })); + + if (lits.empty()) + KnownContradictory = true; + + const size_t S = Clauses.size(); + ClauseStarts.push_back(S); + Clauses.insert(Clauses.end(), lits.begin(), lits.end()); +} + +CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas, + llvm::DenseMap<Variable, Atom> &Atomics) { + // The general strategy of the algorithm implemented below is to map each + // of the sub-values in `Vals` to a unique variable and use these variables in + // the resulting CNF expression to avoid exponential blow up. The number of + // literals in the resulting formula is guaranteed to be linear in the number + // of sub-formulas in `Vals`. + + // Map each sub-formula in `Vals` to a unique variable. + llvm::DenseMap<const Formula *, Variable> FormulaToVar; + // Store variable identifiers and Atom of atomic booleans. + Variable NextVar = 1; + { + std::queue<const Formula *> UnprocessedFormulas; + for (const Formula *F : Formulas) + UnprocessedFormulas.push(F); + while (!UnprocessedFormulas.empty()) { + Variable Var = NextVar; + const Formula *F = UnprocessedFormulas.front(); + UnprocessedFormulas.pop(); + + if (!FormulaToVar.try_emplace(F, Var).second) + continue; + ++NextVar; + + for (const Formula *Op : F->operands()) + UnprocessedFormulas.push(Op); + if (F->kind() == Formula::AtomRef) + Atomics[Var] = F->getAtom(); + } + } + + auto GetVar = [&FormulaToVar](const Formula *F) { + auto ValIt = FormulaToVar.find(F); + assert(ValIt != FormulaToVar.end()); + return ValIt->second; + }; + + CNFFormula CNF(NextVar - 1); + std::vector<bool> ProcessedSubVals(NextVar, false); + CNFFormulaBuilder builder(CNF); + + // Add a conjunct for each variable that represents a top-level conjunction + // value in `Vals`. + for (const Formula *F : Formulas) + builder.addClause(posLit(GetVar(F))); + + // Add conjuncts that represent the mapping between newly-created variables + // and their corresponding sub-formulas. + std::queue<const Formula *> UnprocessedFormulas; + for (const Formula *F : Formulas) + UnprocessedFormulas.push(F); + while (!UnprocessedFormulas.empty()) { + const Formula *F = UnprocessedFormulas.front(); + UnprocessedFormulas.pop(); + const Variable Var = GetVar(F); + + if (ProcessedSubVals[Var]) + continue; + ProcessedSubVals[Var] = true; + + switch (F->kind()) { + case Formula::AtomRef: + break; + case Formula::Literal: + CNF.addClause(F->literal() ? posLit(Var) : negLit(Var)); + break; + case Formula::And: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + if (LHS == RHS) { + // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is + // already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS)}); + builder.addClause({posLit(Var), negLit(LHS)}); + } else { + // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v + // !B)` which is already in conjunctive normal form. Below we add each + // of the conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS)}); + builder.addClause({negLit(Var), posLit(RHS)}); + builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)}); + } + break; + } + case Formula::Or: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + if (LHS == RHS) { + // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is + // already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS)}); + builder.addClause({posLit(Var), negLit(LHS)}); + } else { + // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v + // !B)` which is already in conjunctive normal form. Below we add each + // of the conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS), posLit(RHS)}); + builder.addClause({posLit(Var), negLit(LHS)}); + builder.addClause({posLit(Var), negLit(RHS)}); + } + break; + } + case Formula::Not: { + const Variable Operand = GetVar(F->operands()[0]); + + // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is + // already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), negLit(Operand)}); + builder.addClause({posLit(Var), posLit(Operand)}); + break; + } + case Formula::Implies: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + // `X <=> (A => B)` is equivalent to + // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in + // conjunctive normal form. Below we add each of the conjuncts of + // the latter expression to the result. + builder.addClause({posLit(Var), posLit(LHS)}); + builder.addClause({posLit(Var), negLit(RHS)}); + builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)}); + break; + } + case Formula::Equal: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + if (LHS == RHS) { + // `X <=> (A <=> A)` is equivalent to `X` which is already in + // conjunctive normal form. Below we add each of the conjuncts of the + // latter expression to the result. + builder.addClause(posLit(Var)); + + // No need to visit the sub-values of `Val`. + continue; + } + // `X <=> (A <=> B)` is equivalent to + // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which + // is already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + builder.addClause({posLit(Var), posLit(LHS), posLit(RHS)}); + builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)}); + builder.addClause({negLit(Var), posLit(LHS), negLit(RHS)}); + builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)}); + break; + } + } + if (builder.isKnownContradictory()) { + return CNF; + } + for (const Formula *Child : F->operands()) + Unprocessed... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/92401 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits