[clang] [clang][dataflow] Manage stacks of formulas directly instead of recur… (PR #175980)

Yitzhak Mandelbaum via cfe-commits Wed, 14 Jan 2026 13:43:31 -0800

================
@@ -42,104 +43,164 @@ static char compactSigil(Formula::Kind K) {
   llvm_unreachable("unhandled formula kind");
 }
 
+// Avoids recursion to avoid stack overflows from very large formulas.
 void serializeFormula(const Formula &F, llvm::raw_ostream &OS) {
-  switch (Formula::numOperands(F.kind())) {
-  case 0:
-    switch (F.kind()) {
-    case Formula::AtomRef:
-      OS << F.getAtom();
+  std::stack<const Formula *> WorkList;
+  WorkList.push(&F);
+  while (!WorkList.empty()) {
+    const Formula *Current = WorkList.top();
+    WorkList.pop();
+    switch (Formula::numOperands(Current->kind())) {
+    case 0:
+      switch (Current->kind()) {
+      case Formula::AtomRef:
+        OS << Current->getAtom();
+        break;
+      case Formula::Literal:
+        OS << (Current->literal() ? 'T' : 'F');
+        break;
+      default:
+        llvm_unreachable("unhandled formula kind");
+      }
       break;
-    case Formula::Literal:
-      OS << (F.literal() ? 'T' : 'F');
+    case 1:
+      OS << compactSigil(Current->kind());
+      WorkList.push(Current->operands()[0]);
+      break;
+    case 2:
+      OS << compactSigil(Current->kind());
+      WorkList.push(Current->operands()[1]);
+      WorkList.push(Current->operands()[0]);
       break;
     default:
-      llvm_unreachable("unhandled formula kind");
+      llvm_unreachable("unhandled formula arity");
     }
-    break;
-  case 1:
-    OS << compactSigil(F.kind());
-    serializeFormula(*F.operands()[0], OS);
-    break;
-  case 2:
-    OS << compactSigil(F.kind());
-    serializeFormula(*F.operands()[0], OS);
-    serializeFormula(*F.operands()[1], OS);
-    break;
-  default:
-    llvm_unreachable("unhandled formula arity");
   }
 }
 
-static llvm::Expected<const Formula *>
-parsePrefix(llvm::StringRef &Str, Arena &A,
-            llvm::DenseMap<unsigned, Atom> &AtomMap) {
-  if (Str.empty())
-    return llvm::createStringError(llvm::inconvertibleErrorCode(),
-                                   "unexpected end of input");
-
-  char Prefix = Str[0];
-  Str = Str.drop_front();
-
-  switch (Prefix) {
-  case 'T':
-    return &A.makeLiteral(true);
-  case 'F':
-    return &A.makeLiteral(false);
-  case 'V': {
-    unsigned AtomID;
-    if (Str.consumeInteger(10, AtomID))
-      return llvm::createStringError(llvm::inconvertibleErrorCode(),
-                                     "expected atom id");
-    auto [It, Inserted] = AtomMap.try_emplace(AtomID, Atom());
-    if (Inserted)
-      It->second = A.makeAtom();
-    return &A.makeAtomRef(It->second);
-  }
-  case '!': {
-    auto OperandOrErr = parsePrefix(Str, A, AtomMap);
-    if (!OperandOrErr)
-      return OperandOrErr.takeError();
-    return &A.makeNot(**OperandOrErr);
-  }
-  case '&':
-  case '|':
-  case '>':
-  case '=': {
-    auto LeftOrErr = parsePrefix(Str, A, AtomMap);
-    if (!LeftOrErr)
-      return LeftOrErr.takeError();
+struct Operation {
+  Operation(Formula::Kind Kind) : Kind(Kind) {}
+  const Formula::Kind Kind;
+  const unsigned ExpectedNumOperands = Formula::numOperands(Kind);
+  std::vector<const Formula *> Operands;
+};
 
-    auto RightOrErr = parsePrefix(Str, A, AtomMap);
-    if (!RightOrErr)
-      return RightOrErr.takeError();
+// Avoids recursion to avoid stack overflows from very large formulas.
+static llvm::Expected<const Formula *>
+parseFormulaInternal(llvm::StringRef &Str, Arena &A,
+                     llvm::DenseMap<unsigned, Atom> &AtomMap) {
+  std::stack<Operation> ActiveOperations;
 
-    const Formula &LHS = **LeftOrErr;
-    const Formula &RHS = **RightOrErr;
+  while (!Str.empty()) {
+    char Prefix = Str[0];
+    Str = Str.drop_front();
 
     switch (Prefix) {
+    // Terminals
+    case 'T':
+    case 'F':
+    case 'V': {
+      const Formula *TerminalFormula;
+      switch (Prefix) {
+      case 'T':
+        TerminalFormula = &A.makeLiteral(true);
+        break;
+      case 'F':
+        TerminalFormula = &A.makeLiteral(false);
+        break;
+      case 'V': {
+        unsigned AtomID;
+        if (Str.consumeInteger(10, AtomID))
+          return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                         "expected atom id");
+        auto [It, Inserted] = AtomMap.try_emplace(AtomID, Atom());
+        if (Inserted)
+          It->second = A.makeAtom();
+        TerminalFormula = &A.makeAtomRef(It->second);
+        break;
+      }
+      default:
+        llvm_unreachable("unexpected terminal character");
+      }
+
+      if (ActiveOperations.empty()) {
+        return TerminalFormula;
+      }
+      Operation *Op = &ActiveOperations.top();
+      Op->Operands.push_back(TerminalFormula);
+      while (Op->Operands.size() == Op->ExpectedNumOperands) {
----------------
ymand wrote:


So, you were right about one stack but that got me thinking more about what was 
bothering me here. :) I realized that you've basically implemented a 
zero-lookahead shift-reduce parser. So, certain states shift and others reduce 
and you're currently splitting that out where the outer loop shifts and the 
inner loop reduces. But, you can handle this generically with a single loop. 
Moreover, the loop should be driven by the stack, rather than the input. This 
also helps guarantee well formedness because the top element on the stack 
always tells you what to do next (and therefore where the input violates that 
expectation).

The main difference is that start of the loop looks at the stack, and if the 
top element requires more operands, you shift (read and process one element of 
the input); otherwise, it's complete, and you reduce: line 130.

Otherwise, the code is basically exactly what you already have. I think this 
will be clearer and safer code -- hand written parsers are notoriously bug 
prone, so I think its worth keeping this as simple as possible.

https://github.com/llvm/llvm-project/pull/175980
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][dataflow] Manage stacks of formulas directly instead of recur… (PR #175980)

Reply via email to