================
@@ -42,104 +43,164 @@ static char compactSigil(Formula::Kind K) {
llvm_unreachable("unhandled formula kind");
}
+// Avoids recursion to avoid stack overflows from very large formulas.
void serializeFormula(const Formula &F, llvm::raw_ostream &OS) {
- switch (Formula::numOperands(F.kind())) {
- case 0:
- switch (F.kind()) {
- case Formula::AtomRef:
- OS << F.getAtom();
+ std::stack<const Formula *> WorkList;
+ WorkList.push(&F);
+ while (!WorkList.empty()) {
+ const Formula *Current = WorkList.top();
+ WorkList.pop();
+ switch (Formula::numOperands(Current->kind())) {
+ case 0:
+ switch (Current->kind()) {
+ case Formula::AtomRef:
+ OS << Current->getAtom();
+ break;
+ case Formula::Literal:
+ OS << (Current->literal() ? 'T' : 'F');
+ break;
+ default:
+ llvm_unreachable("unhandled formula kind");
+ }
break;
- case Formula::Literal:
- OS << (F.literal() ? 'T' : 'F');
+ case 1:
+ OS << compactSigil(Current->kind());
+ WorkList.push(Current->operands()[0]);
+ break;
+ case 2:
+ OS << compactSigil(Current->kind());
+ WorkList.push(Current->operands()[1]);
+ WorkList.push(Current->operands()[0]);
break;
default:
- llvm_unreachable("unhandled formula kind");
+ llvm_unreachable("unhandled formula arity");
}
- break;
- case 1:
- OS << compactSigil(F.kind());
- serializeFormula(*F.operands()[0], OS);
- break;
- case 2:
- OS << compactSigil(F.kind());
- serializeFormula(*F.operands()[0], OS);
- serializeFormula(*F.operands()[1], OS);
- break;
- default:
- llvm_unreachable("unhandled formula arity");
}
}
-static llvm::Expected<const Formula *>
-parsePrefix(llvm::StringRef &Str, Arena &A,
- llvm::DenseMap<unsigned, Atom> &AtomMap) {
- if (Str.empty())
- return llvm::createStringError(llvm::inconvertibleErrorCode(),
- "unexpected end of input");
-
- char Prefix = Str[0];
- Str = Str.drop_front();
-
- switch (Prefix) {
- case 'T':
- return &A.makeLiteral(true);
- case 'F':
- return &A.makeLiteral(false);
- case 'V': {
- unsigned AtomID;
- if (Str.consumeInteger(10, AtomID))
- return llvm::createStringError(llvm::inconvertibleErrorCode(),
- "expected atom id");
- auto [It, Inserted] = AtomMap.try_emplace(AtomID, Atom());
- if (Inserted)
- It->second = A.makeAtom();
- return &A.makeAtomRef(It->second);
- }
- case '!': {
- auto OperandOrErr = parsePrefix(Str, A, AtomMap);
- if (!OperandOrErr)
- return OperandOrErr.takeError();
- return &A.makeNot(**OperandOrErr);
- }
- case '&':
- case '|':
- case '>':
- case '=': {
- auto LeftOrErr = parsePrefix(Str, A, AtomMap);
- if (!LeftOrErr)
- return LeftOrErr.takeError();
+struct Operation {
+ Operation(Formula::Kind Kind) : Kind(Kind) {}
+ const Formula::Kind Kind;
+ const unsigned ExpectedNumOperands = Formula::numOperands(Kind);
+ std::vector<const Formula *> Operands;
+};
- auto RightOrErr = parsePrefix(Str, A, AtomMap);
- if (!RightOrErr)
- return RightOrErr.takeError();
+// Avoids recursion to avoid stack overflows from very large formulas.
+static llvm::Expected<const Formula *>
+parseFormulaInternal(llvm::StringRef &Str, Arena &A,
+ llvm::DenseMap<unsigned, Atom> &AtomMap) {
+ std::stack<Operation> ActiveOperations;
- const Formula &LHS = **LeftOrErr;
- const Formula &RHS = **RightOrErr;
+ while (!Str.empty()) {
+ char Prefix = Str[0];
+ Str = Str.drop_front();
switch (Prefix) {
+ // Terminals
+ case 'T':
+ case 'F':
+ case 'V': {
+ const Formula *TerminalFormula;
+ switch (Prefix) {
+ case 'T':
+ TerminalFormula = &A.makeLiteral(true);
+ break;
+ case 'F':
+ TerminalFormula = &A.makeLiteral(false);
+ break;
+ case 'V': {
+ unsigned AtomID;
+ if (Str.consumeInteger(10, AtomID))
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "expected atom id");
+ auto [It, Inserted] = AtomMap.try_emplace(AtomID, Atom());
+ if (Inserted)
+ It->second = A.makeAtom();
+ TerminalFormula = &A.makeAtomRef(It->second);
+ break;
+ }
+ default:
+ llvm_unreachable("unexpected terminal character");
+ }
+
+ if (ActiveOperations.empty()) {
+ return TerminalFormula;
+ }
+ Operation *Op = &ActiveOperations.top();
+ Op->Operands.push_back(TerminalFormula);
+ while (Op->Operands.size() == Op->ExpectedNumOperands) {
----------------
ymand wrote:
So, you were right about one stack but that got me thinking more about what was
bothering me here. :) I realized that you've basically implemented a
zero-lookahead shift-reduce parser. So, certain states shift and others reduce
and you're currently splitting that out where the outer loop shifts and the
inner loop reduces. But, you can handle this generically with a single loop.
Moreover, the loop should be driven by the stack, rather than the input. This
also helps guarantee well formedness because the top element on the stack
always tells you what to do next (and therefore where the input violates that
expectation).
The main difference is that start of the loop looks at the stack, and if the
top element requires more operands, you shift (read and process one element of
the input); otherwise, it's complete, and you reduce: line 130.
Otherwise, the code is basically exactly what you already have. I think this
will be clearer and safer code -- hand written parsers are notoriously bug
prone, so I think its worth keeping this as simple as possible.
https://github.com/llvm/llvm-project/pull/175980
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits