zhaih commented on a change in pull request #513: URL: https://github.com/apache/lucene/pull/513#discussion_r762599350
########## File path: lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java ########## @@ -556,165 +538,84 @@ static RegExp newLeafNode( * toAutomaton(null)</code> (empty automaton map). */ public Automaton toAutomaton() { - return toAutomaton(null, null, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); - } - - /** - * Constructs new <code>Automaton</code> from this <code>RegExp</code>. The constructed automaton - * is minimal and deterministic and has no transitions to dead states. - * - * @param determinizeWorkLimit maximum effort to spend while determinizing the automata. If - * determinizing the automata would require more than this effort, - * TooComplexToDeterminizeException is thrown. Higher numbers require more space but can - * process more complex regexes. Use {@link Operations#DEFAULT_DETERMINIZE_WORK_LIMIT} as a - * decent default if you don't otherwise know what to specify. - * @exception IllegalArgumentException if this regular expression uses a named identifier that is - * not available from the automaton provider - * @exception TooComplexToDeterminizeException if determinizing this regexp requires more effort - * than determinizeWorkLimit states - */ - public Automaton toAutomaton(int determinizeWorkLimit) - throws IllegalArgumentException, TooComplexToDeterminizeException { - return toAutomaton(null, null, determinizeWorkLimit); + return toAutomaton(null, null); } /** - * Constructs new <code>Automaton</code> from this <code>RegExp</code>. The constructed automaton - * is minimal and deterministic and has no transitions to dead states. + * Constructs new <code>Automaton</code> from this <code>RegExp</code>. * * @param automaton_provider provider of automata for named identifiers - * @param determinizeWorkLimit maximum effort to spend while determinizing the automata. If - * determinizing the automata would require more than this effort, - * TooComplexToDeterminizeException is thrown. Higher numbers require more space but can - * process more complex regexes. Use {@link Operations#DEFAULT_DETERMINIZE_WORK_LIMIT} as a - * decent default if you don't otherwise know what to specify. * @exception IllegalArgumentException if this regular expression uses a named identifier that is * not available from the automaton provider - * @exception TooComplexToDeterminizeException if determinizing this regexp requires more effort - * than determinizeWorkLimit states */ - public Automaton toAutomaton(AutomatonProvider automaton_provider, int determinizeWorkLimit) + public Automaton toAutomaton(AutomatonProvider automaton_provider) throws IllegalArgumentException, TooComplexToDeterminizeException { - return toAutomaton(null, automaton_provider, determinizeWorkLimit); + return toAutomaton(null, automaton_provider); } /** - * Constructs new <code>Automaton</code> from this <code>RegExp</code>. The constructed automaton - * is minimal and deterministic and has no transitions to dead states. + * Constructs new <code>Automaton</code> from this <code>RegExp</code>. * * @param automata a map from automaton identifiers to automata (of type <code>Automaton</code>). - * @param determinizeWorkLimit maximum effort to spend while determinizing the automata. If - * determinizing the automata would require more than this effort, - * TooComplexToDeterminizeException is thrown. Higher numbers require more space but can - * process more complex regexes. * @exception IllegalArgumentException if this regular expression uses a named identifier that * does not occur in the automaton map - * @exception TooComplexToDeterminizeException if determinizing this regexp requires more effort - * than determinizeWorkLimit states */ - public Automaton toAutomaton(Map<String, Automaton> automata, int determinizeWorkLimit) + public Automaton toAutomaton(Map<String, Automaton> automata) throws IllegalArgumentException, TooComplexToDeterminizeException { - return toAutomaton(automata, null, determinizeWorkLimit); + return toAutomaton(automata, null); } private Automaton toAutomaton( - Map<String, Automaton> automata, - AutomatonProvider automaton_provider, - int determinizeWorkLimit) - throws IllegalArgumentException, TooComplexToDeterminizeException { - try { - return toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit); - } catch (TooComplexToDeterminizeException e) { - throw new TooComplexToDeterminizeException(this, e); - } - } - - private Automaton toAutomatonInternal( - Map<String, Automaton> automata, - AutomatonProvider automaton_provider, - int determinizeWorkLimit) + Map<String, Automaton> automata, AutomatonProvider automaton_provider) throws IllegalArgumentException { List<Automaton> list; Automaton a = null; switch (kind) { case REGEXP_PRE_CLASS: RegExp expanded = expandPredefined(); - a = expanded.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit); + a = expanded.toAutomaton(automata, automaton_provider); break; case REGEXP_UNION: list = new ArrayList<>(); - findLeaves( - exp1, Kind.REGEXP_UNION, list, automata, automaton_provider, determinizeWorkLimit); - findLeaves( - exp2, Kind.REGEXP_UNION, list, automata, automaton_provider, determinizeWorkLimit); + findLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider); + findLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider); a = Operations.union(list); - a = MinimizationOperations.minimize(a, determinizeWorkLimit); break; case REGEXP_CONCATENATION: list = new ArrayList<>(); - findLeaves( - exp1, - Kind.REGEXP_CONCATENATION, - list, - automata, - automaton_provider, - determinizeWorkLimit); - findLeaves( - exp2, - Kind.REGEXP_CONCATENATION, - list, - automata, - automaton_provider, - determinizeWorkLimit); + findLeaves(exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider); + findLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider); a = Operations.concatenate(list); - a = MinimizationOperations.minimize(a, determinizeWorkLimit); break; case REGEXP_INTERSECTION: a = Operations.intersection( - exp1.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit), - exp2.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit)); - a = MinimizationOperations.minimize(a, determinizeWorkLimit); + exp1.toAutomaton(automata, automaton_provider), + exp2.toAutomaton(automata, automaton_provider)); break; case REGEXP_OPTIONAL: - a = - Operations.optional( - exp1.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit)); - a = MinimizationOperations.minimize(a, determinizeWorkLimit); + a = Operations.optional(exp1.toAutomaton(automata, automaton_provider)); break; case REGEXP_REPEAT: - a = - Operations.repeat( - exp1.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit)); - a = MinimizationOperations.minimize(a, determinizeWorkLimit); Review comment: I wonder whether there're some boundary cases where people can construct a near 10000 states minimized DFA before but not now? Like if a minimized component have 200 states and is repeated 50 times, but without minimization it would have >200 states and thus the minimization operation on the whole automaton will throw a `TooComplexToDeterminize` Exception? But I guess this is not that important, since the user can control the `determinizeWorkLimit` anyway, and our default 10000 is a relatively high number. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org