bruno-roustant commented on a change in pull request #1160: LUCENE-9125: Improve Automaton.step() with binary search URL: https://github.com/apache/lucene-solr/pull/1160#discussion_r365452385
########## File path: lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java ########## @@ -658,22 +658,84 @@ public String toDot() { public int step(int state, int label) { assert state >= 0; assert label >= 0; - int trans = states[2*state]; - int limit = trans + 3*states[2*state+1]; - // TODO: we could do bin search; transitions are sorted - while (trans < limit) { - int dest = transitions[trans]; - int min = transitions[trans+1]; - int max = transitions[trans+2]; - if (min <= label && label <= max) { - return dest; + int stateIndex = 2 * state; + int firstTransitionIndex = states[stateIndex]; + int numTransitions = states[stateIndex + 1]; + + // Since transitions are sorted, + // binary search the transition for which label is within [minLabel, maxLabel]. + int low = 0; + int high = numTransitions - 1; + while (low <= high) { + int mid = (low + high) >>> 1; + int transitionIndex = firstTransitionIndex + 3 * mid; + int minLabel = transitions[transitionIndex + 1]; + if (minLabel > label) { + high = mid - 1; + } else { + int maxLabel = transitions[transitionIndex + 2]; + if (maxLabel < label){ + low = mid + 1; + } else { + return transitions[transitionIndex]; + } } - trans += 3; } - return -1; } + /** + * Looks for the next transition that matches the provided label, assuming determinism. + * <p> + * This method is similar to {@link #step(int, int)} but is used more efficiently + * when iterating over multiple transitions from the same source state. It keeps + * the latest reached transition index in {@code transition.transitionUpto} so + * the next call to this method can continue from there instead of restarting + * from the first transition. + * + * @param transition The transition to start the lookup from (inclusive, using its + * {@link Transition#source} and {@link Transition#transitionUpto}). + * It is updated with the matched transition; + * or with {@link Transition#dest} = -1 if no match. + * @param label The codepoint to look up. + * @return The destination state; or -1 if no matching outgoing transition. + */ + public int next(Transition transition, int label) { + // Copy of step() method with + // - binary search 'low' bound initialized to transition.transitionUpto. + // - param transition .dest/.min/.max/.transitionUpto set to the matching transition. + assert transition.source >= 0; + assert label >= 0; + int stateIndex = 2 * transition.source; + int firstTransitionIndex = states[stateIndex]; + int numTransitions = states[stateIndex + 1]; + + // Since transitions are sorted, Review comment: I'll try. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org