dweiss commented on code in PR #844: URL: https://github.com/apache/lucene/pull/844#discussion_r860523184
########## lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java: ########## @@ -184,110 +195,174 @@ public List<Completion> lookup(CharSequence key, int num) { return EMPTY_RESULT; } - try { - BytesRef keyUtf8 = new BytesRef(key); - if (!higherWeightsFirst && rootArcs.length > 1) { - // We could emit a warning here (?). An optimal strategy for - // alphabetically sorted - // suggestions would be to add them with a constant weight -- this saves - // unnecessary - // traversals and sorting. - return lookupSortedAlphabetically(keyUtf8, num); - } else { - return lookupSortedByWeight(keyUtf8, num, false); - } - } catch (IOException e) { - // Should never happen, but anyway. - throw new RuntimeException(e); + if (!higherWeightsFirst && rootArcs.length > 1) { + // We could emit a warning here (?). An optimal strategy for + // alphabetically sorted + // suggestions would be to add them with a constant weight -- this saves + // unnecessary + // traversals and sorting. + return lookup(key).sorted().limit(num).collect(Collectors.toList()); + } else { + return lookup(key).limit(num).collect(Collectors.toList()); } } /** - * Lookup suggestions sorted alphabetically <b>if weights are not constant</b>. This is a - * workaround: in general, use constant weights for alphabetically sorted result. + * Lookup suggestions to <code>key</code> and return a stream of matching completions. The stream + * fetches completions dynamically - it can be filtered and limited to acquire the desired number + * of completions without collecting all of them. + * + * @param key The prefix to which suggestions should be sought. + * @return Returns the suggestions */ - private List<Completion> lookupSortedAlphabetically(BytesRef key, int num) throws IOException { - // Greedily get num results from each weight branch. - List<Completion> res = lookupSortedByWeight(key, num, true); - - // Sort and trim. - Collections.sort(res); - if (res.size() > num) { - res = res.subList(0, num); + public Stream<Completion> lookup(CharSequence key) { + if (key.length() == 0 || automaton == null) { + return Stream.empty(); + } + + try { + return lookupSortedByWeight(new BytesRef(key)); + } catch (IOException e) { + throw new RuntimeException(e); } - return res; } - /** - * Lookup suggestions sorted by weight (descending order). - * - * @param collectAll If <code>true</code>, the routine terminates immediately when <code>num - * </code> suggestions have been collected. If <code>false</code>, it will collect suggestions - * from all weight arcs (needed for {@link #lookupSortedAlphabetically}. - */ - private ArrayList<Completion> lookupSortedByWeight(BytesRef key, int num, boolean collectAll) - throws IOException { - // Don't overallocate the results buffers. This also serves the purpose of - // allowing the user of this class to request all matches using Integer.MAX_VALUE as - // the number of results. - final ArrayList<Completion> res = new ArrayList<>(Math.min(10, num)); - - final BytesRef output = BytesRef.deepCopyOf(key); - for (int i = 0; i < rootArcs.length; i++) { - final FST.Arc<Object> rootArc = rootArcs[i]; - final FST.Arc<Object> arc = new FST.Arc<>().copyFrom(rootArc); - - // Descend into the automaton using the key as prefix. - if (descendWithPrefix(arc, key)) { - // A subgraph starting from the current node has the completions - // of the key prefix. The arc we're at is the last key's byte, - // so we will collect it too. - output.length = key.length - 1; - if (collect(res, num, rootArc.label(), output, arc) && !collectAll) { - // We have enough suggestions to return immediately. Keep on looking - // for an - // exact match, if requested. - if (exactFirst) { - if (!checkExistingAndReorder(res, key)) { - int exactMatchBucket = getExactMatchStartingFromRootArc(i, key); - if (exactMatchBucket != -1) { - // Insert as the first result and truncate at num. - while (res.size() >= num) { - res.remove(res.size() - 1); - } - res.add(0, new Completion(key, exactMatchBucket)); - } - } - } + /** Lookup suggestions sorted by weight (descending order). */ + private Stream<Completion> lookupSortedByWeight(BytesRef key) throws IOException { + + // Look for an exact match first. + Completion exactCompletion; + if (exactFirst) { + Completion c = null; + for (int i = 0; i < rootArcs.length; i++) { + int exactMatchBucket = getExactMatchStartingFromRootArc(i, key); + if (exactMatchBucket != -1) { + // root arcs are sorted by decreasing weight so any first exact match will always win. + c = new Completion(key, exactMatchBucket); break; } } + exactCompletion = c; + } else { + exactCompletion = null; + } + + Stream<Completion> stream = + IntStream.range(0, rootArcs.length) + .boxed() Review Comment: IntStream.flatMap returns an IntStream - there is no method that converts an IntStream to a flattened stream of Object (T). These are small integers, they're all cached instances - shouldn't play a big role. I don't know what the deep impact of replacing a method with a stream here will be but I think it'll be small compared to the overall cost of fst traversal and other unrelated things happening outside of the lookup code. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org