tteofili commented on code in PR #14094: URL: https://github.com/apache/lucene/pull/14094#discussion_r2007923461
########## lucene/core/src/java/org/apache/lucene/search/HnswQueueSaturationCollector.java: ########## @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +/** + * A {@link HnswKnnCollector} that early exits when nearest neighbor queue keeps saturating beyond a + * 'patience' parameter. This records the rate of collection of new nearest neighbors in the {@code + * delegate} KnnCollector queue, at each HNSW node candidate visit. Once it saturates for a number + * of consecutive node visits (e.g., the patience parameter), this early terminates. 
+ * + * @lucene.experimental + */ +public class HnswQueueSaturationCollector extends HnswKnnCollector { + + private final KnnCollector delegate; + private final double saturationThreshold; + private final int patience; + private boolean patienceFinished; + private int countSaturated; + private int previousQueueSize; + private int currentQueueSize; + + HnswQueueSaturationCollector(KnnCollector delegate, double saturationThreshold, int patience) { + super(delegate); + this.delegate = delegate; + this.previousQueueSize = 0; + this.currentQueueSize = 0; + this.countSaturated = 0; + this.patienceFinished = false; + this.saturationThreshold = saturationThreshold; + this.patience = patience; + } + + @Override + public boolean earlyTerminated() { + return delegate.earlyTerminated() || patienceFinished; + } + + @Override + public boolean collect(int docId, float similarity) { + boolean collect = delegate.collect(docId, similarity); + if (collect) { + currentQueueSize++; + } + return collect; + } + + @Override + public float minCompetitiveSimilarity() { + return delegate.minCompetitiveSimilarity(); + } + + @Override + public TopDocs topDocs() { + TopDocs topDocs; + if (patienceFinished && delegate.earlyTerminated() == false) { + TopDocs delegateDocs = delegate.topDocs(); + TotalHits totalHits = + new TotalHits(delegateDocs.totalHits.value(), TotalHits.Relation.EQUAL_TO); + topDocs = new TopDocs(totalHits, delegateDocs.scoreDocs); + } else { + topDocs = delegate.topDocs(); + } + return topDocs; + } + + @Override + public void nextCandidate() { Review Comment: I really like this idea Ben, I'll see if I can make up something reasonable for that ;) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org