mayya-sharipova commented on code in PR #12962:
URL: https://github.com/apache/lucene/pull/12962#discussion_r1452703248
##########
lucene/core/src/java/org/apache/lucene/search/TopKnnCollector.java:
##########
@@ -27,25 +29,72 @@
*/
public final class TopKnnCollector extends AbstractKnnCollector {
+ // greediness of globally non-competitive search: [0,1]
+ private static final float DEFAULT_GREEDINESS = 0.9f;
private final NeighborQueue queue;
+ private final float greediness;
+ private final FloatHeap nonCompetitiveQueue;
+ private final FloatHeap updatesQueue;
+ private final int interval = 0x3ff; // 1023
+ private final BlockingFloatHeap globalSimilarityQueue;
+ private boolean kResultsCollected = false;
+ private float cachedGlobalMinSim = Float.NEGATIVE_INFINITY;
/**
* @param k the number of neighbors to collect
* @param visitLimit how many vector nodes the results are allowed to visit
*/
- public TopKnnCollector(int k, int visitLimit) {
+ public TopKnnCollector(int k, int visitLimit, BlockingFloatHeap
globalSimilarityQueue) {
super(k, visitLimit);
+ this.greediness = DEFAULT_GREEDINESS;
this.queue = new NeighborQueue(k, false);
+ this.globalSimilarityQueue = globalSimilarityQueue;
+
+ if (globalSimilarityQueue == null) {
+ this.nonCompetitiveQueue = null;
+ this.updatesQueue = null;
+ } else {
+ this.nonCompetitiveQueue = new FloatHeap(Math.max(1, Math.round((1 -
greediness) * k)));
+ this.updatesQueue = new FloatHeap(k);
+ }
}
@Override
public boolean collect(int docId, float similarity) {
- return queue.insertWithOverflow(docId, similarity);
+ boolean localSimUpdated = queue.insertWithOverflow(docId, similarity);
+ boolean firstKResultsCollected = (kResultsCollected == false &&
queue.size() == k());
+ if (firstKResultsCollected) {
+ kResultsCollected = true;
+ }
+
+ boolean globalSimUpdated = false;
+ if (globalSimilarityQueue != null) {
+ updatesQueue.offer(similarity);
+ globalSimUpdated = nonCompetitiveQueue.offer(similarity);
+
+ if (kResultsCollected) {
+ // as we've collected k results, we can start do periodic updates with
the global queue
+ if (firstKResultsCollected || (visitedCount & interval) == 0) {
+ cachedGlobalMinSim =
globalSimilarityQueue.offer(updatesQueue.getHeap());
Review Comment:
@jimczi Sorry I did not understand this comment, can you please clarify it.
What does it mean `k` is close to the size of queue? The
`globalSimilarityQueue` is of size `k`.
Also I am not clear how you derive the value of `24`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]