mayya-sharipova commented on code in PR #12794:
URL: https://github.com/apache/lucene/pull/12794#discussion_r1403584908


##########
lucene/core/src/java/org/apache/lucene/search/TopKnnCollector.java:
##########
@@ -26,26 +26,71 @@
  * @lucene.experimental
  */
 public final class TopKnnCollector extends AbstractKnnCollector {
+  private static final float DEFAULT_GREEDINESS = 0.9f;
 
   private final NeighborQueue queue;
+  private final float greediness;
+  private final NeighborQueue queueg;
+  private final MaxScoreAccumulator globalMinSimAcc;
+  private boolean kResultsCollected = false;
+  private float cachedGlobalMinSim = Float.NEGATIVE_INFINITY;
+
+  // greediness of globally non-competitive search: [0,1]
 
   /**
    * @param k the number of neighbors to collect
    * @param visitLimit how many vector nodes the results are allowed to visit
+   * @param globalMinSimAcc the global minimum competitive similarity tracked 
across all segments
    */
-  public TopKnnCollector(int k, int visitLimit) {
+  public TopKnnCollector(int k, int visitLimit, MaxScoreAccumulator 
globalMinSimAcc) {
+    super(k, visitLimit);
+    this.greediness = DEFAULT_GREEDINESS;
+    this.queue = new NeighborQueue(k, false);
+    int queuegSize = Math.max(1, Math.round((1 - greediness) * k));
+    this.queueg = new NeighborQueue(queuegSize, false);
+    this.globalMinSimAcc = globalMinSimAcc;
+  }
+
+  public TopKnnCollector(
+      int k, int visitLimit, MaxScoreAccumulator globalMinSimAcc, float 
greediness) {
     super(k, visitLimit);
+    this.greediness = greediness;
     this.queue = new NeighborQueue(k, false);
+    this.queueg = new NeighborQueue(Math.round((1 - greediness) * k), false);
+    this.globalMinSimAcc = globalMinSimAcc;
   }
 
   @Override
   public boolean collect(int docId, float similarity) {
-    return queue.insertWithOverflow(docId, similarity);
+    boolean result = queue.insertWithOverflow(docId, similarity);
+    queueg.insertWithOverflow(docId, similarity);
+
+    boolean reachedKResults = (kResultsCollected == false && queue.size() == 
k());
+    if (reachedKResults) {
+      kResultsCollected = true;
+    }
+    if (globalMinSimAcc != null && kResultsCollected) {
+      // as we've collected k results, we can start exchanging globally
+      globalMinSimAcc.accumulate(queue.topNode(), queue.topScore());
+
+      // periodically update the local copy of global similarity
+      if (reachedKResults || (visitedCount & globalMinSimAcc.modInterval) == 
0) {
+        MaxScoreAccumulator.DocAndScore docAndScore = globalMinSimAcc.get();
+        cachedGlobalMinSim = docAndScore.score;

Review Comment:
   Thanks for your idea,  it looks like it should provide better speedups than 
the current approach.
   We can check globalMinSim before starting search, and see if it makes 
difference



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to