stefanvodita commented on code in PR #12966:
URL: https://github.com/apache/lucene/pull/12966#discussion_r1518798524


##########
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java:
##########
@@ -76,6 +111,78 @@ public int compare(FacetResult a, FacetResult b) {
     this.config = config;
     this.fc = fc;
     parents = taxoReader.getParallelTaxonomyArrays().parents();
+    valueComparator = Comparator.comparingInt((x) -> (int) x);
+  }
+
+  /** Return true if a sparse hash table should be used for counting, instead of a dense int[]. */
+  private boolean useHashTable(FacetsCollector fc, TaxonomyReader taxoReader) {
+    if (taxoReader.getSize() < 1024) {
+      // small number of unique values: use an array
+      return false;
+    }
+
+    if (fc == null) {
+      // counting all docs: use an array
+      return false;
+    }
+
+    int maxDoc = 0;
+    int sumTotalHits = 0;
+    for (FacetsCollector.MatchingDocs docs : fc.getMatchingDocs()) {
+      sumTotalHits += docs.totalHits;
+      maxDoc += docs.context.reader().maxDoc();
+    }
+
+    // if our result set is < 10% of the index, we collect sparsely (use hash map):
+    return sumTotalHits < maxDoc / 10;
+  }
+
+  protected void initializeValueCounters() {
+    if (initialized) {
+      return;
+    }
+    initialized = true;
+    assert sparseCounts == null && counts == null;
+    if (useHashTable(fc, taxoReader)) {
+      sparseCounts = new IntIntHashMap();
+    } else {
+      counts = new int[taxoReader.getSize()];
+    }
+  }
+
+  /** Set the count for this ordinal to {@code newValue}. */
+  protected void setCount(int ordinal, int newValue) {
+    if (sparseCounts != null) {
+      sparseCounts.put(ordinal, newValue);
+    } else {
+      counts[ordinal] = newValue;
+    }
+  }
+
+  /** Get the count for this ordinal. */
+  protected int getCount(int ordinal) {
+    if (sparseCounts != null) {
+      return sparseCounts.get(ordinal);
+    } else {
+      return counts[ordinal];
+    }
+  }
+
+  /** Get the aggregation value for this ordinal. */
+  protected Number getAggregationValue(int ordinal) {
+    // By default, this is just the count.
+    return getCount(ordinal);
+  }
+
+  /** Apply an aggregation to the two values and return the result. */
+  protected Number aggregate(Number existingVal, Number newVal) {
+    // By default, we are computing counts, so the values are interpreted as integers and summed.
+    return (int) existingVal + (int) newVal;

Review Comment:
   This is a tricky bit. Subclasses that override this method apply their own
   aggregation function, but this default implementation treats both values as
   integer counts and sums them.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to