stefanvodita commented on code in PR #12966: URL: https://github.com/apache/lucene/pull/12966#discussion_r1518798524
########## lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java: ########## @@ -76,6 +111,78 @@ public int compare(FacetResult a, FacetResult b) { this.config = config; this.fc = fc; parents = taxoReader.getParallelTaxonomyArrays().parents(); + valueComparator = Comparator.comparingInt((x) -> (int) x); + } + + /** Return true if a sparse hash table should be used for counting, instead of a dense int[]. */ + private boolean useHashTable(FacetsCollector fc, TaxonomyReader taxoReader) { + if (taxoReader.getSize() < 1024) { + // small number of unique values: use an array + return false; + } + + if (fc == null) { + // counting all docs: use an array + return false; + } + + int maxDoc = 0; + int sumTotalHits = 0; + for (FacetsCollector.MatchingDocs docs : fc.getMatchingDocs()) { + sumTotalHits += docs.totalHits; + maxDoc += docs.context.reader().maxDoc(); + } + + // if our result set is < 10% of the index, we collect sparsely (use hash map): + return sumTotalHits < maxDoc / 10; + } + + protected void initializeValueCounters() { + if (initialized) { + return; + } + initialized = true; + assert sparseCounts == null && counts == null; + if (useHashTable(fc, taxoReader)) { + sparseCounts = new IntIntHashMap(); + } else { + counts = new int[taxoReader.getSize()]; + } + } + + /** Set the count for this ordinal to {@code newValue}. */ + protected void setCount(int ordinal, int newValue) { + if (sparseCounts != null) { + sparseCounts.put(ordinal, newValue); + } else { + counts[ordinal] = newValue; + } + } + + /** Get the count for this ordinal. */ + protected int getCount(int ordinal) { + if (sparseCounts != null) { + return sparseCounts.get(ordinal); + } else { + return counts[ordinal]; + } + } + + /** Get the aggregation value for this ordinal. */ + protected Number getAggregationValue(int ordinal) { + // By default, this is just the count. + return getCount(ordinal); + } + + /** Apply an aggregation to the two values and return the result. */ + protected Number aggregate(Number existingVal, Number newVal) { + // By default, we are computing counts, so the values are interpreted as integers and summed. + return (int) existingVal + (int) newVal; Review Comment: This is a tricky bit. You'll see that when we override, we do use an aggregation function, but the default implementation is to count. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org