gsmiller commented on code in PR #779: URL: https://github.com/apache/lucene/pull/779#discussion_r871508282
########## lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java: ########## @@ -222,41 +262,162 @@ public FacetResult getTopChildren(int topN, String dim, String... path) throws I } } } - ord = siblings[ord]; } } - if (aggregatedValue == 0) { - return null; - } - if (dimConfig.multiValued) { if (dimConfig.requireDimCount) { aggregatedValue = getValue(dimOrd); } else { // Our sum'd value is not correct, in general: aggregatedValue = -1; } - } else { - // Our sum'd dim value is accurate, so we keep it } - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; - int[] ordinals = new int[labelValues.length]; - int[] values = new int[labelValues.length]; + return new ChildOrdsResult(aggregatedValue, childCount, q); + } - for (int i = labelValues.length - 1; i >= 0; i--) { - TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop(); - ordinals[i] = ordAndValue.ord; - values[i] = ordAndValue.value; + /** Returns value/count of a dimension. */ + private int getDimValue( + FacetsConfig.DimConfig dimConfig, + String dim, + int dimOrd, + int topN, + HashMap<String, ChildOrdsResult> dimToChildOrdsResult) + throws IOException { + + // if dimConfig.hierarchical == true || dim is multiValued and dim count has been aggregated at + // indexing time, return dimCount directly + if (dimConfig.hierarchical == true || (dimConfig.multiValued && dimConfig.requireDimCount)) { + return getValue(dimOrd); } - FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals); - for (int i = 0; i < labelValues.length; i++) { - labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], values[i]); + // if dimCount was not aggregated at indexing time, iterate over childOrds to get dimCount + ChildOrdsResult childOrdsResult = getChildOrdsResult(dimConfig, dimOrd, topN); + + // if no early termination, store dim and childOrdsResult into a hashmap to avoid calling + // getChildOrdsResult again in getTopDims + dimToChildOrdsResult.put(dim, childOrdsResult); + return childOrdsResult.aggregatedValue; + } + + @Override + public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException { + if (topNDims <= 0 || topNChildren <= 0) { + throw new IllegalArgumentException("topN must be > 0"); } - return new FacetResult(dim, path, aggregatedValue, labelValues, childCount); + // get children and siblings ordinal array from TaxonomyFacets + int[] children = getChildren(); + int[] siblings = getSiblings(); + + // Create priority queue to store top dimensions and sort by their aggregated values/hits and + // string values. + PriorityQueue<DimValueResult> pq = + new PriorityQueue<>(topNDims) { + @Override + protected boolean lessThan(DimValueResult a, DimValueResult b) { + if (a.value > b.value) { + return false; + } else if (a.value < b.value) { + return true; + } else { + return a.dim.compareTo(b.dim) > 0; + } + } + }; + + // create hashMap to store the ChildOrdsResult to avoid calling getChildOrdsResult for all dims + HashMap<String, ChildOrdsResult> dimToChildOrdsResult = new HashMap<>(); + + // iterate over children and siblings ordinals for all dims + int ord = children[TaxonomyReader.ROOT_ORDINAL]; + while (ord != TaxonomyReader.INVALID_ORDINAL) { + String dim = taxoReader.getPath(ord).components[0]; + FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim); + if (dimConfig.indexFieldName.equals(indexFieldName)) { + FacetLabel cp = new FacetLabel(dim, emptyPath); + int dimOrd = taxoReader.getOrdinal(cp); + // if dimOrd = -1, we skip this dim, else call getDimValue + if (dimOrd != -1) { + int dimCount = getDimValue(dimConfig, dim, dimOrd, topNChildren, dimToChildOrdsResult); + if (dimCount != 0) { + // use priority queue to store DimValueResult for topNDims + if (pq.size() < topNDims) { + pq.add(new DimValueResult(dim, dimOrd, dimCount)); + } else { + if (dimCount > pq.top().value + || (dimCount == pq.top().value && dim.compareTo(pq.top().dim) < 0)) { + DimValueResult bottomDim = pq.top(); + bottomDim.dim = dim; + bottomDim.value = dimCount; + pq.updateTop(); + } + } + } + } + } + ord = siblings[ord]; + } + + // use fixed-size array to reduce space usage + FacetResult[] results = new FacetResult[pq.size()]; + + while (pq.size() > 0) { + DimValueResult dimValueResult = pq.pop(); + String dim = dimValueResult.dim; + ChildOrdsResult childOrdsResult; + // if the childOrdsResult was stored in the map, avoid calling getChildOrdsResult again + if (dimToChildOrdsResult.containsKey(dim)) { + childOrdsResult = dimToChildOrdsResult.get(dim); + } else { + FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim); + childOrdsResult = getChildOrdsResult(dimConfig, dimValueResult.dimOrd, topNChildren); + } + // FacetResult requires String[] path, and path is always empty for getTopDims. + // FacetLabelLength is always equal to 1 when FacetLabel is constructed with + // FacetLabel(dim, emptyPath), and therefore, 1 is passed in when calling getLabelValues + FacetResult facetResult = + new FacetResult( + dimValueResult.dim, + emptyPath, + dimValueResult.value, + getLabelValues(childOrdsResult.q, new FacetLabel(dim, emptyPath)), Review Comment: Hmm, OK I see that since you changed `getLabelValues` to take the full `FacetLabel` instead of the path length that you're now having to instantiate a `FacetLabel` just to call the method. That's no good. Let's avoid creating extra garbage here and go back to how you had `getLabelValues` but maybe try to come up with a good parameter name that's somewhat easy to understand. Sorry if I led you down the wrong path here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org