gsmiller commented on code in PR #779:
URL: https://github.com/apache/lucene/pull/779#discussion_r868497732
##########
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java:
##########
@@ -109,7 +109,23 @@ public boolean childrenLoaded() {
* @lucene.experimental
*/
public boolean siblingsLoaded() {
- return children != null;
+ return siblings != null;
+ }
+
+ /**
+ * Returns existing int[] mapping each ordinal to its next sibling to avoid
re-creating int[] for
+ * siblings in subclass
+ */
+ public int[] getExistingSiblings() throws IOException {
+ return childrenLoaded() ? this.siblings : getSiblings();
+ }
+
+ /**
+ * Returns existing int[] mapping each ordinal to its first child to avoid
re-creating int[] for
+ * children in subclass
+ */
+ public int[] getExistingChildren() throws IOException {
+ return childrenLoaded() ? this.children : getChildren();
Review Comment:
I'm not sure I understand the need for these new methods. Wouldn't calling
`getSiblings()` / `getChildren()` directly do the same thing here?
##########
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java:
##########
@@ -222,41 +265,163 @@ public FacetResult getTopChildren(int topN, String dim,
String... path) throws I
}
}
}
-
ord = siblings[ord];
}
}
- if (aggregatedValue == 0) {
- return null;
- }
-
if (dimConfig.multiValued) {
if (dimConfig.requireDimCount) {
aggregatedValue = getValue(dimOrd);
} else {
// Our sum'd value is not correct, in general:
aggregatedValue = -1;
}
- } else {
- // Our sum'd dim value is accurate, so we keep it
}
- LabelAndValue[] labelValues = new LabelAndValue[q.size()];
- int[] ordinals = new int[labelValues.length];
- int[] values = new int[labelValues.length];
+ return new ChildOrdsResult(aggregatedValue, childCount, q);
+ }
- for (int i = labelValues.length - 1; i >= 0; i--) {
- TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
- ordinals[i] = ordAndValue.ord;
- values[i] = ordAndValue.value;
+ /** Returns value/count of a dimension. */
+ private int getDimValue(
+ FacetsConfig.DimConfig dimConfig,
+ String dim,
+ int dimOrd,
+ int topN,
+ HashMap<String, ChildOrdsResult> dimToChildOrdsResult)
+ throws IOException {
+
+ // if dimConfig.hierarchical == true || dim is multiValued and dim count
has been aggregated at
+ // indexing time, return dimCount directly
+ if (dimConfig.hierarchical == true || (dimConfig.multiValued &&
dimConfig.requireDimCount)) {
+ return getValue(dimOrd);
}
- FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals);
- for (int i = 0; i < labelValues.length; i++) {
- labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length],
values[i]);
+ // if dimCount was not aggregated at indexing time, iterate over childOrds
to get dimCount
+ ChildOrdsResult childOrdsResult = getChildOrdsResult(dimConfig, dimOrd,
topN);
+
+ // if no early termination, store dim and childOrdsResult into a hashmap
to avoid calling
+ // getChildOrdsResult again in getTopDims
+ dimToChildOrdsResult.put(dim, childOrdsResult);
+ return childOrdsResult.aggregatedValue;
+ }
+
+ @Override
+ public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws
IOException {
+ if (topNDims <= 0 || topNChildren <= 0) {
+ throw new IllegalArgumentException("topN must be > 0");
+ }
+
+ // get existing children and siblings ordinal array from TaxonomyFacets
+ int[] children = getExistingChildren();
+ int[] siblings = getExistingSiblings();
+
+ // Create priority queue to store top dimensions and sort by their
aggregated values/hits and
+ // string values.
+ PriorityQueue<DimValueResult> pq =
+ new PriorityQueue<>(topNDims) {
+ @Override
+ protected boolean lessThan(DimValueResult a, DimValueResult b) {
+ if (a.value > b.value) {
+ return false;
+ } else if (a.value < b.value) {
+ return true;
+ } else {
+ return a.dim.compareTo(b.dim) > 0;
+ }
+ }
+ };
+
+ // create hashMap to store the ChildOrdsResult to avoid calling
getChildOrdsResult for all dims
+ HashMap<String, ChildOrdsResult> dimToChildOrdsResult = new HashMap<>();
+
+ // iterate over children and siblings ordinals for all dims
+ int ord = children[TaxonomyReader.ROOT_ORDINAL];
+ while (ord != TaxonomyReader.INVALID_ORDINAL) {
+ String dim = taxoReader.getPath(ord).components[0];
+ FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
+ if (dimConfig.indexFieldName.equals(indexFieldName)) {
+ FacetLabel cp = new FacetLabel(dim, emptyPath);
+ int dimOrd = taxoReader.getOrdinal(cp);
+ int dimCount = 0;
Review Comment:
minor: You don't really need to initialize `dimCount` to zero here. I would
suggest you remove this line and declare the variable on line 348 inside the
conditional for simplicity.
##########
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java:
##########
@@ -169,18 +176,54 @@ public FacetResult getTopChildren(int topN, String dim,
String... path) throws I
return null;
}
- TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(),
topN));
+ ChildOrdsResult childOrdsResult = getChildOrdsResult(dimConfig, dimOrd,
topN);
+
+ if (childOrdsResult.q == null || childOrdsResult.aggregatedValue == 0) {
+ return null;
+ }
+ LabelAndValue[] labelValues = getLabelValues(childOrdsResult.q, cp.length);
+ return new FacetResult(
+ dim, path, childOrdsResult.aggregatedValue, labelValues,
childOrdsResult.childCount);
+ }
+
+ /**
+ * Returns label values for dims This portion of code is moved from
getTopChildren because
+ * getTopDims needs to reuse it
+ */
+ private LabelAndValue[] getLabelValues(TopOrdAndIntQueue q, int
facetLabelLength)
Review Comment:
minor: It took me a minute to understand the purpose of `facetLabelLength`
here. I think the parameter name felt a bit confusing. I wonder if it would be
easier to read the code if you actually passed in the "parent" `FacetLabel`
itself and then pull the length from it? Or maybe just a different parameter
name or a comment might help?
##########
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java:
##########
@@ -169,18 +176,54 @@ public FacetResult getTopChildren(int topN, String dim,
String... path) throws I
return null;
}
- TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(),
topN));
+ ChildOrdsResult childOrdsResult = getChildOrdsResult(dimConfig, dimOrd,
topN);
+
+ if (childOrdsResult.q == null || childOrdsResult.aggregatedValue == 0) {
+ return null;
+ }
+ LabelAndValue[] labelValues = getLabelValues(childOrdsResult.q, cp.length);
+ return new FacetResult(
+ dim, path, childOrdsResult.aggregatedValue, labelValues,
childOrdsResult.childCount);
+ }
+
+ /**
+ * Returns label values for dims This portion of code is moved from
getTopChildren because
Review Comment:
minor: You have a similar comment in a couple places that functionality was
moved from getTopChildren. I'm not sure it's necessary to have that comment.
Could maybe create more confusion?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]