gsmiller commented on code in PR #779:
URL: https://github.com/apache/lucene/pull/779#discussion_r871508282


##########
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java:
##########
@@ -222,41 +262,162 @@ public FacetResult getTopChildren(int topN, String dim, 
String... path) throws I
             }
           }
         }
-
         ord = siblings[ord];
       }
     }
 
-    if (aggregatedValue == 0) {
-      return null;
-    }
-
     if (dimConfig.multiValued) {
       if (dimConfig.requireDimCount) {
         aggregatedValue = getValue(dimOrd);
       } else {
         // Our sum'd value is not correct, in general:
         aggregatedValue = -1;
       }
-    } else {
-      // Our sum'd dim value is accurate, so we keep it
     }
 
-    LabelAndValue[] labelValues = new LabelAndValue[q.size()];
-    int[] ordinals = new int[labelValues.length];
-    int[] values = new int[labelValues.length];
+    return new ChildOrdsResult(aggregatedValue, childCount, q);
+  }
 
-    for (int i = labelValues.length - 1; i >= 0; i--) {
-      TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
-      ordinals[i] = ordAndValue.ord;
-      values[i] = ordAndValue.value;
+  /** Returns value/count of a dimension. */
+  private int getDimValue(
+      FacetsConfig.DimConfig dimConfig,
+      String dim,
+      int dimOrd,
+      int topN,
+      HashMap<String, ChildOrdsResult> dimToChildOrdsResult)
+      throws IOException {
+
+    // if dimConfig.hierarchical == true || dim is multiValued and dim count 
has been aggregated at
+    // indexing time, return dimCount directly
+    if (dimConfig.hierarchical == true || (dimConfig.multiValued && 
dimConfig.requireDimCount)) {
+      return getValue(dimOrd);
     }
 
-    FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals);
-    for (int i = 0; i < labelValues.length; i++) {
-      labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], 
values[i]);
+    // if dimCount was not aggregated at indexing time, iterate over childOrds 
to get dimCount
+    ChildOrdsResult childOrdsResult = getChildOrdsResult(dimConfig, dimOrd, 
topN);
+
+    // if no early termination, store dim and childOrdsResult into a hashmap 
to avoid calling
+    // getChildOrdsResult again in getTopDims
+    dimToChildOrdsResult.put(dim, childOrdsResult);
+    return childOrdsResult.aggregatedValue;
+  }
+
+  @Override
+  public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws 
IOException {
+    if (topNDims <= 0 || topNChildren <= 0) {
+      throw new IllegalArgumentException("topN must be > 0");
     }
 
-    return new FacetResult(dim, path, aggregatedValue, labelValues, 
childCount);
+    // get children and siblings ordinal array from TaxonomyFacets
+    int[] children = getChildren();
+    int[] siblings = getSiblings();
+
+    // Create priority queue to store top dimensions and sort by their 
aggregated values/hits and
+    // string values.
+    PriorityQueue<DimValueResult> pq =
+        new PriorityQueue<>(topNDims) {
+          @Override
+          protected boolean lessThan(DimValueResult a, DimValueResult b) {
+            if (a.value > b.value) {
+              return false;
+            } else if (a.value < b.value) {
+              return true;
+            } else {
+              return a.dim.compareTo(b.dim) > 0;
+            }
+          }
+        };
+
+    // create hashMap to store the ChildOrdsResult to avoid calling 
getChildOrdsResult for all dims
+    HashMap<String, ChildOrdsResult> dimToChildOrdsResult = new HashMap<>();
+
+    // iterate over children and siblings ordinals for all dims
+    int ord = children[TaxonomyReader.ROOT_ORDINAL];
+    while (ord != TaxonomyReader.INVALID_ORDINAL) {
+      String dim = taxoReader.getPath(ord).components[0];
+      FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
+      if (dimConfig.indexFieldName.equals(indexFieldName)) {
+        FacetLabel cp = new FacetLabel(dim, emptyPath);
+        int dimOrd = taxoReader.getOrdinal(cp);
+        // if dimOrd = -1, we skip this dim, else call getDimValue
+        if (dimOrd != -1) {
+          int dimCount = getDimValue(dimConfig, dim, dimOrd, topNChildren, 
dimToChildOrdsResult);
+          if (dimCount != 0) {
+            // use priority queue to store DimValueResult for topNDims
+            if (pq.size() < topNDims) {
+              pq.add(new DimValueResult(dim, dimOrd, dimCount));
+            } else {
+              if (dimCount > pq.top().value
+                  || (dimCount == pq.top().value && 
dim.compareTo(pq.top().dim) < 0)) {
+                DimValueResult bottomDim = pq.top();
+                bottomDim.dim = dim;
+                bottomDim.value = dimCount;
+                pq.updateTop();
+              }
+            }
+          }
+        }
+      }
+      ord = siblings[ord];
+    }
+
+    // use fixed-size array to reduce space usage
+    FacetResult[] results = new FacetResult[pq.size()];
+
+    while (pq.size() > 0) {
+      DimValueResult dimValueResult = pq.pop();
+      String dim = dimValueResult.dim;
+      ChildOrdsResult childOrdsResult;
+      // if the childOrdsResult was stored in the map, avoid calling 
getChildOrdsResult again
+      if (dimToChildOrdsResult.containsKey(dim)) {
+        childOrdsResult = dimToChildOrdsResult.get(dim);
+      } else {
+        FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
+        childOrdsResult = getChildOrdsResult(dimConfig, dimValueResult.dimOrd, 
topNChildren);
+      }
+      // FacetResult requires String[] path, and path is always empty for 
getTopDims.
+      // FacetLabelLength is always equal to 1 when FacetLabel is constructed 
with
+      // FacetLabel(dim, emptyPath), and therefore, 1 is passed in when 
calling getLabelValues
+      FacetResult facetResult =
+          new FacetResult(
+              dimValueResult.dim,
+              emptyPath,
+              dimValueResult.value,
+              getLabelValues(childOrdsResult.q, new FacetLabel(dim, 
emptyPath)),

Review Comment:
   Hmm, OK I see that since you changed `getLabelValues` to take the full 
`FacetLabel` instead of the path length that you're now having to instantiate a 
`FacetLabel` just to call the method. That's no good. Let's avoid creating 
extra garbage here and go back to how you had `getLabelValues` but maybe try to 
come up with a good parameter name that's somewhat easy to understand. Sorry if 
I led you down the wrong path here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to