stefanvodita commented on code in PR #12966:
URL: https://github.com/apache/lucene/pull/12966#discussion_r1518965319
##########
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java:
##########
@@ -142,6 +249,301 @@ DimConfig verifyDim(String dim) {
return dimConfig;
}
+ /**
+ * Roll up the aggregation values from {@code childOrdinal} to {@code
ordinal}. Overrides should
+ * probably call this to update the counts. Overriding allows us to work
with primitive types for
+ * the aggregation values, keeping aggregation efficient.
+ */
+ protected void updateValueFromRollup(int ordinal, int childOrdinal) throws
IOException {
+ setCount(ordinal, getCount(ordinal) + rollup(childOrdinal));
+ }
+
+ /**
+ * Return a {@link TopOrdAndNumberQueue} of the appropriate type, i.e. a
{@link TopOrdAndIntQueue}
+ * or a {@link org.apache.lucene.facet.TopOrdAndFloatQueue}.
+ */
+ protected TopOrdAndNumberQueue makeTopOrdAndNumberQueue(int topN) {
+ return new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
+ }
+
+ // TODO: We don't need this if we're okay with having an integer -1 in the
results even for float
+ // aggregations.
+ /** Return the value for a missing aggregation, i.e. {@code -1} or {@code
-1f}. */
+ protected Number missingAggregationValue() {
+ return -1;
+ }
+
+ /** Rolls up any single-valued hierarchical dimensions. */
+ void rollup() throws IOException {
+ if (initialized == false) {
+ return;
+ }
+
+ // Rollup any necessary dims:
+ int[] children = null;
+ for (Map.Entry<String, FacetsConfig.DimConfig> ent :
config.getDimConfigs().entrySet()) {
+ String dim = ent.getKey();
+ FacetsConfig.DimConfig ft = ent.getValue();
+ if (ft.hierarchical && ft.multiValued == false) {
+ int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
+ // It can be -1 if this field was declared in the
+ // config but never indexed:
+ if (dimRootOrd > 0) {
+ if (children == null) {
+ // lazy init
+ children = getChildren();
+ }
+ updateValueFromRollup(dimRootOrd, children[dimRootOrd]);
+ }
+ }
+ }
+ }
+
+ private int rollup(int ord) throws IOException {
+ int[] children = getChildren();
+ int[] siblings = getSiblings();
+ int aggregatedValue = 0;
+ while (ord != TaxonomyReader.INVALID_ORDINAL) {
+ int currentValue = getCount(ord);
+ int newValue = currentValue + rollup(children[ord]);
+ setCount(ord, newValue);
+ aggregatedValue += getCount(ord);
+ ord = siblings[ord];
+ }
+ return aggregatedValue;
+ }
+
+ /**
+ * Create a FacetResult for the provided dim + path and intermediate
results. Does the extra work
+ * of resolving ordinals -> labels, etc. Will return null if there are no
children.
+ */
+ private FacetResult createFacetResult(
+ TopChildrenForPath topChildrenForPath, String dim, String... path)
throws IOException {
+ // If the intermediate result is null or there are no children, we return
null:
+ if (topChildrenForPath == null || topChildrenForPath.childCount == 0) {
+ return null;
+ }
+
+ TopOrdAndNumberQueue q = topChildrenForPath.childQueue;
+ assert q != null;
+
+ LabelAndValue[] labelValues = new LabelAndValue[q.size()];
+ int[] ordinals = new int[labelValues.length];
+ Number[] values = new Number[labelValues.length];
+
+ for (int i = labelValues.length - 1; i >= 0; i--) {
+ TopOrdAndNumberQueue.OrdAndValue ordAndValue = q.pop();
+ assert ordAndValue != null;
+ ordinals[i] = ordAndValue.ord;
+ values[i] = ordAndValue.value;
+ }
+
+ FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals);
+ // The path component we're interested in is the one immediately after the
provided path. We
+ // add 1 here to also account for the dim:
+ int childComponentIdx = path.length + 1;
+ for (int i = 0; i < labelValues.length; i++) {
+ labelValues[i] = new
LabelAndValue(bulkPath[i].components[childComponentIdx], values[i]);
+ }
+
+ return new FacetResult(
+ dim, path, topChildrenForPath.pathValue, labelValues,
topChildrenForPath.childCount);
+ }
+
+ @Override
+ public FacetResult getAllChildren(String dim, String... path) throws
IOException {
+ DimConfig dimConfig = verifyDim(dim);
+ FacetLabel cp = new FacetLabel(dim, path);
+ int dimOrd = taxoReader.getOrdinal(cp);
+ if (dimOrd == -1) {
+ return null;
+ }
+
+ if (initialized == false) {
+ return null;
+ }
+
+ Number aggregatedValue = 0;
+ int aggregatedCount = 0;
+
+ IntArrayList ordinals = new IntArrayList();
+ List<Number> ordValues = new ArrayList<>();
+
+ if (sparseCounts != null) {
+ for (IntIntCursor ordAndCount : sparseCounts) {
+ int ord = ordAndCount.key;
+ int count = ordAndCount.value;
+ Number value = getAggregationValue(ord);
+ if (parents[ord] == dimOrd && count > 0) {
+ aggregatedCount += count;
+ aggregatedValue = aggregate(aggregatedValue, value);
+ ordinals.add(ord);
+ ordValues.add(value);
+ }
+ }
+ } else {
+ int[] children = getChildren();
+ int[] siblings = getSiblings();
+ int ord = children[dimOrd];
+ while (ord != TaxonomyReader.INVALID_ORDINAL) {
+ int count = counts[ord];
+ Number value = getAggregationValue(ord);
+ if (count > 0) {
+ aggregatedCount += count;
+ aggregatedValue = aggregate(aggregatedValue, value);
+ ordinals.add(ord);
+ ordValues.add(value);
+ }
+ ord = siblings[ord];
+ }
+ }
+
+ if (aggregatedCount == 0) {
+ return null;
+ }
+
+ if (dimConfig.multiValued) {
+ if (dimConfig.requireDimCount) {
+ aggregatedValue = getAggregationValue(dimOrd);
+ } else {
+ // Our aggregated value is not correct, in general:
+ aggregatedValue = missingAggregationValue();
+ }
+ } else {
+ // Our aggregated dim value is accurate, so we keep it
+ }
+
+ // TODO: It would be nice if TaxonomyReader let us pass in a buffer + size
so we didn't have to
+ // do an array copy here:
+ FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals.toArray());
+
+ LabelAndValue[] labelValues = new LabelAndValue[ordValues.size()];
+ for (int i = 0; i < ordValues.size(); i++) {
+ labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length],
ordValues.get(i));
+ }
+ return new FacetResult(dim, path, aggregatedValue, labelValues,
ordinals.size());
+ }
+
+ private TopOrdAndNumberQueue.OrdAndValue insertIntoQueue(
+ TopOrdAndNumberQueue q,
+ int topN,
+ TopOrdAndNumberQueue.OrdAndValue bottomOrdAndValue,
+ TopOrdAndNumberQueue.OrdAndValue incomingOrdAndValue,
+ int ord,
+ Number value) {
+ if (incomingOrdAndValue == null) {
+ incomingOrdAndValue = new TopOrdAndNumberQueue.OrdAndValue();
+ }
+ incomingOrdAndValue.ord = ord;
+ incomingOrdAndValue.value = value;
+
+ if (q.size() < topN || q.lessThan(bottomOrdAndValue, incomingOrdAndValue))
{
+ incomingOrdAndValue = q.insertWithOverflow(incomingOrdAndValue);
+ bottomOrdAndValue.ord = q.top().ord;
+ bottomOrdAndValue.value = q.top().value;
+ }
+ return incomingOrdAndValue;
+ }
+
+ /**
+ * Determine the top-n children for a specified dimension + path. Results
are in an intermediate
+ * form.
+ */
+ protected TopChildrenForPath getTopChildrenForPath(DimConfig dimConfig, int
pathOrd, int topN)
Review Comment:
I would like to avoid making API changes in this PR. It's an interesting
question whether all `Facets` should have this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]