shaie commented on code in PR #841: URL: https://github.com/apache/lucene/pull/841#discussion_r875458505
########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/DoubleHyperRectangle.java: ########## @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.util.Arrays; +import org.apache.lucene.util.NumericUtils; + +/** Stores a hyper rectangle as an array of DoubleRangePairs */ +public class DoubleHyperRectangle extends HyperRectangle { + + /** Creates DoubleHyperRectangle */ + public DoubleHyperRectangle(String label, DoubleRangePair... pairs) { + super(label, convertToLongRangePairArray(pairs)); + } + + private static LongRangePair[] convertToLongRangePairArray(DoubleRangePair... pairs) { Review Comment: nit: I find `Array` redundant, maybe `convertToLongRangePairs`? Or `toLongRangePairs`? ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) + : "All hyper rectangles must be the same dimensionality"; + this.field = field; + this.hyperRectangles = hyperRectangles; + this.dims = hyperRectangles[0].dims; + this.counts = new int[hyperRectangles.length]; + count(field, hits.getMatchingDocs()); + } + + private boolean isHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles) { + int dims = hyperRectangles[0].dims; + return Arrays.stream(hyperRectangles).allMatch(hyperRectangle -> hyperRectangle.dims == dims); + } + + /** Counts from the provided field. */ + private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs) + throws IOException { + + for (int i = 0; i < matchingDocs.size(); i++) { + + FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + + BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field); + + final DocIdSetIterator it = hits.bits.iterator(); + if (it == null) { + continue; + } + + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { + if (binaryDocValues.advanceExact(doc)) { + long[] point = LongPoint.unpack(binaryDocValues.binaryValue()); + assert point.length == dims + : "Point dimension (dim=" + + point.length + + ") is incompatible with hyper rectangle dimension (dim=" + + dims + + ")"; + // linear scan, change this to use R trees + boolean docIsValid = false; + for (int j = 0; j < hyperRectangles.length; j++) { + boolean validPoint = true; + for (int dim = 0; dim < dims; dim++) { + HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim); Review Comment: nit: Now that it's part of `HyperRectangle` you can reference `LongRangePair` directly, but if you prefer to leave it for clarity I'm ok with that. ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) Review Comment: nit: `is` or `are`? I think we're referring to the plural rectangles and dims? So `areHyperRectangleDimsConsistent`? ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in by subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) Review Comment: Yes I think a single constructor with `HyperRectangle...` is clear enough. Not sure we need to protect a user from passing a mix of Long/Double rectangles, and even if they do, we convert them to Long anyway ... I vote for keeping just this variant. ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) + : "All hyper rectangles must be the same dimensionality"; + this.field = field; + this.hyperRectangles = hyperRectangles; + this.dims = hyperRectangles[0].dims; + this.counts = new int[hyperRectangles.length]; + count(field, hits.getMatchingDocs()); + } + + private boolean isHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles) { + int dims = hyperRectangles[0].dims; + return Arrays.stream(hyperRectangles).allMatch(hyperRectangle -> hyperRectangle.dims == dims); + } + + /** Counts from the provided field. */ + private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs) + throws IOException { + + for (int i = 0; i < matchingDocs.size(); i++) { + + FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + + BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field); + + final DocIdSetIterator it = hits.bits.iterator(); + if (it == null) { + continue; + } + + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { + if (binaryDocValues.advanceExact(doc)) { + long[] point = LongPoint.unpack(binaryDocValues.binaryValue()); + assert point.length == dims + : "Point dimension (dim=" + + point.length + + ") is incompatible with hyper rectangle dimension (dim=" + + dims + + ")"; + // linear scan, change this to use R trees + boolean docIsValid = false; + for (int j = 0; j < hyperRectangles.length; j++) { + boolean validPoint = true; + for (int dim = 0; dim < dims; dim++) { + HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim); + if (!range.accept(point[dim])) { + validPoint = false; + break; + } + } + if (validPoint) { + counts[j]++; + docIsValid = true; + } + } + if (docIsValid) { + totCount++; + } + } + doc = it.nextDoc(); + } + } + } + + @Override + public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { + validateTopN(topN); + if (field.equals(dim) == false) { + throw new IllegalArgumentException( + "invalid dim \"" + dim + "\"; should be \"" + field + "\""); + } + if (path.length != 0) { Review Comment: nit: add `null` check? Do we do this elsewhere? ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangle.java: ########## @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +/** Holds the name and the number of dims for a HyperRectangle */ +public abstract class HyperRectangle { + /** Label that identifies this range. */ + public final String label; + + /** How many dimensions this hyper rectangle has (IE: a regular rectangle would have dims=2) */ + public final int dims; + + /** All subclasses should store pairs as comparable longs */ + protected final LongRangePair[] pairs; + + /** Sole constructor. */ + protected HyperRectangle(String label, LongRangePair... pairs) { + if (label == null) { + throw new IllegalArgumentException("label must not be null"); + } + if (pairs == null || pairs.length == 0) { + throw new IllegalArgumentException("Pairs cannot be null or empty"); + } + this.label = label; + this.dims = pairs.length; + this.pairs = pairs; + } + + /** + * Returns comparable long range for a provided dim + * + * @param dim dimension of the request range + * @return The comparable long version of the requested range + */ + public LongRangePair getComparableDimRange(int dim) { + return pairs[dim]; + } + + /** Defines a single range in a HyperRectangle */ + public static class LongRangePair { + /** Inclusive min */ + public final long min; + + /** Inclusive max */ + public final long max; + + /** + * Creates a LongRangePair, very similar to the constructor of {@link + * org.apache.lucene.facet.range.LongRange} + * + * @param minIn Min value of pair + * @param minInclusive If minIn is inclusive + * @param maxIn Max value of pair + * @param maxInclusive If maxIn is inclusive + */ + public LongRangePair(long minIn, boolean minInclusive, long maxIn, boolean maxInclusive) { + if (!minInclusive) { + if (minIn != Long.MAX_VALUE) { + minIn++; + } else { + throw new IllegalArgumentException("Invalid min input, min=" + minIn); + } + } + + if (!maxInclusive) { + if (maxIn != Long.MIN_VALUE) { + maxIn--; + } else { + throw new IllegalArgumentException("Invalid max input, max=" + maxIn); + } + } + + if (minIn > maxIn) { + throw new IllegalArgumentException("Minimum cannot be greater than maximum"); Review Comment: I think here including the value of min/max in the message will be useful to debug. ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) + : "All hyper rectangles must be the same dimensionality"; + this.field = field; + this.hyperRectangles = hyperRectangles; + this.dims = hyperRectangles[0].dims; + this.counts = new int[hyperRectangles.length]; + count(field, hits.getMatchingDocs()); + } + + private boolean isHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles) { + int dims = hyperRectangles[0].dims; + return Arrays.stream(hyperRectangles).allMatch(hyperRectangle -> hyperRectangle.dims == dims); + } + + /** Counts from the provided field. */ + private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs) + throws IOException { + + for (int i = 0; i < matchingDocs.size(); i++) { + + FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + + BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field); + + final DocIdSetIterator it = hits.bits.iterator(); + if (it == null) { + continue; + } + + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { + if (binaryDocValues.advanceExact(doc)) { + long[] point = LongPoint.unpack(binaryDocValues.binaryValue()); + assert point.length == dims + : "Point dimension (dim=" + + point.length + + ") is incompatible with hyper rectangle dimension (dim=" + + dims + + ")"; + // linear scan, change this to use R trees + boolean docIsValid = false; + for (int j = 0; j < hyperRectangles.length; j++) { + boolean validPoint = true; + for (int dim = 0; dim < dims; dim++) { + HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim); + if (!range.accept(point[dim])) { + validPoint = false; + break; + } + } + if (validPoint) { + counts[j]++; + docIsValid = true; + } + } + if (docIsValid) { + totCount++; + } + } + doc = it.nextDoc(); + } + } + } + + @Override + public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { + validateTopN(topN); + if (field.equals(dim) == false) { + throw new IllegalArgumentException( + "invalid dim \"" + dim + "\"; should be \"" + field + "\""); + } + if (path.length != 0) { + throw new IllegalArgumentException("path.length should be 0"); Review Comment: Typo: `path.length should not be 0`? Or `Must specify at least one path component`? ########## lucene/core/src/java/org/apache/lucene/document/LongPoint.java: ########## @@ -117,6 +117,25 @@ public static BytesRef pack(long... point) { return new BytesRef(packed); } + /** + * Unpack a BytesRef into a long point + * + * @param bytesRef BytesRef Value + * @throws IllegalArgumentException the value is null + */ + public static long[] unpack(BytesRef bytesRef) { Review Comment: Yeah OK I understand the reasoning. Technically we could get around that by counting the number of valid points per rectangle and only increase the count of a rectangle if all points were accepted, but that would introduce a temporary count array and I'm not sure it's better than unpacking to a `long[]`, so let's leave the code as-is. ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) + : "All hyper rectangles must be the same dimensionality"; + this.field = field; + this.hyperRectangles = hyperRectangles; + this.dims = hyperRectangles[0].dims; + this.counts = new int[hyperRectangles.length]; + count(field, hits.getMatchingDocs()); + } + + private boolean isHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles) { + int dims = hyperRectangles[0].dims; + return Arrays.stream(hyperRectangles).allMatch(hyperRectangle -> hyperRectangle.dims == dims); + } + + /** Counts from the provided field. */ + private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs) + throws IOException { + + for (int i = 0; i < matchingDocs.size(); i++) { + + FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + + BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field); + + final DocIdSetIterator it = hits.bits.iterator(); + if (it == null) { + continue; + } + + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { + if (binaryDocValues.advanceExact(doc)) { + long[] point = LongPoint.unpack(binaryDocValues.binaryValue()); + assert point.length == dims + : "Point dimension (dim=" + + point.length + + ") is incompatible with hyper rectangle dimension (dim=" + + dims + + ")"; + // linear scan, change this to use R trees + boolean docIsValid = false; + for (int j = 0; j < hyperRectangles.length; j++) { + boolean validPoint = true; + for (int dim = 0; dim < dims; dim++) { + HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim); + if (!range.accept(point[dim])) { + validPoint = false; + break; + } + } + if (validPoint) { + counts[j]++; + docIsValid = true; + } + } + if (docIsValid) { + totCount++; + } + } + doc = it.nextDoc(); + } + } + } + + @Override + public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { + validateTopN(topN); + if (field.equals(dim) == false) { + throw new IllegalArgumentException( + "invalid dim \"" + dim + "\"; should be \"" + field + "\""); + } + if (path.length != 0) { + throw new IllegalArgumentException("path.length should be 0"); + } + LabelAndValue[] labelValues = new LabelAndValue[counts.length]; + for (int i = 0; i < counts.length; i++) { + labelValues[i] = new LabelAndValue(hyperRectangles[i].label, counts[i]); + } + return new FacetResult(dim, path, totCount, labelValues, labelValues.length); + } + + @Override + public Number getSpecificValue(String dim, String... path) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public List<FacetResult> getAllDims(int topN) throws IOException { + validateTopN(topN); + return Collections.singletonList(getTopChildren(topN, field)); Review Comment: Here for example you just pass `field` as the path. Is it useful, or you did that because of the assertion that path length is not 0? ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) + : "All hyper rectangles must be the same dimensionality"; + this.field = field; + this.hyperRectangles = hyperRectangles; + this.dims = hyperRectangles[0].dims; + this.counts = new int[hyperRectangles.length]; + count(field, hits.getMatchingDocs()); + } + + private boolean isHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles) { + int dims = hyperRectangles[0].dims; + return Arrays.stream(hyperRectangles).allMatch(hyperRectangle -> hyperRectangle.dims == dims); + } + + /** Counts from the provided field. */ + private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs) + throws IOException { + + for (int i = 0; i < matchingDocs.size(); i++) { + + FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + + BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field); + + final DocIdSetIterator it = hits.bits.iterator(); + if (it == null) { + continue; + } + + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { + if (binaryDocValues.advanceExact(doc)) { + long[] point = LongPoint.unpack(binaryDocValues.binaryValue()); + assert point.length == dims + : "Point dimension (dim=" + + point.length + + ") is incompatible with hyper rectangle dimension (dim=" + + dims + + ")"; + // linear scan, change this to use R trees + boolean docIsValid = false; + for (int j = 0; j < hyperRectangles.length; j++) { + boolean validPoint = true; + for (int dim = 0; dim < dims; dim++) { + HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim); + if (!range.accept(point[dim])) { + validPoint = false; + break; + } + } + if (validPoint) { + counts[j]++; + docIsValid = true; + } + } + if (docIsValid) { + totCount++; + } + } + doc = it.nextDoc(); Review Comment: Q: can this be moved to the `for()` line itself? I think so? ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/DoubleHyperRectangle.java: ########## @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.util.Arrays; +import org.apache.lucene.util.NumericUtils; + +/** Stores a hyper rectangle as an array of DoubleRangePairs */ +public class DoubleHyperRectangle extends HyperRectangle { + + /** Creates DoubleHyperRectangle */ + public DoubleHyperRectangle(String label, DoubleRangePair... pairs) { + super(label, convertToLongRangePairArray(pairs)); + } + + private static LongRangePair[] convertToLongRangePairArray(DoubleRangePair... pairs) { + if (pairs == null || pairs.length == 0) { + throw new IllegalArgumentException("Pairs cannot be null or empty"); + } + return Arrays.stream(pairs).map(DoubleRangePair::toLongRangePair).toArray(LongRangePair[]::new); + } + + /** Defines a single range in a DoubleHyperRectangle */ + public static class DoubleRangePair { + /** Inclusive min */ + public final double min; + + /** Inclusive max */ + public final double max; + + /** + * Creates a DoubleRangePair, very similar to the constructor of {@link + * org.apache.lucene.facet.range.DoubleRange} + * + * @param minIn Min value of pair + * @param minInclusive If minIn is inclusive + * @param maxIn Max value of pair + * @param maxInclusive If maxIn is inclusive + */ + public DoubleRangePair(double minIn, boolean minInclusive, double maxIn, boolean maxInclusive) { + if (Double.isNaN(minIn) || Double.isNaN(maxIn)) { + throw new IllegalArgumentException( + "min and max cannot be NaN: min=" + minIn + ", max=" + maxIn); + } + + if (!minInclusive) { + minIn = Math.nextUp(minIn); + } + + if (!maxInclusive) { + // Why no Math.nextDown? Review Comment: Do we want to try answer this question? :) According to https://appdividend.com/2022/01/05/java-math-nextdown-function-example/: ``` The nextDown() method is equivalent to nextAfter(d, Double.NEGATIVE_INFINITY) method. ``` So I think you can just call `nextDown()`? ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) + : "All hyper rectangles must be the same dimensionality"; + this.field = field; + this.hyperRectangles = hyperRectangles; + this.dims = hyperRectangles[0].dims; + this.counts = new int[hyperRectangles.length]; + count(field, hits.getMatchingDocs()); + } + + private boolean isHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles) { + int dims = hyperRectangles[0].dims; + return Arrays.stream(hyperRectangles).allMatch(hyperRectangle -> hyperRectangle.dims == dims); + } + + /** Counts from the provided field. */ + private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs) + throws IOException { + + for (int i = 0; i < matchingDocs.size(); i++) { + + FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + + BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field); + + final DocIdSetIterator it = hits.bits.iterator(); + if (it == null) { + continue; + } + + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { + if (binaryDocValues.advanceExact(doc)) { + long[] point = LongPoint.unpack(binaryDocValues.binaryValue()); + assert point.length == dims + : "Point dimension (dim=" + + point.length + + ") is incompatible with hyper rectangle dimension (dim=" + + dims + + ")"; + // linear scan, change this to use R trees + boolean docIsValid = false; + for (int j = 0; j < hyperRectangles.length; j++) { + boolean validPoint = true; + for (int dim = 0; dim < dims; dim++) { + HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim); + if (!range.accept(point[dim])) { + validPoint = false; + break; + } + } + if (validPoint) { + counts[j]++; + docIsValid = true; + } + } + if (docIsValid) { + totCount++; + } + } + doc = it.nextDoc(); + } + } + } + + @Override + public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { + validateTopN(topN); + if (field.equals(dim) == false) { + throw new IllegalArgumentException( + "invalid dim \"" + dim + "\"; should be \"" + field + "\""); + } + if (path.length != 0) { + throw new IllegalArgumentException("path.length should be 0"); Review Comment: Now that I think of it, this method doesn't really care about the `path` right? It just passes it to `FacetResult`. Is there a reason to require a path at all? I understand it's part of the method contract, but since this method doesn't use it, I'm not sure we need to enforce at least one path component? WDYT? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org