mdmarshmallow commented on code in PR #841: URL: https://github.com/apache/lucene/pull/841#discussion_r876142339
########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in by subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) Review Comment: Ok sounds good to me, I'll just use the single constructor then. ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) + : "All hyper rectangles must be the same dimensionality"; + this.field = field; + this.hyperRectangles = hyperRectangles; + this.dims = hyperRectangles[0].dims; + this.counts = new int[hyperRectangles.length]; + count(field, hits.getMatchingDocs()); + } + + private boolean isHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles) { + int dims = hyperRectangles[0].dims; + return Arrays.stream(hyperRectangles).allMatch(hyperRectangle -> hyperRectangle.dims == dims); + } + + /** Counts from the provided field. */ + private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs) + throws IOException { + + for (int i = 0; i < matchingDocs.size(); i++) { + + FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + + BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field); + + final DocIdSetIterator it = hits.bits.iterator(); + if (it == null) { + continue; + } + + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { + if (binaryDocValues.advanceExact(doc)) { + long[] point = LongPoint.unpack(binaryDocValues.binaryValue()); + assert point.length == dims + : "Point dimension (dim=" + + point.length + + ") is incompatible with hyper rectangle dimension (dim=" + + dims + + ")"; + // linear scan, change this to use R trees + boolean docIsValid = false; + for (int j = 0; j < hyperRectangles.length; j++) { + boolean validPoint = true; + for (int dim = 0; dim < dims; dim++) { + HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim); Review Comment: Hmm I don't think I can actually since `HyperRectangleFacetCoutns` doesn't inherit from `HyperRectangle` in any way (or at least Intellij is showing a compilation error when I try this). ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/DoubleHyperRectangle.java: ########## @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.util.Arrays; +import org.apache.lucene.util.NumericUtils; + +/** Stores a hyper rectangle as an array of DoubleRangePairs */ +public class DoubleHyperRectangle extends HyperRectangle { + + /** Creates DoubleHyperRectangle */ + public DoubleHyperRectangle(String label, DoubleRangePair... pairs) { + super(label, convertToLongRangePairArray(pairs)); + } + + private static LongRangePair[] convertToLongRangePairArray(DoubleRangePair... pairs) { + if (pairs == null || pairs.length == 0) { + throw new IllegalArgumentException("Pairs cannot be null or empty"); + } + return Arrays.stream(pairs).map(DoubleRangePair::toLongRangePair).toArray(LongRangePair[]::new); + } + + /** Defines a single range in a DoubleHyperRectangle */ + public static class DoubleRangePair { + /** Inclusive min */ + public final double min; + + /** Inclusive max */ + public final double max; + + /** + * Creates a DoubleRangePair, very similar to the constructor of {@link + * org.apache.lucene.facet.range.DoubleRange} + * + * @param minIn Min value of pair + * @param minInclusive If minIn is inclusive + * @param maxIn Max value of pair + * @param maxInclusive If maxIn is inclusive + */ + public DoubleRangePair(double minIn, boolean minInclusive, double maxIn, boolean maxInclusive) { + if (Double.isNaN(minIn) || Double.isNaN(maxIn)) { + throw new IllegalArgumentException( + "min and max cannot be NaN: min=" + minIn + ", max=" + maxIn); + } + + if (!minInclusive) { + minIn = Math.nextUp(minIn); + } + + if (!maxInclusive) { + // Why no Math.nextDown? Review Comment: Ah thanks for checking that out :). Changed to `Math.nextDown()` ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/DoubleHyperRectangle.java: ########## @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.util.Arrays; +import org.apache.lucene.util.NumericUtils; + +/** Stores a hyper rectangle as an array of DoubleRangePairs */ +public class DoubleHyperRectangle extends HyperRectangle { + + /** Creates DoubleHyperRectangle */ + public DoubleHyperRectangle(String label, DoubleRangePair... pairs) { + super(label, convertToLongRangePairArray(pairs)); + } + + private static LongRangePair[] convertToLongRangePairArray(DoubleRangePair... pairs) { Review Comment: Changed to `convertToLongRangePairs` ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) + : "All hyper rectangles must be the same dimensionality"; + this.field = field; + this.hyperRectangles = hyperRectangles; + this.dims = hyperRectangles[0].dims; + this.counts = new int[hyperRectangles.length]; + count(field, hits.getMatchingDocs()); + } + + private boolean isHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles) { + int dims = hyperRectangles[0].dims; + return Arrays.stream(hyperRectangles).allMatch(hyperRectangle -> hyperRectangle.dims == dims); + } + + /** Counts from the provided field. */ + private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs) + throws IOException { + + for (int i = 0; i < matchingDocs.size(); i++) { + + FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + + BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field); + + final DocIdSetIterator it = hits.bits.iterator(); + if (it == null) { + continue; + } + + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { + if (binaryDocValues.advanceExact(doc)) { + long[] point = LongPoint.unpack(binaryDocValues.binaryValue()); + assert point.length == dims + : "Point dimension (dim=" + + point.length + + ") is incompatible with hyper rectangle dimension (dim=" + + dims + + ")"; + // linear scan, change this to use R trees + boolean docIsValid = false; + for (int j = 0; j < hyperRectangles.length; j++) { + boolean validPoint = true; + for (int dim = 0; dim < dims; dim++) { + HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim); + if (!range.accept(point[dim])) { + validPoint = false; + break; + } + } + if (validPoint) { + counts[j]++; + docIsValid = true; + } + } + if (docIsValid) { + totCount++; + } + } + doc = it.nextDoc(); Review Comment: I think you are correct here. Changed it. ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangle.java: ########## @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +/** Holds the name and the number of dims for a HyperRectangle */ +public abstract class HyperRectangle { + /** Label that identifies this range. */ + public final String label; + + /** How many dimensions this hyper rectangle has (IE: a regular rectangle would have dims=2) */ + public final int dims; + + /** All subclasses should store pairs as comparable longs */ + protected final LongRangePair[] pairs; + + /** Sole constructor. */ + protected HyperRectangle(String label, LongRangePair... pairs) { + if (label == null) { + throw new IllegalArgumentException("label must not be null"); + } + if (pairs == null || pairs.length == 0) { + throw new IllegalArgumentException("Pairs cannot be null or empty"); + } + this.label = label; + this.dims = pairs.length; + this.pairs = pairs; + } + + /** + * Returns comparable long range for a provided dim + * + * @param dim dimension of the request range + * @return The comparable long version of the requested range + */ + public LongRangePair getComparableDimRange(int dim) { + return pairs[dim]; + } + + /** Defines a single range in a HyperRectangle */ + public static class LongRangePair { + /** Inclusive min */ + public final long min; + + /** Inclusive max */ + public final long max; + + /** + * Creates a LongRangePair, very similar to the constructor of {@link + * org.apache.lucene.facet.range.LongRange} + * + * @param minIn Min value of pair + * @param minInclusive If minIn is inclusive + * @param maxIn Max value of pair + * @param maxInclusive If maxIn is inclusive + */ + public LongRangePair(long minIn, boolean minInclusive, long maxIn, boolean maxInclusive) { + if (!minInclusive) { + if (minIn != Long.MAX_VALUE) { + minIn++; + } else { + throw new IllegalArgumentException("Invalid min input, min=" + minIn); + } + } + + if (!maxInclusive) { + if (maxIn != Long.MIN_VALUE) { + maxIn--; + } else { + throw new IllegalArgumentException("Invalid max input, max=" + maxIn); + } + } + + if (minIn > maxIn) { + throw new IllegalArgumentException("Minimum cannot be greater than maximum"); Review Comment: Added max and min values, also did the same thing in `DoubleHyperRectangle` ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) + : "All hyper rectangles must be the same dimensionality"; + this.field = field; + this.hyperRectangles = hyperRectangles; + this.dims = hyperRectangles[0].dims; + this.counts = new int[hyperRectangles.length]; + count(field, hits.getMatchingDocs()); + } + + private boolean isHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles) { + int dims = hyperRectangles[0].dims; + return Arrays.stream(hyperRectangles).allMatch(hyperRectangle -> hyperRectangle.dims == dims); + } + + /** Counts from the provided field. */ + private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs) + throws IOException { + + for (int i = 0; i < matchingDocs.size(); i++) { + + FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + + BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field); + + final DocIdSetIterator it = hits.bits.iterator(); + if (it == null) { + continue; + } + + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { + if (binaryDocValues.advanceExact(doc)) { + long[] point = LongPoint.unpack(binaryDocValues.binaryValue()); + assert point.length == dims + : "Point dimension (dim=" + + point.length + + ") is incompatible with hyper rectangle dimension (dim=" + + dims + + ")"; + // linear scan, change this to use R trees + boolean docIsValid = false; + for (int j = 0; j < hyperRectangles.length; j++) { + boolean validPoint = true; + for (int dim = 0; dim < dims; dim++) { + HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim); + if (!range.accept(point[dim])) { + validPoint = false; + break; + } + } + if (validPoint) { + counts[j]++; + docIsValid = true; + } + } + if (docIsValid) { + totCount++; + } + } + doc = it.nextDoc(); + } + } + } + + @Override + public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { + validateTopN(topN); + if (field.equals(dim) == false) { + throw new IllegalArgumentException( + "invalid dim \"" + dim + "\"; should be \"" + field + "\""); + } + if (path.length != 0) { Review Comment: Changed to `if (path != null && path.length != 0)`. Not sure what exactly you mean by doing this elsewhere, but this function is a copy of [`RangeFacetCounts#getTopChildren`](https://github.com/apache/lucene/blob/a071180a806d1bb7ae11ae30a07e43e452bea810/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java#L220). ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) + : "All hyper rectangles must be the same dimensionality"; + this.field = field; + this.hyperRectangles = hyperRectangles; + this.dims = hyperRectangles[0].dims; + this.counts = new int[hyperRectangles.length]; + count(field, hits.getMatchingDocs()); + } + + private boolean isHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles) { + int dims = hyperRectangles[0].dims; + return Arrays.stream(hyperRectangles).allMatch(hyperRectangle -> hyperRectangle.dims == dims); + } + + /** Counts from the provided field. */ + private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs) + throws IOException { + + for (int i = 0; i < matchingDocs.size(); i++) { + + FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + + BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field); + + final DocIdSetIterator it = hits.bits.iterator(); + if (it == null) { + continue; + } + + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { + if (binaryDocValues.advanceExact(doc)) { + long[] point = LongPoint.unpack(binaryDocValues.binaryValue()); + assert point.length == dims + : "Point dimension (dim=" + + point.length + + ") is incompatible with hyper rectangle dimension (dim=" + + dims + + ")"; + // linear scan, change this to use R trees + boolean docIsValid = false; + for (int j = 0; j < hyperRectangles.length; j++) { + boolean validPoint = true; + for (int dim = 0; dim < dims; dim++) { + HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim); + if (!range.accept(point[dim])) { + validPoint = false; + break; + } + } + if (validPoint) { + counts[j]++; + docIsValid = true; + } + } + if (docIsValid) { + totCount++; + } + } + doc = it.nextDoc(); + } + } + } + + @Override + public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { + validateTopN(topN); + if (field.equals(dim) == false) { + throw new IllegalArgumentException( + "invalid dim \"" + dim + "\"; should be \"" + field + "\""); + } + if (path.length != 0) { + throw new IllegalArgumentException("path.length should be 0"); + } + LabelAndValue[] labelValues = new LabelAndValue[counts.length]; + for (int i = 0; i < counts.length; i++) { + labelValues[i] = new LabelAndValue(hyperRectangles[i].label, counts[i]); + } + return new FacetResult(dim, path, totCount, labelValues, labelValues.length); + } + + @Override + public Number getSpecificValue(String dim, String... path) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public List<FacetResult> getAllDims(int topN) throws IOException { + validateTopN(topN); + return Collections.singletonList(getTopChildren(topN, field)); Review Comment: Field would be the `dim` here, not that `path`. ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) Review Comment: Changed to `are`, thanks for pointing that out it did sound a bit awkward :) ########## lucene/facet/src/java/org/apache/lucene/facet/hyperrectangle/HyperRectangleFacetCounts.java: ########## @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.facet.hyperrectangle; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.search.DocIdSetIterator; + +/** Get counts given a list of HyperRectangles (which must be of the same type) */ +public class HyperRectangleFacetCounts extends Facets { + /** Hypper rectangles passed to constructor. */ + protected final HyperRectangle[] hyperRectangles; + + /** Counts, initialized in subclass. */ + protected final int[] counts; + + /** Our field name. */ + protected final String field; + + /** Number of dimensions for field */ + protected final int dims; + + /** Total number of hits. */ + protected int totCount; + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of long hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, LongHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + /** + * Create HyperRectangleFacetCounts using + * + * @param field Field name + * @param hits Hits to facet on + * @param hyperRectangles List of double hyper rectangle facets + * @throws IOException If there is a problem reading the field + */ + public HyperRectangleFacetCounts( + String field, FacetsCollector hits, DoubleHyperRectangle... hyperRectangles) + throws IOException { + this(true, field, hits, hyperRectangles); + } + + private HyperRectangleFacetCounts( + boolean discarded, String field, FacetsCollector hits, HyperRectangle... hyperRectangles) + throws IOException { + assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty"; + assert isHyperRectangleDimsConsistent(hyperRectangles) + : "All hyper rectangles must be the same dimensionality"; + this.field = field; + this.hyperRectangles = hyperRectangles; + this.dims = hyperRectangles[0].dims; + this.counts = new int[hyperRectangles.length]; + count(field, hits.getMatchingDocs()); + } + + private boolean isHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles) { + int dims = hyperRectangles[0].dims; + return Arrays.stream(hyperRectangles).allMatch(hyperRectangle -> hyperRectangle.dims == dims); + } + + /** Counts from the provided field. */ + private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs) + throws IOException { + + for (int i = 0; i < matchingDocs.size(); i++) { + + FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + + BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field); + + final DocIdSetIterator it = hits.bits.iterator(); + if (it == null) { + continue; + } + + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { + if (binaryDocValues.advanceExact(doc)) { + long[] point = LongPoint.unpack(binaryDocValues.binaryValue()); + assert point.length == dims + : "Point dimension (dim=" + + point.length + + ") is incompatible with hyper rectangle dimension (dim=" + + dims + + ")"; + // linear scan, change this to use R trees + boolean docIsValid = false; + for (int j = 0; j < hyperRectangles.length; j++) { + boolean validPoint = true; + for (int dim = 0; dim < dims; dim++) { + HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim); + if (!range.accept(point[dim])) { + validPoint = false; + break; + } + } + if (validPoint) { + counts[j]++; + docIsValid = true; + } + } + if (docIsValid) { + totCount++; + } + } + doc = it.nextDoc(); + } + } + } + + @Override + public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { + validateTopN(topN); + if (field.equals(dim) == false) { + throw new IllegalArgumentException( + "invalid dim \"" + dim + "\"; should be \"" + field + "\""); + } + if (path.length != 0) { + throw new IllegalArgumentException("path.length should be 0"); Review Comment: Ah I think you're getting this part backwards, this enforces 0 path components. You're right in the having `path` here does not make sense. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org