epotyom commented on code in PR #13568: URL: https://github.com/apache/lucene/pull/13568#discussion_r1709853131
########## lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java: ########## @@ -0,0 +1,272 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.sandbox.facet.cutters.ranges; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.lucene.facet.MultiLongValues; +import org.apache.lucene.facet.MultiLongValuesSource; +import org.apache.lucene.facet.range.LongRange; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter; +import org.apache.lucene.search.LongValues; +import org.apache.lucene.search.LongValuesSource; + +/** + * {@link RangeFacetCutter} for ranges of long value that overlap. Uses segment tree optimisation to + * find all matching ranges for a given value <a + * href="https://blog.mikemccandless.com/2013/12/fast-range-faceting-using-segment-trees.html">fast-range-faceting- + * using-segment-trees.html</a> + */ +class OverlappingLongRangeFacetCutter extends LongRangeFacetCutter { + private final LongRangeNode root; + + OverlappingLongRangeFacetCutter( + String field, + MultiLongValuesSource longValuesSource, + LongValuesSource singleLongValuesSource, + LongRange[] longRanges) { + super(field, longValuesSource, singleLongValuesSource, longRanges); + + // Build binary tree on top of intervals: + root = split(0, elementaryIntervals.size(), elementaryIntervals); + + // Set outputs, so we know which range to output for each node in the tree: + for (LongRangeAndPos range : sortedRanges) { + root.addOutputs(range); + } + } + + @Override + List<InclusiveRange> buildElementaryIntervals() { + // Maps all range inclusive endpoints to int flags; 1 + // = start of interval, 2 = end of interval. We need to + // track the start vs end case separately because if a + // given point is both, then it must be its own + // elementary interval: + Map<Long, Integer> endsMap = new HashMap<>(); + + endsMap.put(Long.MIN_VALUE, 1); + endsMap.put(Long.MAX_VALUE, 2); + + for (LongRangeAndPos rangeAndPos : sortedRanges) { + Integer cur = endsMap.get(rangeAndPos.range().min); + if (cur == null) { + endsMap.put(rangeAndPos.range().min, 1); + } else { + endsMap.put(rangeAndPos.range().min, cur | 1); + } + cur = endsMap.get(rangeAndPos.range().max); + if (cur == null) { + endsMap.put(rangeAndPos.range().max, 2); + } else { + endsMap.put(rangeAndPos.range().max, cur | 2); + } + } + + List<Long> endsList = new ArrayList<>(endsMap.keySet()); + Collections.sort(endsList); + + // Build elementaryIntervals (a 1D Venn diagram): + List<InclusiveRange> elementaryIntervals = new ArrayList<>(); + int upto = 1; + long v = endsList.get(0); + long prev; + if (endsMap.get(v) == 3) { + elementaryIntervals.add(new InclusiveRange(v, v)); + prev = v + 1; + } else { + prev = v; + } + + while (upto < endsList.size()) { + v = endsList.get(upto); + int flags = endsMap.get(v); + if (flags == 3) { + // This point is both an end and a start; we need to + // separate it: + if (v > prev) { + elementaryIntervals.add(new InclusiveRange(prev, v - 1)); + } + elementaryIntervals.add(new InclusiveRange(v, v)); + prev = v + 1; + } else if (flags == 1) { + // This point is only the start of an interval; + // attach it to next interval: + if (v > prev) { + elementaryIntervals.add(new InclusiveRange(prev, v - 1)); + } + prev = v; + } else { + assert flags == 2; + // This point is only the end of an interval; attach + // it to last interval: + elementaryIntervals.add(new InclusiveRange(prev, v)); + prev = v + 1; + } + upto++; + } + + return elementaryIntervals; + } + + private static LongRangeNode split(int start, int end, List<InclusiveRange> elementaryIntervals) { + if (start == end - 1) { + // leaf + InclusiveRange range = elementaryIntervals.get(start); + return new LongRangeNode(range.start(), range.end(), null, null, start); + } else { + int mid = (start + end) >>> 1; + LongRangeNode left = split(start, mid, elementaryIntervals); + LongRangeNode right = split(mid, end, elementaryIntervals); + return new LongRangeNode(left.start(), right.end(), left, right, -1); + } + } + + @Override + public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException { + if (singleValues != null) { + LongValues values = singleValues.getValues(context, null); + return new OverlappingSingleValuedRangeLeafFacetCutter( + values, boundaries, pos, requestedRangeCount, root); + } else { + MultiLongValues values = valuesSource.getValues(context); + return new OverlappingMultivaluedRangeLeafFacetCutter( + values, boundaries, pos, requestedRangeCount, root); + } + } + + /** + * TODO: dedup OverlappingMultivaluedRangeLeafFacetCutter and + * OverlappingSingleValuedRangeLeafFacetCutter code - they are similar but they extend different + * base classes. + */ + static class OverlappingMultivaluedRangeLeafFacetCutter + extends LongRangeMultivaluedLeafFacetCutter { + + LongRangeNode elementaryIntervalRoot; + + private int elementaryIntervalUpto; + + OverlappingMultivaluedRangeLeafFacetCutter( + MultiLongValues longValues, + long[] boundaries, + int[] pos, + int requestedRangeCount, + LongRangeNode elementaryIntervalRoot) { + super(longValues, boundaries, pos, requestedRangeCount); + requestedIntervalTracker = new IntervalTracker.MultiIntervalTracker(requestedRangeCount); + this.elementaryIntervalRoot = elementaryIntervalRoot; + } + + @Override + void maybeRollUp(IntervalTracker rollUpInto) { + elementaryIntervalUpto = 0; + rollupMultiValued(elementaryIntervalRoot); + } + + // Note: combined rollUpSingleValued and rollUpMultiValued from OverlappingLongRangeCounter into + // 1 rollUp method + private boolean rollupMultiValued(LongRangeNode node) { Review Comment: I'll update the TODO item to refactor it as a follow up. Thanks! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org