Jackie-Jiang commented on a change in pull request #6409: URL: https://github.com/apache/incubator-pinot/pull/6409#discussion_r557825606
########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java ########## @@ -90,110 +92,139 @@ public H3IndexFilterOperator(IndexSegment segment, Predicate predicate, int numD @Override protected FilterBlock getNextBlock() { if (_upperBound < 0 || _lowerBound > _upperBound) { - // Invalid upper bound - + // Invalid upper bound, return an empty block return new FilterBlock(EmptyDocIdSet.getInstance()); } - if (Double.isNaN(_lowerBound) || _lowerBound < 0) { - // No lower bound + try { + if (Double.isNaN(_lowerBound) || _lowerBound < 0) { + // No lower bound - if (Double.isNaN(_upperBound)) { - // No upper bound - return new FilterBlock(new MatchAllDocIdSet(_numDocs)); - } - - List<Long> fullMatchH3Ids = getFullMatchH3Ids(_upperBound); - HashSet<Long> partialMatchH3Ids = new HashSet<>(getPartialMatchH3Ids(_upperBound)); - partialMatchH3Ids.removeAll(fullMatchH3Ids); + if (Double.isNaN(_upperBound)) { + // No bound, return a match-all block + return new FilterBlock(new MatchAllDocIdSet(_numDocs)); + } - MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap(); - for (long fullMatchH3Id : fullMatchH3Ids) { - fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id)); - } + // Upper bound only + List<Long> fullMatchH3Ids = getAlwaysMatchH3Ids(_upperBound); + HashSet<Long> partialMatchH3Ids = new HashSet<>(getPossibleMatchH3Ids(_upperBound)); + partialMatchH3Ids.removeAll(fullMatchH3Ids); - MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap(); - for (long partialMatchH3Id : partialMatchH3Ids) { - partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id)); - } + MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap(); + for (long fullMatchH3Id : fullMatchH3Ids) { + fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id)); + } - return getFilterBlock(fullMatchDocIds, partialMatchDocIds); - } + MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap(); + for (long partialMatchH3Id : partialMatchH3Ids) { + partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id)); + } - if (Double.isNaN(_upperBound)) { - // No upper bound + return getFilterBlock(fullMatchDocIds, partialMatchDocIds); + } - List<Long> notMatchH3Ids = getFullMatchH3Ids(_lowerBound); - Set<Long> partialMatchH3Ids = new HashSet<>(getPartialMatchH3Ids(_lowerBound)); + if (Double.isNaN(_upperBound)) { + // Lower bound only + + List<Long> alwaysNotMatchH3Ids = getAlwaysMatchH3Ids(_lowerBound); + Set<Long> possibleNotMatchH3Ids = new HashSet<>(getPossibleMatchH3Ids(_lowerBound)); + + // Flip the result of possible not match doc ids to get the full match doc ids + MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap(); + for (long partialMatchH3Id : possibleNotMatchH3Ids) { + fullMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id)); + } + fullMatchDocIds.flip(0L, _numDocs); + + // Remove the always not match H3 ids from possible not match H3 ids to get the partial match H3 ids + possibleNotMatchH3Ids.removeAll(alwaysNotMatchH3Ids); + MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap(); + for (long partialMatchH3Id : possibleNotMatchH3Ids) { + partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id)); + } + + return getFilterBlock(fullMatchDocIds, partialMatchDocIds); + } + // Both lower bound and upper bound exist + List<Long> lowerAlwaysMatchH3Ids = getAlwaysMatchH3Ids(_lowerBound); + List<Long> lowerPossibleMatchH3Ids = getPossibleMatchH3Ids(_lowerBound); + List<Long> upperAlwaysMatchH3Ids = getAlwaysMatchH3Ids(_upperBound); + List<Long> upperPossibleMatchH3Ids = getPossibleMatchH3Ids(_upperBound); + + // Remove the possible match H3 ids for the lower bound from the always match H3 ids for the upper bound to get + // the full match H3 ids + Set<Long> fullMatchH3Ids; + if (upperAlwaysMatchH3Ids.size() > lowerPossibleMatchH3Ids.size()) { + fullMatchH3Ids = new HashSet<>(upperAlwaysMatchH3Ids); + fullMatchH3Ids.removeAll(lowerPossibleMatchH3Ids); + } else { + fullMatchH3Ids = Collections.emptySet(); + } MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap(); - for (long partialMatchH3Id : partialMatchH3Ids) { - fullMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id)); + for (long fullMatchH3Id : fullMatchH3Ids) { + fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id)); } - fullMatchDocIds.flip(0L, _numDocs); - partialMatchH3Ids.removeAll(notMatchH3Ids); + // Remove the always match H3 ids for the lower bound (always not match H3 ids) and the full match H3 ids from the + // possible match H3 ids for the upper bound to get the partial match H3 ids + Set<Long> partialMatchH3Ids = new HashSet<>(upperPossibleMatchH3Ids); + partialMatchH3Ids.removeAll(lowerAlwaysMatchH3Ids); + partialMatchH3Ids.removeAll(fullMatchH3Ids); MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap(); for (long partialMatchH3Id : partialMatchH3Ids) { partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id)); } return getFilterBlock(fullMatchDocIds, partialMatchDocIds); + } catch (Exception e) { + // Fall back to ExpressionFilterOperator when the execution encounters exception (e.g. numRings is too large) + return new ExpressionFilterOperator(_segment, _predicate, _numDocs).getNextBlock(); } - - // Both lower bound and upper bound exist - List<Long> lowerFullMatchH3Ids = getFullMatchH3Ids(_lowerBound); - List<Long> lowerPartialMatchH3Ids = getPartialMatchH3Ids(_lowerBound); - List<Long> upperFullMatchH3Ids = getFullMatchH3Ids(_upperBound); - List<Long> upperPartialMatchH3Ids = getPartialMatchH3Ids(_upperBound); - - Set<Long> fullMatchH3Ids; - if (upperFullMatchH3Ids.size() > lowerPartialMatchH3Ids.size()) { - fullMatchH3Ids = new HashSet<>(upperFullMatchH3Ids); - fullMatchH3Ids.removeAll(lowerPartialMatchH3Ids); - } else { - fullMatchH3Ids = Collections.emptySet(); - } - MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap(); - for (long fullMatchH3Id : fullMatchH3Ids) { - fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id)); - } - - Set<Long> partialMatchH3Ids = new HashSet<>(upperPartialMatchH3Ids); - partialMatchH3Ids.removeAll(lowerFullMatchH3Ids); - partialMatchH3Ids.removeAll(fullMatchH3Ids); - MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap(); - for (long partialMatchH3Id : partialMatchH3Ids) { - partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id)); - } - - return getFilterBlock(fullMatchDocIds, partialMatchDocIds); } /** * Returns the H3 ids that is ALWAYS fully covered by the circle with the given distance as the radius and a point * within the _h3Id hexagon as the center. + * <p>The farthest distance from the center of the center hexagon to the center of a hexagon in the nth ring is + * {@code sqrt(3) * n * edgeLength}. Counting the distance from the center to a point in the hexagon, which is up + * to the edge length, it is guaranteed that the hexagons in the nth ring are always fully covered if: + * {@code distance >= (sqrt(3) * n + 2) * edgeLength}. */ - private List<Long> getFullMatchH3Ids(double distance) { + private List<Long> getAlwaysMatchH3Ids(double distance) { // NOTE: Pick a constant slightly larger than sqrt(3) to be conservative int numRings = (int) Math.floor((distance / _edgeLength - 2) / 1.7321); - if (numRings >= 0) { - return H3Utils.H3_CORE.kRing(_h3Id, numRings); - } else { - return Collections.emptyList(); - } + return numRings >= 0 ? getH3Ids(numRings) : Collections.emptyList(); } /** - * Returns the H3 ids that MIGHT BE partially/fully covered by the circle with the given distance as the radius and a + * Returns the H3 ids that MIGHT BE fully/partially covered by the circle with the given distance as the radius and a * point within the _h3Id hexagon as the center. + * <p>The shortest distance from the center of the center hexagon to the center of a hexagon in the nth ring is + * {@code >= 1.5 * n * edgeLength}. Counting the distance from the center to a point in the hexagon, which is up + * to the edge length, it is guaranteed that the hexagons in the nth ring are always not fully/partially covered if: + * {@code distance < (1.5 * n - 2) * edgeLength}. */ - private List<Long> getPartialMatchH3Ids(double distance) { + private List<Long> getPossibleMatchH3Ids(double distance) { // NOTE: Add a small delta (0.001) to be conservative int numRings = (int) Math.floor((distance / _edgeLength + 2) / 1.5 + 0.001); + return getH3Ids(numRings); + } + + /** + * Returns the H3 ids for the given number of rings around the _h3Id. + * IMPORTANT: Throw exception when number of rings is too large because H3 library might send SIGILL for large number + * of rings, which can terminate the JVM. Also, the result won't be accurate when the distance is too large. In such + * case, the operator will fallback to ExpressionFilterOperator. + */ + private List<Long> getH3Ids(int numRings) { + Preconditions.checkState(numRings <= 100, "Expect numRings <= 100, got: %s", numRings); Review comment: Yes, as mentioned in the comments ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org