yupeng9 commented on a change in pull request #7252: URL: https://github.com/apache/pinot/pull/7252#discussion_r683588017
########## File path: pinot-core/src/main/java/org/apache/pinot/core/geospatial/transform/function/ScalarFunctions.java ########## @@ -111,4 +138,35 @@ public static String stAsText(byte[] bytes) { public static long geoToH3(double longitude, double latitude, int resolution) { return H3Utils.H3_CORE.geoToH3(latitude, longitude, resolution); } + + public static List<Long> polygonToH3(List<Coordinate> region, int res) { Review comment: javadocs ########## File path: pinot-core/src/main/java/org/apache/pinot/core/geospatial/transform/function/ScalarFunctions.java ########## @@ -111,4 +138,35 @@ public static String stAsText(byte[] bytes) { public static long geoToH3(double longitude, double latitude, int resolution) { return H3Utils.H3_CORE.geoToH3(latitude, longitude, resolution); } + + public static List<Long> polygonToH3(List<Coordinate> region, int res) { + return H3Utils.H3_CORE.polyfill( + region.stream() + .map(coord -> new GeoCoord(coord.x, coord.y)) + .collect(Collectors.toUnmodifiableList()), + ImmutableList.of(), + res); + } + + public static int calcResFromMaxDist(double maxDist, int minHexagonEdges) { + return RESOLUTIONS.get(Collections.min(RESOLUTIONS.keySet().stream() + .filter(edgeLen -> edgeLen < maxDist / minHexagonEdges) + .collect(Collectors.toUnmodifiableSet()))); + } + + public static double maxDist(List<Coordinate> points) { + int n = points.size(); + double max = 0; + + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + max = Math.max(max, dist(points.get(i), points.get(j))); + } + } + return Math.sqrt(max); + } + + public static double dist(Coordinate a, Coordinate b) { Review comment: either make the unit an argument or add the unit as part of the func name ########## File path: pinot-core/src/main/java/org/apache/pinot/core/geospatial/transform/function/ScalarFunctions.java ########## @@ -33,6 +40,26 @@ */ public class ScalarFunctions { + // from https://h3geo.org/docs/core-library/restable + private static final ImmutableMap<Double, Integer> RESOLUTIONS = ImmutableMap.<Double, Integer>builder() Review comment: plz move this to geo util class ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java ########## @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.filter; + +import java.util.*; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.predicate.Predicate; +import org.apache.pinot.core.geospatial.transform.function.ScalarFunctions; +import org.apache.pinot.core.operator.blocks.FilterBlock; +import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator; +import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; +import org.apache.pinot.segment.local.utils.GeometrySerializer; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.index.reader.H3IndexReader; +import org.apache.pinot.spi.utils.BytesUtils; +import org.locationtech.jts.geom.Coordinate; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + +/** + * A filter operator that uses H3 index for geospatial data inclusion Review comment: plz add comments about the algorithm used in this operator ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java ########## @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.filter; + +import java.util.*; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.predicate.Predicate; +import org.apache.pinot.core.geospatial.transform.function.ScalarFunctions; +import org.apache.pinot.core.operator.blocks.FilterBlock; +import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator; +import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; +import org.apache.pinot.segment.local.utils.GeometrySerializer; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.index.reader.H3IndexReader; +import org.apache.pinot.spi.utils.BytesUtils; +import org.locationtech.jts.geom.Coordinate; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + +/** + * A filter operator that uses H3 index for geospatial data inclusion + */ +public class H3InclusionIndexFilterOperator extends BaseFilterOperator { + private static final String OPERATOR_NAME = "H3IndexFilterOperator"; + + private final IndexSegment _segment; + private final Predicate _predicate; + private final int _numDocs; + private final H3IndexReader _h3IndexReader; + private final List<Long> _h3Ids; + + public H3InclusionIndexFilterOperator(IndexSegment segment, Predicate predicate, int numDocs) { + _segment = segment; + _predicate = predicate; + _numDocs = numDocs; + + List<ExpressionContext> arguments = predicate.getLhs().getFunction().getArguments(); + Coordinate[] coordinates; + if (arguments.get(0).getType() == ExpressionContext.Type.IDENTIFIER) { + // look up arg0's h3 indices + _h3IndexReader = segment.getDataSource(arguments.get(0).getIdentifier()).getH3Index(); + // arg1 is the literal + coordinates = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(1).getLiteral())).getCoordinates(); + } else { + // look up arg1's h3 indices + _h3IndexReader = segment.getDataSource(arguments.get(1).getIdentifier()).getH3Index(); + // arg0 is the literal + coordinates = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(0).getLiteral())).getCoordinates(); + } + // must be some h3 index + assert _h3IndexReader != null; + + // look up all hexagons for provided coordinates + List<Coordinate> coordinateList = Arrays.asList(coordinates); + _h3Ids = ScalarFunctions.polygonToH3(coordinateList, + ScalarFunctions.calcResFromMaxDist(ScalarFunctions.maxDist(coordinateList), 50)); + } + + @Override + protected FilterBlock getNextBlock() { + // have list of h3 hashes for polygon provided + // return filtered num_docs + MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap(); + for (long docId : _h3Ids) { + fullMatchDocIds.or(_h3IndexReader.getDocIds(docId)); Review comment: how do you determine full matches? ########## File path: pinot-core/src/main/java/org/apache/pinot/core/geospatial/transform/function/ScalarFunctions.java ########## @@ -33,6 +40,26 @@ */ public class ScalarFunctions { + // from https://h3geo.org/docs/core-library/restable + private static final ImmutableMap<Double, Integer> RESOLUTIONS = ImmutableMap.<Double, Integer>builder() Review comment: Also add more comments about the meaning of the value ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java ########## @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.filter; + +import java.util.*; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.predicate.Predicate; +import org.apache.pinot.core.geospatial.transform.function.ScalarFunctions; +import org.apache.pinot.core.operator.blocks.FilterBlock; +import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator; +import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; +import org.apache.pinot.segment.local.utils.GeometrySerializer; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.index.reader.H3IndexReader; +import org.apache.pinot.spi.utils.BytesUtils; +import org.locationtech.jts.geom.Coordinate; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + +/** + * A filter operator that uses H3 index for geospatial data inclusion + */ +public class H3InclusionIndexFilterOperator extends BaseFilterOperator { + private static final String OPERATOR_NAME = "H3IndexFilterOperator"; + + private final IndexSegment _segment; + private final Predicate _predicate; + private final int _numDocs; + private final H3IndexReader _h3IndexReader; + private final List<Long> _h3Ids; + + public H3InclusionIndexFilterOperator(IndexSegment segment, Predicate predicate, int numDocs) { + _segment = segment; + _predicate = predicate; + _numDocs = numDocs; + + List<ExpressionContext> arguments = predicate.getLhs().getFunction().getArguments(); + Coordinate[] coordinates; + if (arguments.get(0).getType() == ExpressionContext.Type.IDENTIFIER) { + // look up arg0's h3 indices + _h3IndexReader = segment.getDataSource(arguments.get(0).getIdentifier()).getH3Index(); + // arg1 is the literal + coordinates = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(1).getLiteral())).getCoordinates(); + } else { + // look up arg1's h3 indices + _h3IndexReader = segment.getDataSource(arguments.get(1).getIdentifier()).getH3Index(); + // arg0 is the literal + coordinates = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(0).getLiteral())).getCoordinates(); + } + // must be some h3 index + assert _h3IndexReader != null; + + // look up all hexagons for provided coordinates + List<Coordinate> coordinateList = Arrays.asList(coordinates); + _h3Ids = ScalarFunctions.polygonToH3(coordinateList, + ScalarFunctions.calcResFromMaxDist(ScalarFunctions.maxDist(coordinateList), 50)); + } + + @Override + protected FilterBlock getNextBlock() { + // have list of h3 hashes for polygon provided + // return filtered num_docs + MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap(); + for (long docId : _h3Ids) { + fullMatchDocIds.or(_h3IndexReader.getDocIds(docId)); Review comment: please add unit tests to verify the algorithm ########## File path: pinot-core/src/main/java/org/apache/pinot/core/geospatial/transform/function/StContainsFunction.java ########## @@ -83,9 +82,6 @@ public TransformResultMetadata getResultMetadata() { for (int i = 0; i < projectionBlock.getNumDocs(); i++) { Geometry firstGeometry = GeometrySerializer.deserialize(firstValues[i]); Geometry secondGeometry = GeometrySerializer.deserialize(secondValues[i]); - if (GeometryUtils.isGeography(firstGeometry) || GeometryUtils.isGeography(secondGeometry)) { Review comment: this check is needed, the current implementation does not handle geography correctly. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org