This is an automated email from the ASF dual-hosted git repository.

kishoreg pushed a commit to branch h3-index in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/h3-index by this push: new bab10b9 Removing H3WITHIN UDF and adding logic to use h3 index for st_distance udf bab10b9 is described below commit bab10b9536c22cce3e21a974077860e79148bb24 Author: kishoreg <g.kish...@gmail.com> AuthorDate: Mon Dec 21 09:16:46 2020 -0800 Removing H3WITHIN UDF and adding logic to use h3 index for st_distance udf --- .../operator/filter/H3IndexFilterOperator.java | 73 +++++++++++++++++----- .../org/apache/pinot/core/plan/FilterPlanNode.java | 32 +++++----- .../request/context/predicate/GeoPredicate.java | 12 +--- .../creator/impl/geospatial/H3IndexCreator.java | 10 +++ .../batch/starbucksStores/rawdata/data.csv | 2 +- 5 files changed, 87 insertions(+), 42 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java index 7528b7e..0b65dd7 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java @@ -23,10 +23,19 @@ import com.uber.h3core.LengthUnit; import java.io.IOException; import java.util.List; import org.apache.pinot.core.common.DataSource; +import org.apache.pinot.core.geospatial.serde.GeometrySerializer; +import org.apache.pinot.core.geospatial.transform.function.StPointFunction; +import org.apache.pinot.core.indexsegment.IndexSegment; import org.apache.pinot.core.operator.blocks.FilterBlock; import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; +import org.apache.pinot.core.query.request.context.ExpressionContext; +import org.apache.pinot.core.query.request.context.FunctionContext; import org.apache.pinot.core.query.request.context.predicate.GeoPredicate; +import org.apache.pinot.core.query.request.context.predicate.Predicate; +import 
org.apache.pinot.core.query.request.context.predicate.RangePredicate; import org.apache.pinot.core.segment.index.readers.geospatial.H3IndexReader; +import org.apache.pinot.spi.utils.BytesUtils; +import org.locationtech.jts.geom.Geometry; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; import org.roaringbitmap.buffer.MutableRoaringBitmap; @@ -36,14 +45,51 @@ public class H3IndexFilterOperator extends BaseFilterOperator { // NOTE: Range index can only apply to dictionary-encoded columns for now // TODO: Support raw index columns - private final GeoPredicate _geoPredicate; - private final DataSource _dataSource; private final int _numDocs; private final H3Core _h3Core; + private final H3IndexReader _h3IndexReader; + private Geometry _geometry; + private double _distance; - public H3IndexFilterOperator(GeoPredicate geoPredicate, DataSource dataSource, int numDocs) { - _geoPredicate = geoPredicate; - _dataSource = dataSource; + public H3IndexFilterOperator(Predicate predicate, IndexSegment indexSegment, int numDocs) { + FunctionContext function = predicate.getLhs().getFunction(); + String columnName; + + if (function.getArguments().get(0).getType() == ExpressionContext.Type.IDENTIFIER) { + columnName = function.getArguments().get(0).getIdentifier(); + byte[] bytes = BytesUtils.toBytes(function.getArguments().get(1).getLiteral()); + _geometry = GeometrySerializer.deserialize(bytes); + } else if (function.getArguments().get(1).getType() == ExpressionContext.Type.IDENTIFIER) { + columnName = function.getArguments().get(1).getIdentifier(); + byte[] bytes = BytesUtils.toBytes(function.getArguments().get(0).getLiteral()); + _geometry = GeometrySerializer.deserialize(bytes); + } else { + throw new RuntimeException("Expecting one of the arguments of ST_DISTANCE to be an identifier"); + } + DataSource dataSource = indexSegment.getDataSource(columnName); + _h3IndexReader = dataSource.getH3Index(); + switch (predicate.getType()) { + case EQ: + break; + case NOT_EQ: + 
break; + case IN: + break; + case NOT_IN: + break; + case RANGE: + RangePredicate rangePredicate = (RangePredicate) predicate; + _distance = Double.parseDouble(rangePredicate.getUpperBound()); + break; + case REGEXP_LIKE: + break; + case TEXT_MATCH: + break; + case IS_NULL: + break; + case IS_NOT_NULL: + break; + } _numDocs = numDocs; try { _h3Core = H3Core.newInstance(); @@ -54,34 +100,31 @@ public class H3IndexFilterOperator extends BaseFilterOperator { @Override protected FilterBlock getNextBlock() { - H3IndexReader h3IndexReader = _dataSource.getH3Index(); //todo: this needs to come from somewhere? int resolution = 5; - long h3Id = _h3Core - .geoToH3(_geoPredicate.getGeometry().getCoordinate().x, _geoPredicate.getGeometry().getCoordinate().y, - resolution); - assert h3IndexReader != null; + long h3Id = _h3Core.geoToH3(_geometry.getCoordinate().x, _geometry.getCoordinate().y, resolution); + assert _h3IndexReader != null; - //find the number of rings based on geopredicate.distance + //find the number of rings based on distance //FullMatch double edgeLength = _h3Core.edgeLength(resolution, LengthUnit.km); - int numFullMatchedRings = (int) (_geoPredicate.getDistance() / edgeLength); + int numFullMatchedRings = (int) (_distance / edgeLength); List<Long> fullMatchRings = _h3Core.kRing(h3Id, numFullMatchedRings); fullMatchRings.add(h3Id); MutableRoaringBitmap fullMatchedDocIds = new MutableRoaringBitmap(); for (long id : fullMatchRings) { - ImmutableRoaringBitmap docIds = h3IndexReader.getDocIds(id); + ImmutableRoaringBitmap docIds = _h3IndexReader.getDocIds(id); fullMatchedDocIds.or(docIds); } //partial matchedRings - int numPartialMatchedRings = (int) (_geoPredicate.getDistance() / edgeLength); + int numPartialMatchedRings = (int) ((_distance + edgeLength) / edgeLength); List<Long> partialMatchedRings = _h3Core.kRing(h3Id, numPartialMatchedRings); partialMatchedRings.add(h3Id); final MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap(); 
partialMatchedRings.removeAll(fullMatchRings); for (long id : partialMatchedRings) { - ImmutableRoaringBitmap docIds = h3IndexReader.getDocIds(id); + ImmutableRoaringBitmap docIds = _h3IndexReader.getDocIds(id); partialMatchDocIds.or(docIds); } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java index 43b2174..4455a24 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java @@ -23,8 +23,10 @@ import java.util.Arrays; import java.util.List; import java.util.Map; import javax.annotation.Nullable; +import org.apache.pinot.common.request.Identifier; import org.apache.pinot.core.common.DataSource; import org.apache.pinot.core.geospatial.GeometryUtils; +import org.apache.pinot.core.geospatial.transform.function.StDistanceFunction; import org.apache.pinot.core.indexsegment.IndexSegment; import org.apache.pinot.core.operator.filter.BaseFilterOperator; import org.apache.pinot.core.operator.filter.BitmapBasedFilterOperator; @@ -128,24 +130,22 @@ public class FilterPlanNode implements PlanNode { ExpressionContext lhs = predicate.getLhs(); if (lhs.getType() == ExpressionContext.Type.FUNCTION) { FunctionContext function = lhs.getFunction(); - if (function.getFunctionName().equalsIgnoreCase("H3_WITHIN")) { + + boolean canApplyH3Index = false; + if (function.getFunctionName().equalsIgnoreCase(StDistanceFunction.FUNCTION_NAME)) { String columnName = function.getArguments().get(0).getIdentifier(); - GeoPredicate geoPredicate = new GeoPredicate(); - geoPredicate.setType(GeoPredicate.Type.WITHIN); - float lat = Float.parseFloat(function.getArguments().get(1).getLiteral()); - float lon = Float.parseFloat(function.getArguments().get(2).getLiteral()); - float distance = Float.parseFloat(function.getArguments().get(3).getLiteral()); -// float resolution 
=Float.parseFloat(function.getArguments().get(4).getLiteral()); - Point point = GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(lat, lon)); - geoPredicate.setGeometry(point); - geoPredicate.setDistance(distance); - //set geo predicate - return new H3IndexFilterOperator(geoPredicate, _indexSegment.getDataSource(columnName), _numDocs); - } else { - // TODO: ExpressionFilterOperator does not support predicate types without PredicateEvaluator (IS_NULL, - // IS_NOT_NULL, TEXT_MATCH) - return new ExpressionFilterOperator(_indexSegment, predicate, _numDocs); + DataSource dataSource = _indexSegment.getDataSource(columnName); + if (dataSource.getH3Index() != null) { + canApplyH3Index = true; + } + } + + if (canApplyH3Index) { + return new H3IndexFilterOperator(predicate, _indexSegment, _numDocs); } + // TODO: ExpressionFilterOperator does not support predicate types without PredicateEvaluator (IS_NULL, + // IS_NOT_NULL, TEXT_MATCH) + return new ExpressionFilterOperator(_indexSegment, predicate, _numDocs); } else { DataSource dataSource = _indexSegment.getDataSource(lhs.getIdentifier()); switch (predicate.getType()) { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java index f47ace2..473be7c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java @@ -10,13 +10,13 @@ public class GeoPredicate { //this is the column name ExpressionContext _lhs; - Type type; + Predicate type; Geometry _geometry; double _distance; - public enum Type { + public enum Pre { WITHIN, OVERLAP; } @@ -28,14 +28,6 @@ public class GeoPredicate { _lhs = lhs; } - public Type getType() { - return type; - } - - public void setType(Type type) { - this.type = type; - } - public Geometry 
getGeometry() { return _geometry; } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java index 67b3fc9..ec7ef6f 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java @@ -18,11 +18,15 @@ import java.util.PriorityQueue; import java.util.Random; import java.util.TreeMap; import org.apache.commons.io.FileUtils; +import org.apache.pinot.core.geospatial.GeometryUtils; import org.apache.pinot.core.segment.creator.GeoSpatialIndexCreator; import org.apache.pinot.core.segment.index.readers.geospatial.H3IndexReader; import org.apache.pinot.core.segment.memory.PinotDataBuffer; import org.apache.pinot.spi.data.DimensionFieldSpec; import org.apache.pinot.spi.data.FieldSpec; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.geom.Point; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; import org.roaringbitmap.buffer.MutableRoaringBitmap; @@ -272,6 +276,10 @@ public class H3IndexCreator implements GeoSpatialIndexCreator { public static void main(String[] args) throws Exception { + Point point1 = GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(37.3861, -122.0839)); + Point point2 = GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(37.368832, -122.036346)); + System.out.println("point1.distance(point2) = " + point1.distance(point2)); + System.exit(0); File indexDir = new File(System.getProperty("java.io.tmpdir"), "h3IndexDir"); FileUtils.deleteDirectory(indexDir); indexDir.mkdirs(); @@ -310,5 +318,7 @@ public class H3IndexCreator implements GeoSpatialIndexCreator { System.out.printf("Matched: expected: %d actual: %d for h3:%d \n", map.get(h3), docIds.getCardinality(), h3); } 
} + + } } diff --git a/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv b/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv index 3fee0f5..646aac6 100644 --- a/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv +++ b/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv @@ -1,4 +1,4 @@ -lat,long,name,address +lon,lat,name,address -149.8935557,61.21759217,Starbucks - AK - Anchorage 00001,"601 West Street_601 West 5th Avenue_Anchorage, Alaska 99501_907-277-2477" -149.9054948,61.19533942,Starbucks - AK - Anchorage 00002,"Carrs-Anchorage #1805_1650 W Northern Lights Blvd_Anchorage, Alaska 99503_907-339-0500" -149.7522,61.2297,Starbucks - AK - Anchorage 00003,"Elmendorf AFB_Bldg 5800 Westover Avenue_Anchorage, Alaska 99506" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org