This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 498387a323 Support st_contains using H3 index (#8498) 498387a323 is described below commit 498387a3239d611539aad1badbbabb5d5f66b75c Author: WangCHX <wcxz...@gmail.com> AuthorDate: Sat Apr 30 09:06:12 2022 +0800 Support st_contains using H3 index (#8498) The idea is to convert input geometry into a list of h3 cells by using polyfill. But h3 polyfill only fills with the hexagons whose centers are contained by the geometry. so creating a method coverGeometryInH3 to return the set of H3 cells at the specified resolution which completely cover the input shape. --- .../filter/H3InclusionIndexFilterOperator.java | 152 ++++++++++++++ .../org/apache/pinot/core/plan/FilterPlanNode.java | 59 +++++- .../apache/pinot/queries/H3IndexQueriesTest.java | 226 +++++++++++++++++++-- .../apache/pinot/segment/local/utils/H3Utils.java | 92 +++++++++ pom.xml | 2 +- 5 files changed, 501 insertions(+), 30 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java new file mode 100644 index 0000000000..c64d511697 --- /dev/null +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.filter; + +import it.unimi.dsi.fastutil.longs.LongIterator; +import it.unimi.dsi.fastutil.longs.LongSet; +import java.util.Collections; +import java.util.List; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.predicate.EqPredicate; +import org.apache.pinot.common.request.context.predicate.Predicate; +import org.apache.pinot.core.common.Operator; +import org.apache.pinot.core.operator.blocks.FilterBlock; +import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator; +import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; +import org.apache.pinot.segment.local.utils.GeometrySerializer; +import org.apache.pinot.segment.local.utils.H3Utils; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.index.reader.H3IndexReader; +import org.apache.pinot.spi.utils.BooleanUtils; +import org.apache.pinot.spi.utils.BytesUtils; +import org.locationtech.jts.geom.Geometry; +import org.roaringbitmap.buffer.BufferFastAggregation; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + + +/** + * A filter operator that uses H3 index for geospatial data inclusion + */ +public class H3InclusionIndexFilterOperator extends BaseFilterOperator { + + private static final String EXPLAIN_NAME = "INCLUSION_FILTER_H3_INDEX"; + + private final IndexSegment _segment; + private final Predicate _predicate; + private final int _numDocs; + private final H3IndexReader _h3IndexReader; + private final Geometry _geometry; + private final boolean _isPositiveCheck; + + public H3InclusionIndexFilterOperator(IndexSegment segment, Predicate predicate, int numDocs) { + _segment = segment; + _predicate = predicate; + _numDocs = numDocs; + + List<ExpressionContext> arguments = predicate.getLhs().getFunction().getArguments(); + EqPredicate eqPredicate = (EqPredicate) predicate; + _isPositiveCheck = BooleanUtils.toBoolean(eqPredicate.getValue()); + + if (arguments.get(0).getType() == ExpressionContext.Type.IDENTIFIER) { + _h3IndexReader = segment.getDataSource(arguments.get(0).getIdentifier()).getH3Index(); + _geometry = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(1).getLiteral())); + } else { + _h3IndexReader = segment.getDataSource(arguments.get(1).getIdentifier()).getH3Index(); + _geometry = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(0).getLiteral())); + } + // must be some h3 index + assert _h3IndexReader != null : "the column must have H3 index setup."; + } + + @Override + protected FilterBlock getNextBlock() { + // get the set of H3 cells at the specified resolution which completely cover the input shape and potential cover. + Pair<LongSet, LongSet> fullCoverAndPotentialCoverCells = + H3Utils.coverGeometryInH3(_geometry, _h3IndexReader.getH3IndexResolution().getLowestResolution()); + LongSet fullyCoverH3Cells = fullCoverAndPotentialCoverCells.getLeft(); + LongSet potentialCoverH3Cells = fullCoverAndPotentialCoverCells.getRight(); + + // have list of h3 cell ids for polygon provided + // return filtered num_docs + ImmutableRoaringBitmap[] potentialMatchDocIds = new ImmutableRoaringBitmap[potentialCoverH3Cells.size()]; + int i = 0; + LongIterator potentialCoverH3CellsIterator = potentialCoverH3Cells.iterator(); + while (potentialCoverH3CellsIterator.hasNext()) { + potentialMatchDocIds[i++] = _h3IndexReader.getDocIds(potentialCoverH3CellsIterator.nextLong()); + } + MutableRoaringBitmap potentialMatchMutableRoaringBitmap = BufferFastAggregation.or(potentialMatchDocIds); + if (_isPositiveCheck) { + ImmutableRoaringBitmap[] fullMatchDocIds = new ImmutableRoaringBitmap[fullyCoverH3Cells.size()]; + i = 0; + LongIterator fullyCoverH3CellsIterator = fullyCoverH3Cells.iterator(); + while (fullyCoverH3CellsIterator.hasNext()) { + fullMatchDocIds[i++] = _h3IndexReader.getDocIds(fullyCoverH3CellsIterator.nextLong()); + } + MutableRoaringBitmap fullMatchMutableRoaringBitmap = BufferFastAggregation.or(fullMatchDocIds); + return getFilterBlock(fullMatchMutableRoaringBitmap, potentialMatchMutableRoaringBitmap); + } else { + i = 0; + // remove full match from potential match to get potential not match cells. + potentialCoverH3Cells.removeAll(fullyCoverH3Cells); + ImmutableRoaringBitmap[] potentialNotMatchMutableRoaringBitmap = + new ImmutableRoaringBitmap[potentialCoverH3Cells.size()]; + LongIterator potentialNotMatchH3CellsIterator = potentialCoverH3Cells.iterator(); + while (potentialNotMatchH3CellsIterator.hasNext()) { + potentialNotMatchMutableRoaringBitmap[i++] = + _h3IndexReader.getDocIds(potentialNotMatchH3CellsIterator.nextLong()); + } + MutableRoaringBitmap potentialNotMatch = BufferFastAggregation.or(potentialNotMatchMutableRoaringBitmap); + // flip potential match bit map to get exactly not match bitmap. + potentialMatchMutableRoaringBitmap.flip(0L, _numDocs); + return getFilterBlock(potentialMatchMutableRoaringBitmap, potentialNotMatch); + } + } + + /** + * Returns the filter block based on the given the partial match doc ids. + */ + private FilterBlock getFilterBlock(MutableRoaringBitmap fullMatchDocIds, MutableRoaringBitmap partialMatchDocIds) { + ExpressionFilterOperator expressionFilterOperator = new ExpressionFilterOperator(_segment, _predicate, _numDocs); + ScanBasedDocIdIterator docIdIterator = + (ScanBasedDocIdIterator) expressionFilterOperator.getNextBlock().getBlockDocIdSet().iterator(); + MutableRoaringBitmap result = docIdIterator.applyAnd(partialMatchDocIds); + result.or(fullMatchDocIds); + return new FilterBlock(new BitmapDocIdSet(result, _numDocs) { + @Override + public long getNumEntriesScannedInFilter() { + return docIdIterator.getNumEntriesScanned(); + } + }); + } + + @Override + public List<Operator> getChildOperators() { + return Collections.emptyList(); + } + + @Override + public String toExplainString() { + StringBuilder stringBuilder = new StringBuilder(EXPLAIN_NAME).append("(inclusionIndex:h3_index"); + stringBuilder.append(",operator:").append(_predicate.getType()); + stringBuilder.append(",predicate:").append(_predicate.toString()); + return stringBuilder.append(')').toString(); + } +} diff --git a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java index c43c5b5912..716810e62f 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java @@ -38,6 +38,7 @@ import org.apache.pinot.core.operator.filter.BitmapBasedFilterOperator; import org.apache.pinot.core.operator.filter.EmptyFilterOperator; import org.apache.pinot.core.operator.filter.ExpressionFilterOperator; import org.apache.pinot.core.operator.filter.FilterOperatorUtils; +import org.apache.pinot.core.operator.filter.H3InclusionIndexFilterOperator; import org.apache.pinot.core.operator.filter.H3IndexFilterOperator; import org.apache.pinot.core.operator.filter.JsonMatchFilterOperator; import org.apache.pinot.core.operator.filter.MatchAllFilterOperator; @@ -56,6 +57,7 @@ import org.roaringbitmap.buffer.MutableRoaringBitmap; public class FilterPlanNode implements PlanNode { + private final IndexSegment _indexSegment; private final QueryContext _queryContext; private final FilterContext _filter; @@ -106,7 +108,7 @@ public class FilterPlanNode implements PlanNode { } /** - * H3 index can be applied iff: + * H3 index can be applied on ST_Distance iff: * <ul> * <li>Predicate is of type RANGE</li> * <li>Left-hand-side of the predicate is an ST_Distance function</li> @@ -114,7 +116,7 @@ public class FilterPlanNode implements PlanNode { * <li>The identifier column has H3 index</li> * </ul> */ - private boolean canApplyH3Index(Predicate predicate, FunctionContext function) { + private boolean canApplyH3IndexForDistanceCheck(Predicate predicate, FunctionContext function) { if (predicate.getType() != Predicate.Type.RANGE) { return false; } @@ -139,6 +141,46 @@ public class FilterPlanNode implements PlanNode { return columnName != null && _indexSegment.getDataSource(columnName).getH3Index() != null && findLiteral; } + /** + * H3 index can be applied for inclusion check iff: + * <ul> + * <li>Predicate is of type EQ</li> + * <li>Left-hand-side of the predicate is an ST_Within or ST_Contains function</li> + * <li>For ST_Within, the first argument is an identifier, the second argument is literal</li> + * <li>For ST_Contains function the first argument is literal, the second argument is an identifier</li> + * <li>The identifier column has H3 index</li> + * </ul> + */ + private boolean canApplyH3IndexForInclusionCheck(Predicate predicate, FunctionContext function) { + if (predicate.getType() != Predicate.Type.EQ) { + return false; + } + String functionName = function.getFunctionName(); + if (!functionName.equals("stwithin") && !functionName.equals("stcontains")) { + return false; + } + List<ExpressionContext> arguments = function.getArguments(); + if (arguments.size() != 2) { + throw new BadQueryRequestException("Expect 2 arguments for function: " + functionName); + } + // TODO: handle nested geography/geometry conversion functions + if (functionName.equals("stwithin")) { + if (arguments.get(0).getType() == ExpressionContext.Type.IDENTIFIER + && arguments.get(1).getType() == ExpressionContext.Type.LITERAL) { + String columnName = arguments.get(0).getIdentifier(); + return _indexSegment.getDataSource(columnName).getH3Index() != null; + } + return false; + } else { + if (arguments.get(1).getType() == ExpressionContext.Type.IDENTIFIER + && arguments.get(0).getType() == ExpressionContext.Type.LITERAL) { + String columnName = arguments.get(1).getIdentifier(); + return _indexSegment.getDataSource(columnName).getH3Index() != null; + } + return false; + } + } + /** * Helper method to build the operator tree from the filter. */ @@ -181,12 +223,15 @@ public class FilterPlanNode implements PlanNode { Predicate predicate = filter.getPredicate(); ExpressionContext lhs = predicate.getLhs(); if (lhs.getType() == ExpressionContext.Type.FUNCTION) { - if (canApplyH3Index(predicate, lhs.getFunction())) { - return new H3IndexFilterOperator(_indexSegment, predicate, numDocs); + if (canApplyH3IndexForDistanceCheck(predicate, lhs.getFunction())) { + return new H3IndexFilterOperator(_indexSegment, predicate, numDocs); + } else if (canApplyH3IndexForInclusionCheck(predicate, lhs.getFunction())) { + return new H3InclusionIndexFilterOperator(_indexSegment, predicate, numDocs); + } else { + // TODO: ExpressionFilterOperator does not support predicate types without PredicateEvaluator (IS_NULL, + // IS_NOT_NULL, TEXT_MATCH) + return new ExpressionFilterOperator(_indexSegment, predicate, numDocs); } - // TODO: ExpressionFilterOperator does not support predicate types without PredicateEvaluator (IS_NULL, - // IS_NOT_NULL, TEXT_MATCH) - return new ExpressionFilterOperator(_indexSegment, predicate, numDocs); } else { String column = lhs.getIdentifier(); DataSource dataSource = _indexSegment.getDataSource(column); diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java index 3d39b31495..907f6b9ec6 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java @@ -18,6 +18,8 @@ */ package org.apache.pinot.queries; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -48,7 +50,6 @@ import org.apache.pinot.spi.utils.builder.TableConfigBuilder; import org.locationtech.jts.geom.Coordinate; import org.testng.Assert; import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -64,15 +65,21 @@ public class H3IndexQueriesTest extends BaseQueriesTest { private static final int NUM_RECORDS = 10000; private static final String H3_INDEX_COLUMN = "h3Column"; + private static final String H3_INDEX_GEOMETRY_COLUMN = "h3Column_geometry"; private static final String NON_H3_INDEX_COLUMN = "nonH3Column"; + private static final String NON_H3_INDEX_GEOMETRY_COLUMN = "nonH3Column_geometry"; private static final Schema SCHEMA = new Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME).addSingleValueDimension(H3_INDEX_COLUMN, DataType.BYTES) - .addSingleValueDimension(NON_H3_INDEX_COLUMN, DataType.BYTES).build(); + .addSingleValueDimension(NON_H3_INDEX_COLUMN, DataType.BYTES) + .addSingleValueDimension(H3_INDEX_GEOMETRY_COLUMN, DataType.BYTES) + .addSingleValueDimension(NON_H3_INDEX_GEOMETRY_COLUMN, DataType.BYTES).build(); private static final Map<String, String> H3_INDEX_PROPERTIES = Collections.singletonMap("resolutions", "5"); private static final TableConfig TABLE_CONFIG = new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME) - .setFieldConfigList(Collections.singletonList( - new FieldConfig(H3_INDEX_COLUMN, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.H3, null, - H3_INDEX_PROPERTIES))).build(); + .setFieldConfigList(ImmutableList + .of(new FieldConfig(H3_INDEX_COLUMN, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.H3, null, + H3_INDEX_PROPERTIES), + new FieldConfig(H3_INDEX_GEOMETRY_COLUMN, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.H3, + null, H3_INDEX_PROPERTIES))).build(); private IndexSegment _indexSegment; @@ -91,23 +98,10 @@ public class H3IndexQueriesTest extends BaseQueriesTest { throw new UnsupportedOperationException(); } - @BeforeClass - public void setUp() + public void setUp(List<GenericRow> records) throws Exception { FileUtils.deleteDirectory(INDEX_DIR); - List<GenericRow> records = new ArrayList<>(NUM_RECORDS); - for (int i = 0; i < NUM_RECORDS; i++) { - double longitude = -122.5 + RANDOM.nextDouble(); - double latitude = 37 + RANDOM.nextDouble(); - byte[] value = GeometrySerializer - .serialize(GeometryUtils.GEOGRAPHY_FACTORY.createPoint(new Coordinate(longitude, latitude))); - GenericRow record = new GenericRow(); - record.putValue(H3_INDEX_COLUMN, value); - record.putValue(NON_H3_INDEX_COLUMN, value); - records.add(record); - } - SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(TABLE_CONFIG, SCHEMA); segmentGeneratorConfig.setTableName(RAW_TABLE_NAME); segmentGeneratorConfig.setSegmentName(SEGMENT_NAME); @@ -118,14 +112,36 @@ public class H3IndexQueriesTest extends BaseQueriesTest { driver.build(); IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(); - indexLoadingConfig - .setH3IndexConfigs(Collections.singletonMap(H3_INDEX_COLUMN, new H3IndexConfig(H3_INDEX_PROPERTIES))); + indexLoadingConfig.setH3IndexConfigs(ImmutableMap + .of(H3_INDEX_COLUMN, new H3IndexConfig(H3_INDEX_PROPERTIES), H3_INDEX_GEOMETRY_COLUMN, + new H3IndexConfig(H3_INDEX_PROPERTIES))); _indexSegment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig); } + private void addRecord(List<GenericRow> records, double longitude, double latitude) { + byte[] value = + GeometrySerializer.serialize(GeometryUtils.GEOGRAPHY_FACTORY.createPoint(new Coordinate(longitude, latitude))); + byte[] geometryValue = + GeometrySerializer.serialize(GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(longitude, latitude))); + GenericRow record = new GenericRow(); + record.putValue(H3_INDEX_COLUMN, value); + record.putValue(NON_H3_INDEX_COLUMN, value); + record.putValue(H3_INDEX_GEOMETRY_COLUMN, geometryValue); + record.putValue(NON_H3_INDEX_GEOMETRY_COLUMN, geometryValue); + records.add(record); + } + @Test public void testH3Index() - throws IOException { + throws Exception { + List<GenericRow> records = new ArrayList<>(NUM_RECORDS); + for (int i = 0; i < NUM_RECORDS; i++) { + double longitude = -122.5 + RANDOM.nextDouble(); + double latitude = 37 + RANDOM.nextDouble(); + addRecord(records, longitude, latitude); + } + setUp(records); + // Invalid upper bound { for (String query : Arrays @@ -210,11 +226,177 @@ public class H3IndexQueriesTest extends BaseQueriesTest { Assert.assertNotNull(aggregationResult); Assert.assertEquals((long) aggregationResult.get(0), NUM_RECORDS); } + + { + // Test st contains in polygon + testQueryStContain("SELECT COUNT(*) FROM testTable WHERE ST_Contains(ST_GeomFromText('POLYGON ((\n" + + " -122.0008564 37.5004316, \n" + + " -121.9991291 37.5005168, \n" + + " -121.9990325 37.4995294, \n" + + " -122.0001268 37.4993506, \n" + + " -122.0008564 37.5004316))'), %s) = 1"); + + // negative test + testQueryStContain("SELECT COUNT(*) FROM testTable WHERE ST_Contains(ST_GeomFromText('POLYGON ((\n" + + " -122.0008564 37.5004316, \n" + + " -121.9991291 37.5005168, \n" + + " -121.9990325 37.4995294, \n" + + " -122.0001268 37.4993506, \n" + + " -122.0008564 37.5004316))'), %s) = 0"); + } + { + // Test st contains in polygon, doesn't have + String query = "SELECT COUNT(*) FROM testTable WHERE ST_Contains(ST_GeomFromText('POLYGON ((\n" + + " 122.0008564 -37.5004316, \n" + + " 121.9991291 -37.5005168, \n" + + " 121.9990325 -37.4995294, \n" + + " 122.0001268 -37.4993506, \n" + + " 122.0008564 -37.5004316))'), h3Column_geometry) = 1"; + AggregationOperator aggregationOperator = getOperator(query); + IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock(); + // Expect 0 entries scanned in filter + QueriesTestUtils + .testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(), 0, 0, 0, + NUM_RECORDS); + List<Object> aggregationResult = resultsBlock.getAggregationResult(); + Assert.assertNotNull(aggregationResult); + Assert.assertEquals((long) aggregationResult.get(0), 0); + } + + { + // Test st within in polygon + testQueryStContain("SELECT COUNT(*) FROM testTable WHERE ST_Within(%s, ST_GeomFromText('POLYGON ((\n" + + " -122.0008564 37.5004316, \n" + + " -121.9991291 37.5005168, \n" + + " -121.9990325 37.4995294, \n" + + " -122.0001268 37.4993506, \n" + + " -122.0008564 37.5004316))')) = 1"); + + // negative test + testQueryStContain("SELECT COUNT(*) FROM testTable WHERE ST_Within(%s, ST_GeomFromText('POLYGON ((\n" + + " -122.0008564 37.5004316, \n" + + " -121.9991291 37.5005168, \n" + + " -121.9990325 37.4995294, \n" + + " -122.0001268 37.4993506, \n" + + " -122.0008564 37.5004316))')) = 0"); + } + { + // Test st within in polygon, doesn't have + String query = "SELECT COUNT(*) FROM testTable WHERE ST_Within(h3Column_geometry, ST_GeomFromText('POLYGON ((\n" + + " 122.0008564 -37.5004316, \n" + + " 121.9991291 -37.5005168, \n" + + " 121.9990325 -37.4995294, \n" + + " 122.0001268 -37.4993506, \n" + + " 122.0008564 -37.5004316))')) = 1"; + AggregationOperator aggregationOperator = getOperator(query); + IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock(); + // Expect 0 entries scanned in filter + QueriesTestUtils + .testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(), 0, 0, 0, + NUM_RECORDS); + List<Object> aggregationResult = resultsBlock.getAggregationResult(); + Assert.assertNotNull(aggregationResult); + Assert.assertEquals((long) aggregationResult.get(0), 0); + } + } + + @Test + public void stContainPointVeryCloseToBorderTest() + throws Exception { + List<GenericRow> records = new ArrayList<>(1); + addRecord(records, -122.0008081, 37.5004231); + setUp(records); + // Test point is closed to border of a polygon but inside. + String query = "SELECT COUNT(*) FROM testTable WHERE ST_Contains(ST_GeomFromText('POLYGON ((\n" + + " -122.0008564 37.5004316, \n" + + " -121.9991291 37.5005168, \n" + + " -121.9990325 37.4995294, \n" + + " -122.0001268 37.4993506, \n" + + " -122.0008564 37.5004316))'), h3Column_geometry) = 1"; + AggregationOperator aggregationOperator = getOperator(query); + IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock(); + QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(), 1, 1, 0, 1); + List<Object> aggregationResult = resultsBlock.getAggregationResult(); + Assert.assertNotNull(aggregationResult); + Assert.assertEquals((long) aggregationResult.get(0), 1); + } + + @Test + public void stWithinPointVeryCloseToBorderTest() + throws Exception { + List<GenericRow> records = new ArrayList<>(1); + addRecord(records, -122.0008081, 37.5004231); + setUp(records); + // Test point is closed to border of a polygon but inside. + String query = "SELECT COUNT(*) FROM testTable WHERE ST_Within(h3Column_geometry, ST_GeomFromText('POLYGON ((\n" + + " -122.0008564 37.5004316, \n" + + " -121.9991291 37.5005168, \n" + + " -121.9990325 37.4995294, \n" + + " -122.0001268 37.4993506, \n" + + " -122.0008564 37.5004316))')) = 1"; + AggregationOperator aggregationOperator = getOperator(query); + IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock(); + QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(), 1, 1, 0, 1); + List<Object> aggregationResult = resultsBlock.getAggregationResult(); + Assert.assertNotNull(aggregationResult); + Assert.assertEquals((long) aggregationResult.get(0), 1); + } + + @Test + public void stContainPointVeryCloseToBorderButOutsideTest() + throws Exception { + List<GenericRow> records = new ArrayList<>(1); + addRecord(records, -122.0007277, 37.5005785); + setUp(records); + // Test point is closed to border of a polygon but outside. + String query = "SELECT COUNT(*) FROM testTable WHERE ST_Contains(ST_GeomFromText('POLYGON ((\n" + + " -122.0008564 37.5004316, \n" + + " -121.9991291 37.5005168, \n" + + " -121.9990325 37.4995294, \n" + + " -122.0001268 37.4993506, \n" + + " -122.0008564 37.5004316))'), h3Column_geometry) = 1"; + AggregationOperator aggregationOperator = getOperator(query); + IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock(); + QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(), 0, 1, 0, 1); + List<Object> aggregationResult = resultsBlock.getAggregationResult(); + Assert.assertNotNull(aggregationResult); + Assert.assertEquals((long) aggregationResult.get(0), 0); + } + + @Test + public void stWithinPointVeryCloseToBorderButOutsideTest() + throws Exception { + List<GenericRow> records = new ArrayList<>(1); + addRecord(records, -122.0007277, 37.5005785); + setUp(records); + // Test point is closed to border of a polygon but outside. + String query = "SELECT COUNT(*) FROM testTable WHERE ST_Within(h3Column_geometry, ST_GeomFromText('POLYGON ((\n" + + " -122.0008564 37.5004316, \n" + + " -121.9991291 37.5005168, \n" + + " -121.9990325 37.4995294, \n" + + " -122.0001268 37.4993506, \n" + + " -122.0008564 37.5004316))')) = 1"; + AggregationOperator aggregationOperator = getOperator(query); + IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock(); + QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(), 0, 1, 0, 1); + List<Object> aggregationResult = resultsBlock.getAggregationResult(); + Assert.assertNotNull(aggregationResult); + Assert.assertEquals((long) aggregationResult.get(0), 0); } private void testQuery(String queryTemplate) { String h3IndexQuery = String.format(queryTemplate, H3_INDEX_COLUMN); String nonH3IndexQuery = String.format(queryTemplate, NON_H3_INDEX_COLUMN); + validateQueryResult(h3IndexQuery, nonH3IndexQuery); + } + + private void testQueryStContain(String queryTemplate) { + String h3IndexQuery = String.format(queryTemplate, H3_INDEX_GEOMETRY_COLUMN); + String nonH3IndexQuery = String.format(queryTemplate, NON_H3_INDEX_GEOMETRY_COLUMN); + validateQueryResult(h3IndexQuery, nonH3IndexQuery); + } + + private void validateQueryResult(String h3IndexQuery, String nonH3IndexQuery) { AggregationOperator h3IndexOperator = getOperator(h3IndexQuery); AggregationOperator nonH3IndexOperator = getOperator(nonH3IndexQuery); IntermediateResultsBlock h3IndexResultsBlock = h3IndexOperator.nextBlock(); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java index 165304e0a2..e971d07b8d 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java @@ -19,10 +19,28 @@ package org.apache.pinot.segment.local.utils; import com.uber.h3core.H3Core; +import com.uber.h3core.exceptions.LineUndefinedException; +import com.uber.h3core.util.GeoCoord; +import it.unimi.dsi.fastutil.longs.LongArrayList; +import it.unimi.dsi.fastutil.longs.LongList; +import it.unimi.dsi.fastutil.longs.LongOpenHashSet; +import it.unimi.dsi.fastutil.longs.LongSet; import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.Pair; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.geom.GeometryCollection; +import org.locationtech.jts.geom.LineString; +import org.locationtech.jts.geom.Point; +import org.locationtech.jts.geom.Polygon; public class H3Utils { + private H3Utils() { } @@ -35,4 +53,78 @@ public class H3Utils { throw new RuntimeException("Failed to instantiate H3 instance", e); } } + + private static LongSet coverLineInH3(LineString lineString, int resolution) { + LongSet coveringH3Cells = new LongOpenHashSet(); + LongList endpointH3Cells = new LongArrayList(); + for (Coordinate endpoint : lineString.getCoordinates()) { + endpointH3Cells.add(H3_CORE.geoToH3(endpoint.y, endpoint.x, resolution)); + } + for (int i = 0; i < endpointH3Cells.size() - 1; i++) { + try { + coveringH3Cells.addAll(H3_CORE.h3Line(endpointH3Cells.getLong(i), endpointH3Cells.getLong(i + 1))); + } catch (LineUndefinedException e) { + throw new RuntimeException(e); + } + } + return coveringH3Cells; + } + + private static Pair<LongSet, LongSet> coverPolygonInH3(Polygon polygon, int resolution) { + List<Long> polyfillCells = H3_CORE.polyfill(Arrays.stream(polygon.getExteriorRing().getCoordinates()) + .map(coordinate -> new GeoCoord(coordinate.y, coordinate.x)).collect(Collectors.toList()), + Collections.emptyList(), resolution); + // TODO: this can be further optimized to use native H3 implementation. They have plan to support natively. + // https://github.com/apache/pinot/issues/8547 + LongSet potentialH3Cells = new LongOpenHashSet(); + if (polyfillCells.isEmpty()) { + // If the polyfill cells are empty, meaning the polygon might be smaller than a single cell in the H3 system. + // So just get whatever one. here choose the first one. the follow up kRing(firstCell, 1) will cover the whole + // polygon if there is potential not covered by the first point's belonging cell. + // ref: https://github.com/uber/h3/issues/456#issuecomment-827760163 + Coordinate represent = polygon.getCoordinate(); + potentialH3Cells.add(H3_CORE.geoToH3(represent.getY(), represent.getX(), resolution)); + } else { + potentialH3Cells.addAll(polyfillCells); + } + potentialH3Cells + .addAll(potentialH3Cells.stream().flatMap(cell -> H3_CORE.kRing(cell, 1).stream()).collect(Collectors.toSet())); + LongSet fullyContainedCell = new LongOpenHashSet( + potentialH3Cells.stream().filter(h3Cell -> polygon.contains(createPolygonFromH3Cell(h3Cell))) + .collect(Collectors.toSet())); + return Pair.of(fullyContainedCell, potentialH3Cells); + } + + private static Polygon createPolygonFromH3Cell(long h3Cell) { + List<GeoCoord> boundary = H3_CORE.h3ToGeoBoundary(h3Cell); + boundary.add(boundary.get(0)); + return GeometryUtils.GEOMETRY_FACTORY.createPolygon( + boundary.stream().map(geoCoord -> new Coordinate(geoCoord.lng, geoCoord.lat)).toArray(Coordinate[]::new)); + } + + // Return a pair of cell ids: The first fully contain, the second is potential contain. + // potential contains contain the fully contain. + public static Pair<LongSet, LongSet> coverGeometryInH3(Geometry geometry, int resolution) { + if (geometry instanceof Point) { + LongSet potentialCover = new LongOpenHashSet(); + potentialCover.add(H3_CORE.geoToH3(geometry.getCoordinate().y, geometry.getCoordinate().x, resolution)); + return Pair.of(new LongOpenHashSet(), potentialCover); + } else if (geometry instanceof LineString) { + LongSet potentialCover = new LongOpenHashSet(); + potentialCover.addAll(coverLineInH3(((LineString) geometry), resolution)); + return Pair.of(new LongOpenHashSet(), potentialCover); + } else if (geometry instanceof Polygon) { + return coverPolygonInH3(((Polygon) geometry), resolution); + } else if (geometry instanceof GeometryCollection) { + LongOpenHashSet fullCover = new LongOpenHashSet(); + LongOpenHashSet potentialCover = new LongOpenHashSet(); + for (int i = 0; i < geometry.getNumGeometries(); i++) { + fullCover.addAll(coverGeometryInH3(geometry.getGeometryN(i), resolution).getLeft()); + potentialCover.addAll(coverGeometryInH3(geometry.getGeometryN(i), resolution).getRight()); + } + return Pair.of(fullCover, potentialCover); + } else { + throw new UnsupportedOperationException("Unexpected type: " + geometry.getGeometryType()); + } + } } diff --git a/pom.xml b/pom.xml index 647dd5f049..81440daec1 100644 --- a/pom.xml +++ b/pom.xml @@ -151,7 +151,7 @@ <netty.version>4.1.74.Final</netty.version> <reactivestreams.version>1.0.3</reactivestreams.version> <jts.version>1.16.1</jts.version> - <h3.version>3.0.3</h3.version> + <h3.version>3.7.0</h3.version> <jmh.version>1.26</jmh.version> <audienceannotations.version>0.13.0</audienceannotations.version> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org