This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 498387a323 Support st_contains using H3 index (#8498)
498387a323 is described below

commit 498387a3239d611539aad1badbbabb5d5f66b75c
Author: WangCHX <wcxz...@gmail.com>
AuthorDate: Sat Apr 30 09:06:12 2022 +0800

    Support st_contains using H3 index (#8498)
    
    The idea is to convert input geometry into a list of h3 cells by using 
polyfill. But h3 polyfill only fills with the hexagons whose centers are 
contained by the geometry.
    so creating a method coverGeometryInH3 to return the set of H3 cells at the 
specified resolution which completely cover the input shape.
---
 .../filter/H3InclusionIndexFilterOperator.java     | 152 ++++++++++++++
 .../org/apache/pinot/core/plan/FilterPlanNode.java |  59 +++++-
 .../apache/pinot/queries/H3IndexQueriesTest.java   | 226 +++++++++++++++++++--
 .../apache/pinot/segment/local/utils/H3Utils.java  |  92 +++++++++
 pom.xml                                            |   2 +-
 5 files changed, 501 insertions(+), 30 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java
new file mode 100644
index 0000000000..c64d511697
--- /dev/null
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.filter;
+
+import it.unimi.dsi.fastutil.longs.LongIterator;
+import it.unimi.dsi.fastutil.longs.LongSet;
+import java.util.Collections;
+import java.util.List;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.predicate.EqPredicate;
+import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.blocks.FilterBlock;
+import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator;
+import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
+import org.apache.pinot.segment.local.utils.GeometrySerializer;
+import org.apache.pinot.segment.local.utils.H3Utils;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.index.reader.H3IndexReader;
+import org.apache.pinot.spi.utils.BooleanUtils;
+import org.apache.pinot.spi.utils.BytesUtils;
+import org.locationtech.jts.geom.Geometry;
+import org.roaringbitmap.buffer.BufferFastAggregation;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+
+/**
+ * A filter operator that uses H3 index for geospatial data inclusion
+ */
+public class H3InclusionIndexFilterOperator extends BaseFilterOperator {
+
+  private static final String EXPLAIN_NAME = "INCLUSION_FILTER_H3_INDEX";
+
+  private final IndexSegment _segment;
+  private final Predicate _predicate;
+  private final int _numDocs;
+  private final H3IndexReader _h3IndexReader;
+  private final Geometry _geometry;
+  private final boolean _isPositiveCheck;
+
+  public H3InclusionIndexFilterOperator(IndexSegment segment, Predicate 
predicate, int numDocs) {
+    _segment = segment;
+    _predicate = predicate;
+    _numDocs = numDocs;
+
+    List<ExpressionContext> arguments = 
predicate.getLhs().getFunction().getArguments();
+    EqPredicate eqPredicate = (EqPredicate) predicate;
+    _isPositiveCheck = BooleanUtils.toBoolean(eqPredicate.getValue());
+
+    if (arguments.get(0).getType() == ExpressionContext.Type.IDENTIFIER) {
+      _h3IndexReader = 
segment.getDataSource(arguments.get(0).getIdentifier()).getH3Index();
+      _geometry = 
GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(1).getLiteral()));
+    } else {
+      _h3IndexReader = 
segment.getDataSource(arguments.get(1).getIdentifier()).getH3Index();
+      _geometry = 
GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(0).getLiteral()));
+    }
+    // must be some h3 index
+    assert _h3IndexReader != null : "the column must have H3 index setup.";
+  }
+
+  @Override
+  protected FilterBlock getNextBlock() {
+    // get the set of H3 cells at the specified resolution which completely 
cover the input shape and potential cover.
+    Pair<LongSet, LongSet> fullCoverAndPotentialCoverCells =
+        H3Utils.coverGeometryInH3(_geometry, 
_h3IndexReader.getH3IndexResolution().getLowestResolution());
+    LongSet fullyCoverH3Cells = fullCoverAndPotentialCoverCells.getLeft();
+    LongSet potentialCoverH3Cells = fullCoverAndPotentialCoverCells.getRight();
+
+    // have list of h3 cell ids for polygon provided
+    // return filtered num_docs
+    ImmutableRoaringBitmap[] potentialMatchDocIds = new 
ImmutableRoaringBitmap[potentialCoverH3Cells.size()];
+    int i = 0;
+    LongIterator potentialCoverH3CellsIterator = 
potentialCoverH3Cells.iterator();
+    while (potentialCoverH3CellsIterator.hasNext()) {
+      potentialMatchDocIds[i++] = 
_h3IndexReader.getDocIds(potentialCoverH3CellsIterator.nextLong());
+    }
+    MutableRoaringBitmap potentialMatchMutableRoaringBitmap = 
BufferFastAggregation.or(potentialMatchDocIds);
+    if (_isPositiveCheck) {
+      ImmutableRoaringBitmap[] fullMatchDocIds = new 
ImmutableRoaringBitmap[fullyCoverH3Cells.size()];
+      i = 0;
+      LongIterator fullyCoverH3CellsIterator = fullyCoverH3Cells.iterator();
+      while (fullyCoverH3CellsIterator.hasNext()) {
+        fullMatchDocIds[i++] = 
_h3IndexReader.getDocIds(fullyCoverH3CellsIterator.nextLong());
+      }
+      MutableRoaringBitmap fullMatchMutableRoaringBitmap = 
BufferFastAggregation.or(fullMatchDocIds);
+      return getFilterBlock(fullMatchMutableRoaringBitmap, 
potentialMatchMutableRoaringBitmap);
+    } else {
+      i = 0;
+      // remove full match from potential match to get potential not match 
cells.
+      potentialCoverH3Cells.removeAll(fullyCoverH3Cells);
+      ImmutableRoaringBitmap[] potentialNotMatchMutableRoaringBitmap =
+          new ImmutableRoaringBitmap[potentialCoverH3Cells.size()];
+      LongIterator potentialNotMatchH3CellsIterator = 
potentialCoverH3Cells.iterator();
+      while (potentialNotMatchH3CellsIterator.hasNext()) {
+        potentialNotMatchMutableRoaringBitmap[i++] =
+            
_h3IndexReader.getDocIds(potentialNotMatchH3CellsIterator.nextLong());
+      }
+      MutableRoaringBitmap potentialNotMatch = 
BufferFastAggregation.or(potentialNotMatchMutableRoaringBitmap);
+      // flip potential match bit map to get exactly not match bitmap.
+      potentialMatchMutableRoaringBitmap.flip(0L, _numDocs);
+      return getFilterBlock(potentialMatchMutableRoaringBitmap, 
potentialNotMatch);
+    }
+  }
+
+  /**
+   * Returns the filter block based on the given the partial match doc ids.
+   */
+  private FilterBlock getFilterBlock(MutableRoaringBitmap fullMatchDocIds, 
MutableRoaringBitmap partialMatchDocIds) {
+    ExpressionFilterOperator expressionFilterOperator = new 
ExpressionFilterOperator(_segment, _predicate, _numDocs);
+    ScanBasedDocIdIterator docIdIterator =
+        (ScanBasedDocIdIterator) 
expressionFilterOperator.getNextBlock().getBlockDocIdSet().iterator();
+    MutableRoaringBitmap result = docIdIterator.applyAnd(partialMatchDocIds);
+    result.or(fullMatchDocIds);
+    return new FilterBlock(new BitmapDocIdSet(result, _numDocs) {
+      @Override
+      public long getNumEntriesScannedInFilter() {
+        return docIdIterator.getNumEntriesScanned();
+      }
+    });
+  }
+
+  @Override
+  public List<Operator> getChildOperators() {
+    return Collections.emptyList();
+  }
+
+  @Override
+  public String toExplainString() {
+    StringBuilder stringBuilder = new 
StringBuilder(EXPLAIN_NAME).append("(inclusionIndex:h3_index");
+    stringBuilder.append(",operator:").append(_predicate.getType());
+    stringBuilder.append(",predicate:").append(_predicate.toString());
+    return stringBuilder.append(')').toString();
+  }
+}
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java 
b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
index c43c5b5912..716810e62f 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
@@ -38,6 +38,7 @@ import 
org.apache.pinot.core.operator.filter.BitmapBasedFilterOperator;
 import org.apache.pinot.core.operator.filter.EmptyFilterOperator;
 import org.apache.pinot.core.operator.filter.ExpressionFilterOperator;
 import org.apache.pinot.core.operator.filter.FilterOperatorUtils;
+import org.apache.pinot.core.operator.filter.H3InclusionIndexFilterOperator;
 import org.apache.pinot.core.operator.filter.H3IndexFilterOperator;
 import org.apache.pinot.core.operator.filter.JsonMatchFilterOperator;
 import org.apache.pinot.core.operator.filter.MatchAllFilterOperator;
@@ -56,6 +57,7 @@ import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
 
 public class FilterPlanNode implements PlanNode {
+
   private final IndexSegment _indexSegment;
   private final QueryContext _queryContext;
   private final FilterContext _filter;
@@ -106,7 +108,7 @@ public class FilterPlanNode implements PlanNode {
   }
 
   /**
-   * H3 index can be applied iff:
+   * H3 index can be applied on ST_Distance iff:
    * <ul>
    *   <li>Predicate is of type RANGE</li>
    *   <li>Left-hand-side of the predicate is an ST_Distance function</li>
@@ -114,7 +116,7 @@ public class FilterPlanNode implements PlanNode {
    *   <li>The identifier column has H3 index</li>
    * </ul>
    */
-  private boolean canApplyH3Index(Predicate predicate, FunctionContext 
function) {
+  private boolean canApplyH3IndexForDistanceCheck(Predicate predicate, 
FunctionContext function) {
     if (predicate.getType() != Predicate.Type.RANGE) {
       return false;
     }
@@ -139,6 +141,46 @@ public class FilterPlanNode implements PlanNode {
     return columnName != null && 
_indexSegment.getDataSource(columnName).getH3Index() != null && findLiteral;
   }
 
+  /**
+   * H3 index can be applied for inclusion check iff:
+   * <ul>
+   *   <li>Predicate is of type EQ</li>
+   *   <li>Left-hand-side of the predicate is an ST_Within or ST_Contains 
function</li>
+   *   <li>For ST_Within, the first argument is an identifier, the second 
argument is literal</li>
+   *   <li>For ST_Contains function the first argument is literal, the second 
argument is an identifier</li>
+   *   <li>The identifier column has H3 index</li>
+   * </ul>
+   */
+  private boolean canApplyH3IndexForInclusionCheck(Predicate predicate, 
FunctionContext function) {
+    if (predicate.getType() != Predicate.Type.EQ) {
+      return false;
+    }
+    String functionName = function.getFunctionName();
+    if (!functionName.equals("stwithin") && 
!functionName.equals("stcontains")) {
+      return false;
+    }
+    List<ExpressionContext> arguments = function.getArguments();
+    if (arguments.size() != 2) {
+      throw new BadQueryRequestException("Expect 2 arguments for function: " + 
functionName);
+    }
+    // TODO: handle nested geography/geometry conversion functions
+    if (functionName.equals("stwithin")) {
+      if (arguments.get(0).getType() == ExpressionContext.Type.IDENTIFIER
+          && arguments.get(1).getType() == ExpressionContext.Type.LITERAL) {
+        String columnName = arguments.get(0).getIdentifier();
+        return _indexSegment.getDataSource(columnName).getH3Index() != null;
+      }
+      return false;
+    } else {
+      if (arguments.get(1).getType() == ExpressionContext.Type.IDENTIFIER
+          && arguments.get(0).getType() == ExpressionContext.Type.LITERAL) {
+        String columnName = arguments.get(1).getIdentifier();
+        return _indexSegment.getDataSource(columnName).getH3Index() != null;
+      }
+      return false;
+    }
+  }
+
   /**
    * Helper method to build the operator tree from the filter.
    */
@@ -181,12 +223,15 @@ public class FilterPlanNode implements PlanNode {
         Predicate predicate = filter.getPredicate();
         ExpressionContext lhs = predicate.getLhs();
         if (lhs.getType() == ExpressionContext.Type.FUNCTION) {
-          if (canApplyH3Index(predicate, lhs.getFunction())) {
-            return new H3IndexFilterOperator(_indexSegment, predicate, 
numDocs);
+          if (canApplyH3IndexForDistanceCheck(predicate, lhs.getFunction())) {
+              return new H3IndexFilterOperator(_indexSegment, predicate, 
numDocs);
+          } else if (canApplyH3IndexForInclusionCheck(predicate, 
lhs.getFunction())) {
+            return new H3InclusionIndexFilterOperator(_indexSegment, 
predicate, numDocs);
+          } else {
+            // TODO: ExpressionFilterOperator does not support predicate types 
without PredicateEvaluator (IS_NULL,
+            //       IS_NOT_NULL, TEXT_MATCH)
+            return new ExpressionFilterOperator(_indexSegment, predicate, 
numDocs);
           }
-          // TODO: ExpressionFilterOperator does not support predicate types 
without PredicateEvaluator (IS_NULL,
-          //       IS_NOT_NULL, TEXT_MATCH)
-          return new ExpressionFilterOperator(_indexSegment, predicate, 
numDocs);
         } else {
           String column = lhs.getIdentifier();
           DataSource dataSource = _indexSegment.getDataSource(column);
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java 
b/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java
index 3d39b31495..907f6b9ec6 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java
@@ -18,6 +18,8 @@
  */
 package org.apache.pinot.queries;
 
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -48,7 +50,6 @@ import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
 import org.locationtech.jts.geom.Coordinate;
 import org.testng.Assert;
 import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 
@@ -64,15 +65,21 @@ public class H3IndexQueriesTest extends BaseQueriesTest {
   private static final int NUM_RECORDS = 10000;
 
   private static final String H3_INDEX_COLUMN = "h3Column";
+  private static final String H3_INDEX_GEOMETRY_COLUMN = "h3Column_geometry";
   private static final String NON_H3_INDEX_COLUMN = "nonH3Column";
+  private static final String NON_H3_INDEX_GEOMETRY_COLUMN = 
"nonH3Column_geometry";
   private static final Schema SCHEMA =
       new 
Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME).addSingleValueDimension(H3_INDEX_COLUMN,
 DataType.BYTES)
-          .addSingleValueDimension(NON_H3_INDEX_COLUMN, 
DataType.BYTES).build();
+          .addSingleValueDimension(NON_H3_INDEX_COLUMN, DataType.BYTES)
+          .addSingleValueDimension(H3_INDEX_GEOMETRY_COLUMN, DataType.BYTES)
+          .addSingleValueDimension(NON_H3_INDEX_GEOMETRY_COLUMN, 
DataType.BYTES).build();
   private static final Map<String, String> H3_INDEX_PROPERTIES = 
Collections.singletonMap("resolutions", "5");
   private static final TableConfig TABLE_CONFIG = new 
TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME)
-      .setFieldConfigList(Collections.singletonList(
-          new FieldConfig(H3_INDEX_COLUMN, 
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.H3, null,
-              H3_INDEX_PROPERTIES))).build();
+      .setFieldConfigList(ImmutableList
+          .of(new FieldConfig(H3_INDEX_COLUMN, 
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.H3, null,
+                  H3_INDEX_PROPERTIES),
+              new FieldConfig(H3_INDEX_GEOMETRY_COLUMN, 
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.H3,
+                  null, H3_INDEX_PROPERTIES))).build();
 
   private IndexSegment _indexSegment;
 
@@ -91,23 +98,10 @@ public class H3IndexQueriesTest extends BaseQueriesTest {
     throw new UnsupportedOperationException();
   }
 
-  @BeforeClass
-  public void setUp()
+  public void setUp(List<GenericRow> records)
       throws Exception {
     FileUtils.deleteDirectory(INDEX_DIR);
 
-    List<GenericRow> records = new ArrayList<>(NUM_RECORDS);
-    for (int i = 0; i < NUM_RECORDS; i++) {
-      double longitude = -122.5 + RANDOM.nextDouble();
-      double latitude = 37 + RANDOM.nextDouble();
-      byte[] value = GeometrySerializer
-          .serialize(GeometryUtils.GEOGRAPHY_FACTORY.createPoint(new 
Coordinate(longitude, latitude)));
-      GenericRow record = new GenericRow();
-      record.putValue(H3_INDEX_COLUMN, value);
-      record.putValue(NON_H3_INDEX_COLUMN, value);
-      records.add(record);
-    }
-
     SegmentGeneratorConfig segmentGeneratorConfig = new 
SegmentGeneratorConfig(TABLE_CONFIG, SCHEMA);
     segmentGeneratorConfig.setTableName(RAW_TABLE_NAME);
     segmentGeneratorConfig.setSegmentName(SEGMENT_NAME);
@@ -118,14 +112,36 @@ public class H3IndexQueriesTest extends BaseQueriesTest {
     driver.build();
 
     IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
-    indexLoadingConfig
-        .setH3IndexConfigs(Collections.singletonMap(H3_INDEX_COLUMN, new 
H3IndexConfig(H3_INDEX_PROPERTIES)));
+    indexLoadingConfig.setH3IndexConfigs(ImmutableMap
+        .of(H3_INDEX_COLUMN, new H3IndexConfig(H3_INDEX_PROPERTIES), 
H3_INDEX_GEOMETRY_COLUMN,
+            new H3IndexConfig(H3_INDEX_PROPERTIES)));
     _indexSegment = ImmutableSegmentLoader.load(new File(INDEX_DIR, 
SEGMENT_NAME), indexLoadingConfig);
   }
 
+  private void addRecord(List<GenericRow> records, double longitude, double 
latitude) {
+    byte[] value =
+        
GeometrySerializer.serialize(GeometryUtils.GEOGRAPHY_FACTORY.createPoint(new 
Coordinate(longitude, latitude)));
+    byte[] geometryValue =
+        
GeometrySerializer.serialize(GeometryUtils.GEOMETRY_FACTORY.createPoint(new 
Coordinate(longitude, latitude)));
+    GenericRow record = new GenericRow();
+    record.putValue(H3_INDEX_COLUMN, value);
+    record.putValue(NON_H3_INDEX_COLUMN, value);
+    record.putValue(H3_INDEX_GEOMETRY_COLUMN, geometryValue);
+    record.putValue(NON_H3_INDEX_GEOMETRY_COLUMN, geometryValue);
+    records.add(record);
+  }
+
   @Test
   public void testH3Index()
-      throws IOException {
+      throws Exception {
+    List<GenericRow> records = new ArrayList<>(NUM_RECORDS);
+    for (int i = 0; i < NUM_RECORDS; i++) {
+      double longitude = -122.5 + RANDOM.nextDouble();
+      double latitude = 37 + RANDOM.nextDouble();
+      addRecord(records, longitude, latitude);
+    }
+    setUp(records);
+
     // Invalid upper bound
     {
       for (String query : Arrays
@@ -210,11 +226,177 @@ public class H3IndexQueriesTest extends BaseQueriesTest {
       Assert.assertNotNull(aggregationResult);
       Assert.assertEquals((long) aggregationResult.get(0), NUM_RECORDS);
     }
+
+    {
+      // Test st contains in polygon
+      testQueryStContain("SELECT COUNT(*) FROM testTable WHERE 
ST_Contains(ST_GeomFromText('POLYGON ((\n"
+          + "             -122.0008564 37.5004316, \n"
+          + "             -121.9991291 37.5005168, \n"
+          + "             -121.9990325 37.4995294, \n"
+          + "             -122.0001268 37.4993506,  \n"
+          + "             -122.0008564 37.5004316))'), %s) = 1");
+
+      // negative test
+      testQueryStContain("SELECT COUNT(*) FROM testTable WHERE 
ST_Contains(ST_GeomFromText('POLYGON ((\n"
+          + "             -122.0008564 37.5004316, \n"
+          + "             -121.9991291 37.5005168, \n"
+          + "             -121.9990325 37.4995294, \n"
+          + "             -122.0001268 37.4993506,  \n"
+          + "             -122.0008564 37.5004316))'), %s) = 0");
+    }
+    {
+      // Test st contains in polygon, doesn't have
+      String query = "SELECT COUNT(*) FROM testTable WHERE 
ST_Contains(ST_GeomFromText('POLYGON ((\n"
+          + "             122.0008564 -37.5004316, \n"
+          + "             121.9991291 -37.5005168, \n"
+          + "             121.9990325 -37.4995294, \n"
+          + "             122.0001268 -37.4993506,  \n"
+          + "             122.0008564 -37.5004316))'), h3Column_geometry) = 1";
+      AggregationOperator aggregationOperator = getOperator(query);
+      IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+      // Expect 0 entries scanned in filter
+      QueriesTestUtils
+          
.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
 0, 0, 0,
+              NUM_RECORDS);
+      List<Object> aggregationResult = resultsBlock.getAggregationResult();
+      Assert.assertNotNull(aggregationResult);
+      Assert.assertEquals((long) aggregationResult.get(0), 0);
+    }
+
+    {
+      // Test st within in polygon
+      testQueryStContain("SELECT COUNT(*) FROM testTable WHERE ST_Within(%s, 
ST_GeomFromText('POLYGON ((\n"
+          + "             -122.0008564 37.5004316, \n"
+          + "             -121.9991291 37.5005168, \n"
+          + "             -121.9990325 37.4995294, \n"
+          + "             -122.0001268 37.4993506,  \n"
+          + "             -122.0008564 37.5004316))')) = 1");
+
+      // negative test
+      testQueryStContain("SELECT COUNT(*) FROM testTable WHERE ST_Within(%s, 
ST_GeomFromText('POLYGON ((\n"
+          + "             -122.0008564 37.5004316, \n"
+          + "             -121.9991291 37.5005168, \n"
+          + "             -121.9990325 37.4995294, \n"
+          + "             -122.0001268 37.4993506,  \n"
+          + "             -122.0008564 37.5004316))')) = 0");
+    }
+    {
+      // Test st within in polygon, doesn't have
+      String query = "SELECT COUNT(*) FROM testTable WHERE 
ST_Within(h3Column_geometry, ST_GeomFromText('POLYGON ((\n"
+          + "             122.0008564 -37.5004316, \n"
+          + "             121.9991291 -37.5005168, \n"
+          + "             121.9990325 -37.4995294, \n"
+          + "             122.0001268 -37.4993506,  \n"
+          + "             122.0008564 -37.5004316))')) = 1";
+      AggregationOperator aggregationOperator = getOperator(query);
+      IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+      // Expect 0 entries scanned in filter
+      QueriesTestUtils
+          
.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
 0, 0, 0,
+              NUM_RECORDS);
+      List<Object> aggregationResult = resultsBlock.getAggregationResult();
+      Assert.assertNotNull(aggregationResult);
+      Assert.assertEquals((long) aggregationResult.get(0), 0);
+    }
+  }
+
+  @Test
+  public void stContainPointVeryCloseToBorderTest()
+      throws Exception {
+    List<GenericRow> records = new ArrayList<>(1);
+    addRecord(records, -122.0008081, 37.5004231);
+    setUp(records);
+    // Test point is closed to border of a polygon but inside.
+    String query = "SELECT COUNT(*) FROM testTable WHERE 
ST_Contains(ST_GeomFromText('POLYGON ((\n"
+        + "             -122.0008564 37.5004316, \n"
+        + "             -121.9991291 37.5005168, \n"
+        + "             -121.9990325 37.4995294, \n"
+        + "             -122.0001268 37.4993506,  \n"
+        + "             -122.0008564 37.5004316))'), h3Column_geometry) = 1";
+    AggregationOperator aggregationOperator = getOperator(query);
+    IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+    
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
 1, 1, 0, 1);
+    List<Object> aggregationResult = resultsBlock.getAggregationResult();
+    Assert.assertNotNull(aggregationResult);
+    Assert.assertEquals((long) aggregationResult.get(0), 1);
+  }
+
+  @Test
+  public void stWithinPointVeryCloseToBorderTest()
+      throws Exception {
+    List<GenericRow> records = new ArrayList<>(1);
+    addRecord(records, -122.0008081, 37.5004231);
+    setUp(records);
+    // Test point is closed to border of a polygon but inside.
+    String query = "SELECT COUNT(*) FROM testTable WHERE 
ST_Within(h3Column_geometry, ST_GeomFromText('POLYGON ((\n"
+        + "             -122.0008564 37.5004316, \n"
+        + "             -121.9991291 37.5005168, \n"
+        + "             -121.9990325 37.4995294, \n"
+        + "             -122.0001268 37.4993506,  \n"
+        + "             -122.0008564 37.5004316))')) = 1";
+    AggregationOperator aggregationOperator = getOperator(query);
+    IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+    
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
 1, 1, 0, 1);
+    List<Object> aggregationResult = resultsBlock.getAggregationResult();
+    Assert.assertNotNull(aggregationResult);
+    Assert.assertEquals((long) aggregationResult.get(0), 1);
+  }
+
+  @Test
+  public void stContainPointVeryCloseToBorderButOutsideTest()
+      throws Exception {
+    List<GenericRow> records = new ArrayList<>(1);
+    addRecord(records, -122.0007277, 37.5005785);
+    setUp(records);
+    // Test point is closed to border of a polygon but outside.
+    String query = "SELECT COUNT(*) FROM testTable WHERE 
ST_Contains(ST_GeomFromText('POLYGON ((\n"
+        + "             -122.0008564 37.5004316, \n"
+        + "             -121.9991291 37.5005168, \n"
+        + "             -121.9990325 37.4995294, \n"
+        + "             -122.0001268 37.4993506,  \n"
+        + "             -122.0008564 37.5004316))'), h3Column_geometry) = 1";
+    AggregationOperator aggregationOperator = getOperator(query);
+    IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+    
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
 0, 1, 0, 1);
+    List<Object> aggregationResult = resultsBlock.getAggregationResult();
+    Assert.assertNotNull(aggregationResult);
+    Assert.assertEquals((long) aggregationResult.get(0), 0);
+  }
+
+  @Test
+  public void stWithinPointVeryCloseToBorderButOutsideTest()
+      throws Exception {
+    List<GenericRow> records = new ArrayList<>(1);
+    addRecord(records, -122.0007277, 37.5005785);
+    setUp(records);
+    // Test point is closed to border of a polygon but outside.
+    String query = "SELECT COUNT(*) FROM testTable WHERE 
ST_Within(h3Column_geometry, ST_GeomFromText('POLYGON ((\n"
+        + "             -122.0008564 37.5004316, \n"
+        + "             -121.9991291 37.5005168, \n"
+        + "             -121.9990325 37.4995294, \n"
+        + "             -122.0001268 37.4993506,  \n"
+        + "             -122.0008564 37.5004316))')) = 1";
+    AggregationOperator aggregationOperator = getOperator(query);
+    IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+    
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
 0, 1, 0, 1);
+    List<Object> aggregationResult = resultsBlock.getAggregationResult();
+    Assert.assertNotNull(aggregationResult);
+    Assert.assertEquals((long) aggregationResult.get(0), 0);
   }
 
   private void testQuery(String queryTemplate) {
     String h3IndexQuery = String.format(queryTemplate, H3_INDEX_COLUMN);
     String nonH3IndexQuery = String.format(queryTemplate, NON_H3_INDEX_COLUMN);
+    validateQueryResult(h3IndexQuery, nonH3IndexQuery);
+  }
+
+  private void testQueryStContain(String queryTemplate) {
+    String h3IndexQuery = String.format(queryTemplate, 
H3_INDEX_GEOMETRY_COLUMN);
+    String nonH3IndexQuery = String.format(queryTemplate, 
NON_H3_INDEX_GEOMETRY_COLUMN);
+    validateQueryResult(h3IndexQuery, nonH3IndexQuery);
+  }
+
+  private void validateQueryResult(String h3IndexQuery, String 
nonH3IndexQuery) {
     AggregationOperator h3IndexOperator = getOperator(h3IndexQuery);
     AggregationOperator nonH3IndexOperator = getOperator(nonH3IndexQuery);
     IntermediateResultsBlock h3IndexResultsBlock = h3IndexOperator.nextBlock();
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java
index 165304e0a2..e971d07b8d 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java
@@ -19,10 +19,28 @@
 package org.apache.pinot.segment.local.utils;
 
 import com.uber.h3core.H3Core;
+import com.uber.h3core.exceptions.LineUndefinedException;
+import com.uber.h3core.util.GeoCoord;
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongList;
+import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
+import it.unimi.dsi.fastutil.longs.LongSet;
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.apache.commons.lang3.tuple.Pair;
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.GeometryCollection;
+import org.locationtech.jts.geom.LineString;
+import org.locationtech.jts.geom.Point;
+import org.locationtech.jts.geom.Polygon;
 
 
 public class H3Utils {
+
   private H3Utils() {
   }
 
@@ -35,4 +53,78 @@ public class H3Utils {
       throw new RuntimeException("Failed to instantiate H3 instance", e);
     }
   }
+
+  private static LongSet coverLineInH3(LineString lineString, int resolution) {
+    LongSet coveringH3Cells = new LongOpenHashSet();
+    LongList endpointH3Cells = new LongArrayList();
+    for (Coordinate endpoint : lineString.getCoordinates()) {
+      endpointH3Cells.add(H3_CORE.geoToH3(endpoint.y, endpoint.x, resolution));
+    }
+    for (int i = 0; i < endpointH3Cells.size() - 1; i++) {
+      try {
+        coveringH3Cells.addAll(H3_CORE.h3Line(endpointH3Cells.getLong(i), 
endpointH3Cells.getLong(i + 1)));
+      } catch (LineUndefinedException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    return coveringH3Cells;
+  }
+
+  private static Pair<LongSet, LongSet> coverPolygonInH3(Polygon polygon, int 
resolution) {
+    List<Long> polyfillCells = 
H3_CORE.polyfill(Arrays.stream(polygon.getExteriorRing().getCoordinates())
+            .map(coordinate -> new GeoCoord(coordinate.y, 
coordinate.x)).collect(Collectors.toList()),
+        Collections.emptyList(), resolution);
+    // TODO: this can be further optimized to use native H3 implementation. 
They have plan to support natively.
+    // https://github.com/apache/pinot/issues/8547
+    LongSet potentialH3Cells = new LongOpenHashSet();
+    if (polyfillCells.isEmpty()) {
+      // If the polyfill cells are empty, meaning the polygon might be smaller 
than a single cell in the H3 system.
+      // So just get whatever one. here choose the first one. the follow up 
kRing(firstCell, 1) will cover the whole
+      // polygon if there is potential not covered by the first point's 
belonging cell.
+      // ref: https://github.com/uber/h3/issues/456#issuecomment-827760163
+      Coordinate represent = polygon.getCoordinate();
+      potentialH3Cells.add(H3_CORE.geoToH3(represent.getY(), represent.getX(), 
resolution));
+    } else {
+      potentialH3Cells.addAll(polyfillCells);
+    }
+    potentialH3Cells
+        .addAll(potentialH3Cells.stream().flatMap(cell -> H3_CORE.kRing(cell, 
1).stream()).collect(Collectors.toSet()));
+    LongSet fullyContainedCell = new LongOpenHashSet(
+        potentialH3Cells.stream().filter(h3Cell -> 
polygon.contains(createPolygonFromH3Cell(h3Cell)))
+            .collect(Collectors.toSet()));
+    return Pair.of(fullyContainedCell, potentialH3Cells);
+  }
+
+  private static Polygon createPolygonFromH3Cell(long h3Cell) {
+    List<GeoCoord> boundary = H3_CORE.h3ToGeoBoundary(h3Cell);
+    boundary.add(boundary.get(0));
+    return GeometryUtils.GEOMETRY_FACTORY.createPolygon(
+        boundary.stream().map(geoCoord -> new Coordinate(geoCoord.lng, 
geoCoord.lat)).toArray(Coordinate[]::new));
+  }
+
+  // Return a pair of cell ids: The first fully contain, the second is 
potential contain.
+  // potential contains contain the fully contain.
+  public static Pair<LongSet, LongSet> coverGeometryInH3(Geometry geometry, 
int resolution) {
+    if (geometry instanceof Point) {
+      LongSet potentialCover = new LongOpenHashSet();
+      potentialCover.add(H3_CORE.geoToH3(geometry.getCoordinate().y, 
geometry.getCoordinate().x, resolution));
+      return Pair.of(new LongOpenHashSet(), potentialCover);
+    } else if (geometry instanceof LineString) {
+      LongSet potentialCover = new LongOpenHashSet();
+      potentialCover.addAll(coverLineInH3(((LineString) geometry), 
resolution));
+      return Pair.of(new LongOpenHashSet(), potentialCover);
+    } else if (geometry instanceof Polygon) {
+      return coverPolygonInH3(((Polygon) geometry), resolution);
+    } else if (geometry instanceof GeometryCollection) {
+      LongOpenHashSet fullCover = new LongOpenHashSet();
+      LongOpenHashSet potentialCover = new LongOpenHashSet();
+      for (int i = 0; i < geometry.getNumGeometries(); i++) {
+        fullCover.addAll(coverGeometryInH3(geometry.getGeometryN(i), 
resolution).getLeft());
+        potentialCover.addAll(coverGeometryInH3(geometry.getGeometryN(i), 
resolution).getRight());
+      }
+      return Pair.of(fullCover, potentialCover);
+    } else {
+      throw new UnsupportedOperationException("Unexpected type: " + 
geometry.getGeometryType());
+    }
+  }
 }
diff --git a/pom.xml b/pom.xml
index 647dd5f049..81440daec1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -151,7 +151,7 @@
     <netty.version>4.1.74.Final</netty.version>
     <reactivestreams.version>1.0.3</reactivestreams.version>
     <jts.version>1.16.1</jts.version>
-    <h3.version>3.0.3</h3.version>
+    <h3.version>3.7.0</h3.version>
     <jmh.version>1.26</jmh.version>
     <audienceannotations.version>0.13.0</audienceannotations.version>
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to