Jackie-Jiang commented on a change in pull request #6409:
URL: https://github.com/apache/incubator-pinot/pull/6409#discussion_r557825606



##########
File path: 
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java
##########
@@ -90,110 +92,139 @@ public H3IndexFilterOperator(IndexSegment segment, 
Predicate predicate, int numD
   @Override
   protected FilterBlock getNextBlock() {
     if (_upperBound < 0 || _lowerBound > _upperBound) {
-      // Invalid upper bound
-
+      // Invalid upper bound, return an empty block
       return new FilterBlock(EmptyDocIdSet.getInstance());
     }
 
-    if (Double.isNaN(_lowerBound) || _lowerBound < 0) {
-      // No lower bound
+    try {
+      if (Double.isNaN(_lowerBound) || _lowerBound < 0) {
+        // No lower bound
 
-      if (Double.isNaN(_upperBound)) {
-        // No upper bound
-        return new FilterBlock(new MatchAllDocIdSet(_numDocs));
-      }
-
-      List<Long> fullMatchH3Ids = getFullMatchH3Ids(_upperBound);
-      HashSet<Long> partialMatchH3Ids = new 
HashSet<>(getPartialMatchH3Ids(_upperBound));
-      partialMatchH3Ids.removeAll(fullMatchH3Ids);
+        if (Double.isNaN(_upperBound)) {
+          // No bound, return a match-all block
+          return new FilterBlock(new MatchAllDocIdSet(_numDocs));
+        }
 
-      MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap();
-      for (long fullMatchH3Id : fullMatchH3Ids) {
-        fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id));
-      }
+        // Upper bound only
+        List<Long> fullMatchH3Ids = getAlwaysMatchH3Ids(_upperBound);
+        HashSet<Long> partialMatchH3Ids = new 
HashSet<>(getPossibleMatchH3Ids(_upperBound));
+        partialMatchH3Ids.removeAll(fullMatchH3Ids);
 
-      MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
-      for (long partialMatchH3Id : partialMatchH3Ids) {
-        partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
-      }
+        MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap();
+        for (long fullMatchH3Id : fullMatchH3Ids) {
+          fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id));
+        }
 
-      return getFilterBlock(fullMatchDocIds, partialMatchDocIds);
-    }
+        MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
+        for (long partialMatchH3Id : partialMatchH3Ids) {
+          partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
+        }
 
-    if (Double.isNaN(_upperBound)) {
-      // No upper bound
+        return getFilterBlock(fullMatchDocIds, partialMatchDocIds);
+      }
 
-      List<Long> notMatchH3Ids = getFullMatchH3Ids(_lowerBound);
-      Set<Long> partialMatchH3Ids = new 
HashSet<>(getPartialMatchH3Ids(_lowerBound));
+      if (Double.isNaN(_upperBound)) {
+        // Lower bound only
+
+        List<Long> alwaysNotMatchH3Ids = getAlwaysMatchH3Ids(_lowerBound);
+        Set<Long> possibleNotMatchH3Ids = new 
HashSet<>(getPossibleMatchH3Ids(_lowerBound));
+
+        // Flip the result of possible not match doc ids to get the full match 
doc ids
+        MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap();
+        for (long partialMatchH3Id : possibleNotMatchH3Ids) {
+          fullMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
+        }
+        fullMatchDocIds.flip(0L, _numDocs);
+
+        // Remove the always not match H3 ids from possible not match H3 ids 
to get the partial match H3 ids
+        possibleNotMatchH3Ids.removeAll(alwaysNotMatchH3Ids);
+        MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
+        for (long partialMatchH3Id : possibleNotMatchH3Ids) {
+          partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
+        }
+
+        return getFilterBlock(fullMatchDocIds, partialMatchDocIds);
+      }
 
+      // Both lower bound and upper bound exist
+      List<Long> lowerAlwaysMatchH3Ids = getAlwaysMatchH3Ids(_lowerBound);
+      List<Long> lowerPossibleMatchH3Ids = getPossibleMatchH3Ids(_lowerBound);
+      List<Long> upperAlwaysMatchH3Ids = getAlwaysMatchH3Ids(_upperBound);
+      List<Long> upperPossibleMatchH3Ids = getPossibleMatchH3Ids(_upperBound);
+
+      // Remove the possible match H3 ids for the lower bound from the always 
match H3 ids for the upper bound to get
+      // the full match H3 ids
+      Set<Long> fullMatchH3Ids;
+      if (upperAlwaysMatchH3Ids.size() > lowerPossibleMatchH3Ids.size()) {
+        fullMatchH3Ids = new HashSet<>(upperAlwaysMatchH3Ids);
+        fullMatchH3Ids.removeAll(lowerPossibleMatchH3Ids);
+      } else {
+        fullMatchH3Ids = Collections.emptySet();
+      }
       MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap();
-      for (long partialMatchH3Id : partialMatchH3Ids) {
-        fullMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
+      for (long fullMatchH3Id : fullMatchH3Ids) {
+        fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id));
       }
-      fullMatchDocIds.flip(0L, _numDocs);
 
-      partialMatchH3Ids.removeAll(notMatchH3Ids);
+      // Remove the always match H3 ids for the lower bound (always not match 
H3 ids) and the full match H3 ids from the
+      // possible match H3 ids for the upper bound to get the partial match H3 
ids
+      Set<Long> partialMatchH3Ids = new HashSet<>(upperPossibleMatchH3Ids);
+      partialMatchH3Ids.removeAll(lowerAlwaysMatchH3Ids);
+      partialMatchH3Ids.removeAll(fullMatchH3Ids);
       MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
       for (long partialMatchH3Id : partialMatchH3Ids) {
         partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
       }
 
       return getFilterBlock(fullMatchDocIds, partialMatchDocIds);
+    } catch (Exception e) {
+      // Fall back to ExpressionFilterOperator when the execution encounters 
exception (e.g. numRings is too large)
+      return new ExpressionFilterOperator(_segment, _predicate, 
_numDocs).getNextBlock();
     }
-
-    // Both lower bound and upper bound exist
-    List<Long> lowerFullMatchH3Ids = getFullMatchH3Ids(_lowerBound);
-    List<Long> lowerPartialMatchH3Ids = getPartialMatchH3Ids(_lowerBound);
-    List<Long> upperFullMatchH3Ids = getFullMatchH3Ids(_upperBound);
-    List<Long> upperPartialMatchH3Ids = getPartialMatchH3Ids(_upperBound);
-
-    Set<Long> fullMatchH3Ids;
-    if (upperFullMatchH3Ids.size() > lowerPartialMatchH3Ids.size()) {
-      fullMatchH3Ids = new HashSet<>(upperFullMatchH3Ids);
-      fullMatchH3Ids.removeAll(lowerPartialMatchH3Ids);
-    } else {
-      fullMatchH3Ids = Collections.emptySet();
-    }
-    MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap();
-    for (long fullMatchH3Id : fullMatchH3Ids) {
-      fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id));
-    }
-
-    Set<Long> partialMatchH3Ids = new HashSet<>(upperPartialMatchH3Ids);
-    partialMatchH3Ids.removeAll(lowerFullMatchH3Ids);
-    partialMatchH3Ids.removeAll(fullMatchH3Ids);
-    MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
-    for (long partialMatchH3Id : partialMatchH3Ids) {
-      partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
-    }
-
-    return getFilterBlock(fullMatchDocIds, partialMatchDocIds);
   }
 
   /**
    * Returns the H3 ids that is ALWAYS fully covered by the circle with the 
given distance as the radius and a point
    * within the _h3Id hexagon as the center.
+   * <p>The farthest distance from the center of the center hexagon to the 
center of a hexagon in the nth ring is
+   * {@code sqrt(3) * n * edgeLength}. Counting the distance from the center 
to a point in the hexagon, which is up
+   * to the edge length, it is guaranteed that the hexagons in the nth ring 
are always fully covered if:
+   * {@code distance >= (sqrt(3) * n + 2) * edgeLength}.
    */
-  private List<Long> getFullMatchH3Ids(double distance) {
+  private List<Long> getAlwaysMatchH3Ids(double distance) {
     // NOTE: Pick a constant slightly larger than sqrt(3) to be conservative
     int numRings = (int) Math.floor((distance / _edgeLength - 2) / 1.7321);
-    if (numRings >= 0) {
-      return H3Utils.H3_CORE.kRing(_h3Id, numRings);
-    } else {
-      return Collections.emptyList();
-    }
+    return numRings >= 0 ? getH3Ids(numRings) : Collections.emptyList();
   }
 
   /**
-   * Returns the H3 ids that MIGHT BE partially/fully covered by the circle 
with the given distance as the radius and a
+   * Returns the H3 ids that MIGHT BE fully/partially covered by the circle 
with the given distance as the radius and a
    * point within the _h3Id hexagon as the center.
+   * <p>The shortest distance from the center of the center hexagon to the 
center of a hexagon in the nth ring is
+   * {@code >= 1.5 * n * edgeLength}. Counting the distance from the center to 
a point in the hexagon, which is up
+   * to the edge length, it is guaranteed that the hexagons in the nth ring 
are always not fully/partially covered if:
+   * {@code distance < (1.5 * n - 2) * edgeLength}.
    */
-  private List<Long> getPartialMatchH3Ids(double distance) {
+  private List<Long> getPossibleMatchH3Ids(double distance) {
     // NOTE: Add a small delta (0.001) to be conservative
     int numRings = (int) Math.floor((distance / _edgeLength + 2) / 1.5 + 
0.001);
+    return getH3Ids(numRings);
+  }
+
+  /**
+   * Returns the H3 ids for the given number of rings around the _h3Id.
+   * IMPORTANT: Throw exception when number of rings is too large because H3 
library might send SIGILL for large number
+   * of rings, which can terminate the JVM. Also, the result won't be accurate 
when the distance is too large. In such
+   * case, the operator will fallback to ExpressionFilterOperator.
+   */
+  private List<Long> getH3Ids(int numRings) {
+    Preconditions.checkState(numRings <= 100, "Expect numRings <= 100, got: 
%s", numRings);

Review comment:
       Yes, as mentioned in the comments




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to