This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new d016df3091 Allow usage of star-tree index with null handling enabled when no null values in segment columns (#14177)
d016df3091 is described below

commit d016df3091483d4430c668e0b30f6381bd0ba56a
Author: Yash Mayya <yash.ma...@gmail.com>
AuthorDate: Thu Oct 10 03:12:07 2024 +0530

    Allow usage of star-tree index with null handling enabled when no null values in segment columns (#14177)
---
 .../apache/pinot/core/startree/StarTreeUtils.java  | 48 +++++++++++++++++++++-
 .../tests/StarTreeClusterIntegrationTest.java      | 17 +++++++-
 2 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java
index 039da20db0..d518028595 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java
@@ -46,6 +46,8 @@ import 
org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair
 import org.apache.pinot.segment.spi.index.startree.AggregationSpec;
 import org.apache.pinot.segment.spi.index.startree.StarTreeV2;
 import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 
 @SuppressWarnings("rawtypes")
@@ -53,6 +55,8 @@ public class StarTreeUtils {
   private StarTreeUtils() {
   }
 
+  private static final Logger LOGGER = 
LoggerFactory.getLogger(StarTreeUtils.class);
+
   /**
    * Extracts the {@link AggregationFunctionColumnPair}s from the given {@link 
AggregationFunction}s. Returns
    * {@code null} if any {@link AggregationFunction} cannot be represented as 
an {@link AggregationFunctionColumnPair}
@@ -354,7 +358,7 @@ public class StarTreeUtils {
       QueryContext queryContext, AggregationFunction[] aggregationFunctions, 
@Nullable FilterContext filter,
       List<Pair<Predicate, PredicateEvaluator>> predicateEvaluators) {
     List<StarTreeV2> starTrees = indexSegment.getStarTrees();
-    if (starTrees == null || queryContext.isSkipStarTree() || 
queryContext.isNullHandlingEnabled()) {
+    if (starTrees == null || queryContext.isSkipStarTree()) {
       return null;
     }
 
@@ -363,15 +367,57 @@ public class StarTreeUtils {
     if (aggregationFunctionColumnPairs == null) {
       return null;
     }
+
     Map<String, List<CompositePredicateEvaluator>> predicateEvaluatorsMap =
         extractPredicateEvaluatorsMap(indexSegment, filter, 
predicateEvaluators);
     if (predicateEvaluatorsMap == null) {
       return null;
     }
+
     ExpressionContext[] groupByExpressions =
         queryContext.getGroupByExpressions() != null ? 
queryContext.getGroupByExpressions()
             .toArray(new ExpressionContext[0]) : null;
 
+    if (queryContext.isNullHandlingEnabled()) {
+      // We can still use the star-tree index if there aren't actually any 
null values in this segment for all the
+      // metrics being aggregated, all the dimensions being filtered on / 
grouped by.
+      for (AggregationFunctionColumnPair aggregationFunctionColumnPair : 
aggregationFunctionColumnPairs) {
+        if (aggregationFunctionColumnPair == 
AggregationFunctionColumnPair.COUNT_STAR) {
+          // Null handling is irrelevant for COUNT(*)
+          continue;
+        }
+
+        String column = aggregationFunctionColumnPair.getColumn();
+        DataSource dataSource = indexSegment.getDataSource(column);
+        if (dataSource.getNullValueVector() != null && 
!dataSource.getNullValueVector().getNullBitmap().isEmpty()) {
+          LOGGER.debug("Cannot use star-tree index because aggregation column: 
'{}' has null values", column);
+          return null;
+        }
+      }
+
+      for (String column : predicateEvaluatorsMap.keySet()) {
+        DataSource dataSource = indexSegment.getDataSource(column);
+        if (dataSource.getNullValueVector() != null && 
!dataSource.getNullValueVector().getNullBitmap().isEmpty()) {
+          LOGGER.debug("Cannot use star-tree index because filter column: '{}' 
has null values", column);
+          return null;
+        }
+      }
+
+      Set<String> groupByColumns = new HashSet<>();
+      if (groupByExpressions != null) {
+        for (ExpressionContext groupByExpression : groupByExpressions) {
+          groupByExpression.getColumns(groupByColumns);
+        }
+      }
+      for (String column : groupByColumns) {
+        DataSource dataSource = indexSegment.getDataSource(column);
+        if (dataSource.getNullValueVector() != null && 
!dataSource.getNullValueVector().getNullBitmap().isEmpty()) {
+          LOGGER.debug("Cannot use star-tree index because group-by column: 
'{}' has null values", column);
+          return null;
+        }
+      }
+    }
+
     List<Pair<AggregationFunction, AggregationFunctionColumnPair>> 
aggregations =
         new ArrayList<>(aggregationFunctions.length);
     for (int i = 0; i < aggregationFunctions.length; i++) {
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java
index 66c8fa4e65..276ffe53b3 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java
@@ -159,8 +159,8 @@ public class StarTreeClusterIntegrationTest extends 
BaseClusterIntegrationTest {
       }
       for (String metric : metrics) {
         aggregationConfigs.add(
-            new StarTreeAggregationConfig(metric, functionType.name(), null, 
CompressionCodec.LZ4,
-                false, 4, null, null));
+            new StarTreeAggregationConfig(metric, functionType.name(), null, 
CompressionCodec.LZ4, false, 4, null,
+                null));
       }
     }
     return new StarTreeIndexConfig(dimensions, null, null, aggregationConfigs, 
maxLeafRecords);
@@ -213,22 +213,35 @@ public class StarTreeClusterIntegrationTest extends 
BaseClusterIntegrationTest {
       throws Exception {
     String explain = "EXPLAIN PLAN FOR ";
     String disableStarTree = "SET useStarTree = false; ";
+    // The star-tree index doesn't currently support null values, but we 
should still be able to use the star-tree index
+    // here since there aren't actually any null values in the dataset.
+    String nullHandlingEnabled = "SET enableNullHandling = true; ";
 
     if (verifyPlan) {
       JsonNode starPlan = postQuery(explain + starQuery);
       JsonNode referencePlan = postQuery(disableStarTree + explain + 
starQuery);
+      JsonNode nullHandlingEnabledPlan = postQuery(nullHandlingEnabled + 
explain + starQuery);
       assertTrue(starPlan.toString().contains(FILTER_STARTREE_INDEX) || 
starPlan.toString().contains("FILTER_EMPTY")
               || starPlan.toString().contains("ALL_SEGMENTS_PRUNED_ON_SERVER"),
           "StarTree query did not indicate use of StarTree index in query 
plan. Plan: " + starPlan);
       assertFalse(referencePlan.toString().contains(FILTER_STARTREE_INDEX),
           "Reference query indicated use of StarTree index in query plan. 
Plan: " + referencePlan);
+      assertTrue(
+          nullHandlingEnabledPlan.toString().contains(FILTER_STARTREE_INDEX) 
|| nullHandlingEnabledPlan.toString()
+              .contains("FILTER_EMPTY") || 
nullHandlingEnabledPlan.toString().contains("ALL_SEGMENTS_PRUNED_ON_SERVER"),
+          "StarTree query with null handling enabled did not indicate use of 
StarTree index in query plan. Plan: "
+              + nullHandlingEnabledPlan);
     }
 
     JsonNode starResponse = postQuery(starQuery);
     String referenceQuery = disableStarTree + starQuery;
     JsonNode referenceResponse = postQuery(referenceQuery);
+    // Don't compare the actual response values since they could differ (e.g. 
"null" vs "Infinity" for MIN
+    // aggregation function with no values aggregated)
+    JsonNode nullHandlingEnabledResponse = postQuery(nullHandlingEnabled + 
starQuery);
     assertEquals(starResponse.get("exceptions").size(), 0);
     assertEquals(referenceResponse.get("exceptions").size(), 0);
+    assertEquals(nullHandlingEnabledResponse.get("exceptions").size(), 0);
     assertEquals(starResponse.get("resultTable"), 
referenceResponse.get("resultTable"), String.format(
         "Query comparison failed for: \n"
             + "Star Query: %s\nStar Response: %s\nReference Query: 
%s\nReference Response: %s\nRandom Seed: %d",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to