This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new d016df3091 Allow usage of star-tree index with null handling enabled when no null values in segment columns (#14177) d016df3091 is described below commit d016df3091483d4430c668e0b30f6381bd0ba56a Author: Yash Mayya <yash.ma...@gmail.com> AuthorDate: Thu Oct 10 03:12:07 2024 +0530 Allow usage of star-tree index with null handling enabled when no null values in segment columns (#14177) --- .../apache/pinot/core/startree/StarTreeUtils.java | 48 +++++++++++++++++++++- .../tests/StarTreeClusterIntegrationTest.java | 17 +++++++- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java index 039da20db0..d518028595 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java @@ -46,6 +46,8 @@ import org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair import org.apache.pinot.segment.spi.index.startree.AggregationSpec; import org.apache.pinot.segment.spi.index.startree.StarTreeV2; import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @SuppressWarnings("rawtypes") @@ -53,6 +55,8 @@ public class StarTreeUtils { private StarTreeUtils() { } + private static final Logger LOGGER = LoggerFactory.getLogger(StarTreeUtils.class); + /** * Extracts the {@link AggregationFunctionColumnPair}s from the given {@link AggregationFunction}s. 
Returns * {@code null} if any {@link AggregationFunction} cannot be represented as an {@link AggregationFunctionColumnPair} @@ -354,7 +358,7 @@ public class StarTreeUtils { QueryContext queryContext, AggregationFunction[] aggregationFunctions, @Nullable FilterContext filter, List<Pair<Predicate, PredicateEvaluator>> predicateEvaluators) { List<StarTreeV2> starTrees = indexSegment.getStarTrees(); - if (starTrees == null || queryContext.isSkipStarTree() || queryContext.isNullHandlingEnabled()) { + if (starTrees == null || queryContext.isSkipStarTree()) { return null; } @@ -363,15 +367,57 @@ public class StarTreeUtils { if (aggregationFunctionColumnPairs == null) { return null; } + Map<String, List<CompositePredicateEvaluator>> predicateEvaluatorsMap = extractPredicateEvaluatorsMap(indexSegment, filter, predicateEvaluators); if (predicateEvaluatorsMap == null) { return null; } + ExpressionContext[] groupByExpressions = queryContext.getGroupByExpressions() != null ? queryContext.getGroupByExpressions() .toArray(new ExpressionContext[0]) : null; + if (queryContext.isNullHandlingEnabled()) { + // We can still use the star-tree index if there aren't actually any null values in this segment for all the + // metrics being aggregated, all the dimensions being filtered on / grouped by. 
+ for (AggregationFunctionColumnPair aggregationFunctionColumnPair : aggregationFunctionColumnPairs) { + if (aggregationFunctionColumnPair == AggregationFunctionColumnPair.COUNT_STAR) { + // Null handling is irrelevant for COUNT(*) + continue; + } + + String column = aggregationFunctionColumnPair.getColumn(); + DataSource dataSource = indexSegment.getDataSource(column); + if (dataSource.getNullValueVector() != null && !dataSource.getNullValueVector().getNullBitmap().isEmpty()) { + LOGGER.debug("Cannot use star-tree index because aggregation column: '{}' has null values", column); + return null; + } + } + + for (String column : predicateEvaluatorsMap.keySet()) { + DataSource dataSource = indexSegment.getDataSource(column); + if (dataSource.getNullValueVector() != null && !dataSource.getNullValueVector().getNullBitmap().isEmpty()) { + LOGGER.debug("Cannot use star-tree index because filter column: '{}' has null values", column); + return null; + } + } + + Set<String> groupByColumns = new HashSet<>(); + if (groupByExpressions != null) { + for (ExpressionContext groupByExpression : groupByExpressions) { + groupByExpression.getColumns(groupByColumns); + } + } + for (String column : groupByColumns) { + DataSource dataSource = indexSegment.getDataSource(column); + if (dataSource.getNullValueVector() != null && !dataSource.getNullValueVector().getNullBitmap().isEmpty()) { + LOGGER.debug("Cannot use star-tree index because group-by column: '{}' has null values", column); + return null; + } + } + } + List<Pair<AggregationFunction, AggregationFunctionColumnPair>> aggregations = new ArrayList<>(aggregationFunctions.length); for (int i = 0; i < aggregationFunctions.length; i++) { diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java index 66c8fa4e65..276ffe53b3 100644 --- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/StarTreeClusterIntegrationTest.java @@ -159,8 +159,8 @@ public class StarTreeClusterIntegrationTest extends BaseClusterIntegrationTest { } for (String metric : metrics) { aggregationConfigs.add( - new StarTreeAggregationConfig(metric, functionType.name(), null, CompressionCodec.LZ4, - false, 4, null, null)); + new StarTreeAggregationConfig(metric, functionType.name(), null, CompressionCodec.LZ4, false, 4, null, + null)); } } return new StarTreeIndexConfig(dimensions, null, null, aggregationConfigs, maxLeafRecords); @@ -213,22 +213,35 @@ public class StarTreeClusterIntegrationTest extends BaseClusterIntegrationTest { throws Exception { String explain = "EXPLAIN PLAN FOR "; String disableStarTree = "SET useStarTree = false; "; + // The star-tree index doesn't currently support null values, but we should still be able to use the star-tree index + // here since there aren't actually any null values in the dataset. + String nullHandlingEnabled = "SET enableNullHandling = true; "; if (verifyPlan) { JsonNode starPlan = postQuery(explain + starQuery); JsonNode referencePlan = postQuery(disableStarTree + explain + starQuery); + JsonNode nullHandlingEnabledPlan = postQuery(nullHandlingEnabled + explain + starQuery); assertTrue(starPlan.toString().contains(FILTER_STARTREE_INDEX) || starPlan.toString().contains("FILTER_EMPTY") || starPlan.toString().contains("ALL_SEGMENTS_PRUNED_ON_SERVER"), "StarTree query did not indicate use of StarTree index in query plan. Plan: " + starPlan); assertFalse(referencePlan.toString().contains(FILTER_STARTREE_INDEX), "Reference query indicated use of StarTree index in query plan. 
Plan: " + referencePlan); + assertTrue( + nullHandlingEnabledPlan.toString().contains(FILTER_STARTREE_INDEX) || nullHandlingEnabledPlan.toString() + .contains("FILTER_EMPTY") || nullHandlingEnabledPlan.toString().contains("ALL_SEGMENTS_PRUNED_ON_SERVER"), + "StarTree query with null handling enabled did not indicate use of StarTree index in query plan. Plan: " + + nullHandlingEnabledPlan); } JsonNode starResponse = postQuery(starQuery); String referenceQuery = disableStarTree + starQuery; JsonNode referenceResponse = postQuery(referenceQuery); + // Don't compare the actual response values since they could differ (e.g. "null" vs "Infinity" for MIN + // aggregation function with no values aggregated) + JsonNode nullHandlingEnabledResponse = postQuery(nullHandlingEnabled + starQuery); assertEquals(starResponse.get("exceptions").size(), 0); assertEquals(referenceResponse.get("exceptions").size(), 0); + assertEquals(nullHandlingEnabledResponse.get("exceptions").size(), 0); assertEquals(starResponse.get("resultTable"), referenceResponse.get("resultTable"), String.format( "Query comparison failed for: \n" + "Star Query: %s\nStar Response: %s\nReference Query: %s\nReference Response: %s\nRandom Seed: %d", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org