This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push: new 79b12ed Fix empty data table for distinct query (#6363) 79b12ed is described below commit 79b12ed36fccd28cd68937ad53d9df76cd29c9b3 Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com> AuthorDate: Thu Dec 17 13:39:07 2020 -0800 Fix empty data table for distinct query (#6363) Support building empty data table for distinct query. Currently it will throw exception for distinct queries when all the segments are pruned --- .../core/common/datatable/DataTableUtils.java | 95 ++++++++++++++++------ .../core/common/datatable/DataTableUtilsTest.java | 28 +++++-- 2 files changed, 94 insertions(+), 29 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java index b3dea8a..859a65c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java @@ -23,10 +23,14 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; import org.apache.pinot.common.utils.DataTable; import org.apache.pinot.core.query.aggregation.function.AggregationFunction; +import org.apache.pinot.core.query.aggregation.function.DistinctAggregationFunction; +import org.apache.pinot.core.query.distinct.DistinctTable; import org.apache.pinot.core.query.request.context.ExpressionContext; import org.apache.pinot.core.query.request.context.QueryContext; +import org.apache.pinot.core.query.request.context.utils.QueryContextUtils; import org.apache.pinot.core.util.QueryOptions; @@ -86,40 +90,56 @@ public class DataTableUtils { */ public static DataTable buildEmptyDataTable(QueryContext queryContext) throws IOException { - AggregationFunction[] aggregationFunctions = queryContext.getAggregationFunctions(); + if (QueryContextUtils.isSelectionQuery(queryContext)) { + return buildEmptyDataTableForSelectionQuery(queryContext); + } else if (QueryContextUtils.isAggregationQuery(queryContext)) { + return buildEmptyDataTableForAggregationQuery(queryContext); + } else { + assert QueryContextUtils.isDistinctQuery(queryContext); + return buildEmptyDataTableForDistinctQuery(queryContext); + } + } - // Selection query. - if (aggregationFunctions == null) { - List<ExpressionContext> selectExpressions = queryContext.getSelectExpressions(); - int numSelectExpressions = selectExpressions.size(); - String[] columnNames = new String[numSelectExpressions]; - for (int i = 0; i < numSelectExpressions; i++) { - columnNames[i] = selectExpressions.get(i).toString(); - } - DataSchema.ColumnDataType[] columnDataTypes = new DataSchema.ColumnDataType[numSelectExpressions]; - // NOTE: Use STRING column data type as default for selection query. - Arrays.fill(columnDataTypes, DataSchema.ColumnDataType.STRING); - DataSchema dataSchema = new DataSchema(columnNames, columnDataTypes); - return new DataTableBuilder(dataSchema).build(); + /** + * Helper method to build an empty data table for selection query. + */ + private static DataTable buildEmptyDataTableForSelectionQuery(QueryContext queryContext) { + List<ExpressionContext> selectExpressions = queryContext.getSelectExpressions(); + int numSelectExpressions = selectExpressions.size(); + String[] columnNames = new String[numSelectExpressions]; + for (int i = 0; i < numSelectExpressions; i++) { + columnNames[i] = selectExpressions.get(i).toString(); } + ColumnDataType[] columnDataTypes = new ColumnDataType[numSelectExpressions]; + // NOTE: Use STRING column data type as default for selection query + Arrays.fill(columnDataTypes, ColumnDataType.STRING); + DataSchema dataSchema = new DataSchema(columnNames, columnDataTypes); + return new DataTableBuilder(dataSchema).build(); + } - // Aggregation query. + /** + * Helper method to build an empty data table for aggregation query. + */ + private static DataTable buildEmptyDataTableForAggregationQuery(QueryContext queryContext) + throws IOException { + AggregationFunction[] aggregationFunctions = queryContext.getAggregationFunctions(); + assert aggregationFunctions != null; int numAggregations = aggregationFunctions.length; List<ExpressionContext> groupByExpressions = queryContext.getGroupByExpressions(); if (groupByExpressions != null) { - // Aggregation group-by query. + // Aggregation group-by query if (new QueryOptions(queryContext.getQueryOptions()).isGroupByModeSQL()) { // SQL format int numColumns = groupByExpressions.size() + numAggregations; String[] columnNames = new String[numColumns]; - DataSchema.ColumnDataType[] columnDataTypes = new DataSchema.ColumnDataType[numColumns]; + ColumnDataType[] columnDataTypes = new ColumnDataType[numColumns]; int index = 0; for (ExpressionContext groupByExpression : groupByExpressions) { columnNames[index] = groupByExpression.toString(); // Use STRING column data type as default for group-by expressions - columnDataTypes[index] = DataSchema.ColumnDataType.STRING; + columnDataTypes[index] = ColumnDataType.STRING; index++; } for (AggregationFunction aggregationFunction : aggregationFunctions) { @@ -133,10 +153,9 @@ public class DataTableUtils { // PQL format String[] columnNames = new String[]{"functionName", "GroupByResultMap"}; - DataSchema.ColumnDataType[] columnDataTypes = - new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING, DataSchema.ColumnDataType.OBJECT}; + ColumnDataType[] columnDataTypes = new ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.OBJECT}; - // Build the data table. + // Build the data table DataTableBuilder dataTableBuilder = new DataTableBuilder(new DataSchema(columnNames, columnDataTypes)); for (AggregationFunction aggregationFunction : aggregationFunctions) { dataTableBuilder.startRow(); @@ -148,10 +167,10 @@ public class DataTableUtils { return dataTableBuilder.build(); } } else { - // Aggregation only query. + // Aggregation only query String[] aggregationColumnNames = new String[numAggregations]; - DataSchema.ColumnDataType[] columnDataTypes = new DataSchema.ColumnDataType[numAggregations]; + ColumnDataType[] columnDataTypes = new ColumnDataType[numAggregations]; Object[] aggregationResults = new Object[numAggregations]; for (int i = 0; i < numAggregations; i++) { AggregationFunction aggregationFunction = aggregationFunctions[i]; @@ -162,7 +181,7 @@ public class DataTableUtils { aggregationFunction.extractAggregationResult(aggregationFunction.createAggregationResultHolder()); } - // Build the data table. + // Build the data table DataTableBuilder dataTableBuilder = new DataTableBuilder(new DataSchema(aggregationColumnNames, columnDataTypes)); dataTableBuilder.startRow(); for (int i = 0; i < numAggregations; i++) { @@ -186,4 +205,32 @@ public class DataTableUtils { return dataTableBuilder.build(); } } + + /** + * Helper method to build an empty data table for distinct query. + */ + private static DataTable buildEmptyDataTableForDistinctQuery(QueryContext queryContext) + throws IOException { + AggregationFunction[] aggregationFunctions = queryContext.getAggregationFunctions(); + assert aggregationFunctions != null && aggregationFunctions.length == 1 + && aggregationFunctions[0] instanceof DistinctAggregationFunction; + DistinctAggregationFunction distinctAggregationFunction = (DistinctAggregationFunction) aggregationFunctions[0]; + + // Create the distinct table + String[] columnNames = distinctAggregationFunction.getColumns(); + ColumnDataType[] columnDataTypes = new ColumnDataType[columnNames.length]; + // NOTE: Use STRING column data type as default for distinct query + Arrays.fill(columnDataTypes, ColumnDataType.STRING); + DistinctTable distinctTable = + new DistinctTable(new DataSchema(columnNames, columnDataTypes), Collections.emptySet()); + + // Build the data table + DataTableBuilder dataTableBuilder = new DataTableBuilder( + new DataSchema(new String[]{distinctAggregationFunction.getColumnName()}, + new ColumnDataType[]{ColumnDataType.OBJECT})); + dataTableBuilder.startRow(); + dataTableBuilder.setColumn(0, distinctTable); + dataTableBuilder.finishRow(); + return dataTableBuilder.build(); + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java index 2b2a2a3..f2095ea 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java @@ -21,12 +21,15 @@ package org.apache.pinot.core.common.datatable; import java.io.IOException; import java.util.Collections; import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; import org.apache.pinot.common.utils.DataTable; +import org.apache.pinot.core.query.distinct.DistinctTable; import org.apache.pinot.core.query.request.context.QueryContext; import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils; import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; public class DataTableUtilsTest { @@ -40,7 +43,7 @@ public class DataTableUtilsTest { DataTable dataTable = DataTableUtils.buildEmptyDataTable(queryContext); DataSchema dataSchema = dataTable.getDataSchema(); assertEquals(dataSchema.getColumnNames(), new String[]{"*"}); - assertEquals(dataSchema.getColumnDataTypes(), new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING}); + assertEquals(dataSchema.getColumnDataTypes(), new ColumnDataType[]{ColumnDataType.STRING}); assertEquals(dataTable.getNumberOfRows(), 0); // Aggregation @@ -50,7 +53,7 @@ public class DataTableUtilsTest { dataSchema = dataTable.getDataSchema(); assertEquals(dataSchema.getColumnNames(), new String[]{"count_star", "sum_a", "max_b"}); assertEquals(dataSchema.getColumnDataTypes(), - new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.LONG, DataSchema.ColumnDataType.DOUBLE, DataSchema.ColumnDataType.DOUBLE}); + new ColumnDataType[]{ColumnDataType.LONG, ColumnDataType.DOUBLE, ColumnDataType.DOUBLE}); assertEquals(dataTable.getNumberOfRows(), 1); assertEquals(dataTable.getLong(0, 0), 0L); assertEquals(dataTable.getDouble(0, 1), 0.0); @@ -62,8 +65,7 @@ public class DataTableUtilsTest { dataTable = DataTableUtils.buildEmptyDataTable(queryContext); dataSchema = dataTable.getDataSchema(); assertEquals(dataSchema.getColumnNames(), new String[]{"functionName", "GroupByResultMap"}); - assertEquals(dataSchema.getColumnDataTypes(), - new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING, DataSchema.ColumnDataType.OBJECT}); + assertEquals(dataSchema.getColumnDataTypes(), new ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.OBJECT}); assertEquals(dataTable.getNumberOfRows(), 3); assertEquals(dataTable.getString(0, 0), "count_star"); assertEquals(dataTable.getObject(0, 1), Collections.emptyMap()); @@ -79,7 +81,23 @@ public class DataTableUtilsTest { dataSchema = dataTable.getDataSchema(); assertEquals(dataSchema.getColumnNames(), new String[]{"c", "d", "count(*)", "sum(a)", "max(b)"}); assertEquals(dataSchema.getColumnDataTypes(), - new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING, DataSchema.ColumnDataType.STRING, DataSchema.ColumnDataType.LONG, DataSchema.ColumnDataType.DOUBLE, DataSchema.ColumnDataType.DOUBLE}); + new ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.STRING, ColumnDataType.LONG, ColumnDataType.DOUBLE, ColumnDataType.DOUBLE}); assertEquals(dataTable.getNumberOfRows(), 0); + + // Distinct + queryContext = + QueryContextConverterUtils.getQueryContextFromPQL("SELECT DISTINCT(a, b) FROM table WHERE foo = 'bar'"); + dataTable = DataTableUtils.buildEmptyDataTable(queryContext); + dataSchema = dataTable.getDataSchema(); + assertEquals(dataSchema.getColumnNames(), new String[]{"distinct_a:b"}); + assertEquals(dataSchema.getColumnDataTypes(), new ColumnDataType[]{ColumnDataType.OBJECT}); + assertEquals(dataTable.getNumberOfRows(), 1); + Object firstObject = dataTable.getObject(0, 0); + assertTrue(firstObject instanceof DistinctTable); + DistinctTable distinctTable = (DistinctTable) firstObject; + assertEquals(distinctTable.size(), 0); + assertEquals(distinctTable.getDataSchema().getColumnNames(), new String[]{"a", "b"}); + assertEquals(distinctTable.getDataSchema().getColumnDataTypes(), + new ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.STRING}); } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org