This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 79b12ed  Fix empty data table for distinct query (#6363)
79b12ed is described below

commit 79b12ed36fccd28cd68937ad53d9df76cd29c9b3
Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com>
AuthorDate: Thu Dec 17 13:39:07 2020 -0800

    Fix empty data table for distinct query (#6363)
    
    Support building an empty data table for distinct queries. Currently an 
exception is thrown for distinct queries when all the segments are pruned.
---
 .../core/common/datatable/DataTableUtils.java      | 95 ++++++++++++++++------
 .../core/common/datatable/DataTableUtilsTest.java  | 28 +++++--
 2 files changed, 94 insertions(+), 29 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java
index b3dea8a..859a65c 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java
@@ -23,10 +23,14 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
 import org.apache.pinot.common.utils.DataTable;
 import org.apache.pinot.core.query.aggregation.function.AggregationFunction;
+import 
org.apache.pinot.core.query.aggregation.function.DistinctAggregationFunction;
+import org.apache.pinot.core.query.distinct.DistinctTable;
 import org.apache.pinot.core.query.request.context.ExpressionContext;
 import org.apache.pinot.core.query.request.context.QueryContext;
+import org.apache.pinot.core.query.request.context.utils.QueryContextUtils;
 import org.apache.pinot.core.util.QueryOptions;
 
 
@@ -86,40 +90,56 @@ public class DataTableUtils {
    */
   public static DataTable buildEmptyDataTable(QueryContext queryContext)
       throws IOException {
-    AggregationFunction[] aggregationFunctions = 
queryContext.getAggregationFunctions();
+    if (QueryContextUtils.isSelectionQuery(queryContext)) {
+      return buildEmptyDataTableForSelectionQuery(queryContext);
+    } else if (QueryContextUtils.isAggregationQuery(queryContext)) {
+      return buildEmptyDataTableForAggregationQuery(queryContext);
+    } else {
+      assert QueryContextUtils.isDistinctQuery(queryContext);
+      return buildEmptyDataTableForDistinctQuery(queryContext);
+    }
+  }
 
-    // Selection query.
-    if (aggregationFunctions == null) {
-      List<ExpressionContext> selectExpressions = 
queryContext.getSelectExpressions();
-      int numSelectExpressions = selectExpressions.size();
-      String[] columnNames = new String[numSelectExpressions];
-      for (int i = 0; i < numSelectExpressions; i++) {
-        columnNames[i] = selectExpressions.get(i).toString();
-      }
-      DataSchema.ColumnDataType[] columnDataTypes = new 
DataSchema.ColumnDataType[numSelectExpressions];
-      // NOTE: Use STRING column data type as default for selection query.
-      Arrays.fill(columnDataTypes, DataSchema.ColumnDataType.STRING);
-      DataSchema dataSchema = new DataSchema(columnNames, columnDataTypes);
-      return new DataTableBuilder(dataSchema).build();
+  /**
+   * Helper method to build an empty data table for selection query.
+   */
+  private static DataTable buildEmptyDataTableForSelectionQuery(QueryContext 
queryContext) {
+    List<ExpressionContext> selectExpressions = 
queryContext.getSelectExpressions();
+    int numSelectExpressions = selectExpressions.size();
+    String[] columnNames = new String[numSelectExpressions];
+    for (int i = 0; i < numSelectExpressions; i++) {
+      columnNames[i] = selectExpressions.get(i).toString();
     }
+    ColumnDataType[] columnDataTypes = new 
ColumnDataType[numSelectExpressions];
+    // NOTE: Use STRING column data type as default for selection query
+    Arrays.fill(columnDataTypes, ColumnDataType.STRING);
+    DataSchema dataSchema = new DataSchema(columnNames, columnDataTypes);
+    return new DataTableBuilder(dataSchema).build();
+  }
 
-    // Aggregation query.
+  /**
+   * Helper method to build an empty data table for aggregation query.
+   */
+  private static DataTable buildEmptyDataTableForAggregationQuery(QueryContext 
queryContext)
+      throws IOException {
+    AggregationFunction[] aggregationFunctions = 
queryContext.getAggregationFunctions();
+    assert aggregationFunctions != null;
     int numAggregations = aggregationFunctions.length;
     List<ExpressionContext> groupByExpressions = 
queryContext.getGroupByExpressions();
     if (groupByExpressions != null) {
-      // Aggregation group-by query.
+      // Aggregation group-by query
 
       if (new QueryOptions(queryContext.getQueryOptions()).isGroupByModeSQL()) 
{
         // SQL format
 
         int numColumns = groupByExpressions.size() + numAggregations;
         String[] columnNames = new String[numColumns];
-        DataSchema.ColumnDataType[] columnDataTypes = new 
DataSchema.ColumnDataType[numColumns];
+        ColumnDataType[] columnDataTypes = new ColumnDataType[numColumns];
         int index = 0;
         for (ExpressionContext groupByExpression : groupByExpressions) {
           columnNames[index] = groupByExpression.toString();
           // Use STRING column data type as default for group-by expressions
-          columnDataTypes[index] = DataSchema.ColumnDataType.STRING;
+          columnDataTypes[index] = ColumnDataType.STRING;
           index++;
         }
         for (AggregationFunction aggregationFunction : aggregationFunctions) {
@@ -133,10 +153,9 @@ public class DataTableUtils {
         // PQL format
 
         String[] columnNames = new String[]{"functionName", 
"GroupByResultMap"};
-        DataSchema.ColumnDataType[] columnDataTypes =
-            new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING, 
DataSchema.ColumnDataType.OBJECT};
+        ColumnDataType[] columnDataTypes = new 
ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.OBJECT};
 
-        // Build the data table.
+        // Build the data table
         DataTableBuilder dataTableBuilder = new DataTableBuilder(new 
DataSchema(columnNames, columnDataTypes));
         for (AggregationFunction aggregationFunction : aggregationFunctions) {
           dataTableBuilder.startRow();
@@ -148,10 +167,10 @@ public class DataTableUtils {
         return dataTableBuilder.build();
       }
     } else {
-      // Aggregation only query.
+      // Aggregation only query
 
       String[] aggregationColumnNames = new String[numAggregations];
-      DataSchema.ColumnDataType[] columnDataTypes = new 
DataSchema.ColumnDataType[numAggregations];
+      ColumnDataType[] columnDataTypes = new ColumnDataType[numAggregations];
       Object[] aggregationResults = new Object[numAggregations];
       for (int i = 0; i < numAggregations; i++) {
         AggregationFunction aggregationFunction = aggregationFunctions[i];
@@ -162,7 +181,7 @@ public class DataTableUtils {
             
aggregationFunction.extractAggregationResult(aggregationFunction.createAggregationResultHolder());
       }
 
-      // Build the data table.
+      // Build the data table
       DataTableBuilder dataTableBuilder = new DataTableBuilder(new 
DataSchema(aggregationColumnNames, columnDataTypes));
       dataTableBuilder.startRow();
       for (int i = 0; i < numAggregations; i++) {
@@ -186,4 +205,32 @@ public class DataTableUtils {
       return dataTableBuilder.build();
     }
   }
+
+  /**
+   * Helper method to build an empty data table for distinct query.
+   */
+  private static DataTable buildEmptyDataTableForDistinctQuery(QueryContext 
queryContext)
+      throws IOException {
+    AggregationFunction[] aggregationFunctions = 
queryContext.getAggregationFunctions();
+    assert aggregationFunctions != null && aggregationFunctions.length == 1
+        && aggregationFunctions[0] instanceof DistinctAggregationFunction;
+    DistinctAggregationFunction distinctAggregationFunction = 
(DistinctAggregationFunction) aggregationFunctions[0];
+
+    // Create the distinct table
+    String[] columnNames = distinctAggregationFunction.getColumns();
+    ColumnDataType[] columnDataTypes = new ColumnDataType[columnNames.length];
+    // NOTE: Use STRING column data type as default for distinct query
+    Arrays.fill(columnDataTypes, ColumnDataType.STRING);
+    DistinctTable distinctTable =
+        new DistinctTable(new DataSchema(columnNames, columnDataTypes), 
Collections.emptySet());
+
+    // Build the data table
+    DataTableBuilder dataTableBuilder = new DataTableBuilder(
+        new DataSchema(new 
String[]{distinctAggregationFunction.getColumnName()},
+            new ColumnDataType[]{ColumnDataType.OBJECT}));
+    dataTableBuilder.startRow();
+    dataTableBuilder.setColumn(0, distinctTable);
+    dataTableBuilder.finishRow();
+    return dataTableBuilder.build();
+  }
 }
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java
index 2b2a2a3..f2095ea 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java
@@ -21,12 +21,15 @@ package org.apache.pinot.core.common.datatable;
 import java.io.IOException;
 import java.util.Collections;
 import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
 import org.apache.pinot.common.utils.DataTable;
+import org.apache.pinot.core.query.distinct.DistinctTable;
 import org.apache.pinot.core.query.request.context.QueryContext;
 import 
org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils;
 import org.testng.annotations.Test;
 
 import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
 
 
 public class DataTableUtilsTest {
@@ -40,7 +43,7 @@ public class DataTableUtilsTest {
     DataTable dataTable = DataTableUtils.buildEmptyDataTable(queryContext);
     DataSchema dataSchema = dataTable.getDataSchema();
     assertEquals(dataSchema.getColumnNames(), new String[]{"*"});
-    assertEquals(dataSchema.getColumnDataTypes(), new 
DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING});
+    assertEquals(dataSchema.getColumnDataTypes(), new 
ColumnDataType[]{ColumnDataType.STRING});
     assertEquals(dataTable.getNumberOfRows(), 0);
 
     // Aggregation
@@ -50,7 +53,7 @@ public class DataTableUtilsTest {
     dataSchema = dataTable.getDataSchema();
     assertEquals(dataSchema.getColumnNames(), new String[]{"count_star", 
"sum_a", "max_b"});
     assertEquals(dataSchema.getColumnDataTypes(),
-        new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.LONG, 
DataSchema.ColumnDataType.DOUBLE, DataSchema.ColumnDataType.DOUBLE});
+        new ColumnDataType[]{ColumnDataType.LONG, ColumnDataType.DOUBLE, 
ColumnDataType.DOUBLE});
     assertEquals(dataTable.getNumberOfRows(), 1);
     assertEquals(dataTable.getLong(0, 0), 0L);
     assertEquals(dataTable.getDouble(0, 1), 0.0);
@@ -62,8 +65,7 @@ public class DataTableUtilsTest {
     dataTable = DataTableUtils.buildEmptyDataTable(queryContext);
     dataSchema = dataTable.getDataSchema();
     assertEquals(dataSchema.getColumnNames(), new String[]{"functionName", 
"GroupByResultMap"});
-    assertEquals(dataSchema.getColumnDataTypes(),
-        new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING, 
DataSchema.ColumnDataType.OBJECT});
+    assertEquals(dataSchema.getColumnDataTypes(), new 
ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.OBJECT});
     assertEquals(dataTable.getNumberOfRows(), 3);
     assertEquals(dataTable.getString(0, 0), "count_star");
     assertEquals(dataTable.getObject(0, 1), Collections.emptyMap());
@@ -79,7 +81,23 @@ public class DataTableUtilsTest {
     dataSchema = dataTable.getDataSchema();
     assertEquals(dataSchema.getColumnNames(), new String[]{"c", "d", 
"count(*)", "sum(a)", "max(b)"});
     assertEquals(dataSchema.getColumnDataTypes(),
-        new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING, 
DataSchema.ColumnDataType.STRING, DataSchema.ColumnDataType.LONG, 
DataSchema.ColumnDataType.DOUBLE, DataSchema.ColumnDataType.DOUBLE});
+        new ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.STRING, 
ColumnDataType.LONG, ColumnDataType.DOUBLE, ColumnDataType.DOUBLE});
     assertEquals(dataTable.getNumberOfRows(), 0);
+
+    // Distinct
+    queryContext =
+        QueryContextConverterUtils.getQueryContextFromPQL("SELECT DISTINCT(a, 
b) FROM table WHERE foo = 'bar'");
+    dataTable = DataTableUtils.buildEmptyDataTable(queryContext);
+    dataSchema = dataTable.getDataSchema();
+    assertEquals(dataSchema.getColumnNames(), new String[]{"distinct_a:b"});
+    assertEquals(dataSchema.getColumnDataTypes(), new 
ColumnDataType[]{ColumnDataType.OBJECT});
+    assertEquals(dataTable.getNumberOfRows(), 1);
+    Object firstObject = dataTable.getObject(0, 0);
+    assertTrue(firstObject instanceof DistinctTable);
+    DistinctTable distinctTable = (DistinctTable) firstObject;
+    assertEquals(distinctTable.size(), 0);
+    assertEquals(distinctTable.getDataSchema().getColumnNames(), new 
String[]{"a", "b"});
+    assertEquals(distinctTable.getDataSchema().getColumnDataTypes(),
+        new ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.STRING});
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to