This is an automated email from the ASF dual-hosted git repository. xiangfu pushed a commit to branch rewrite-non-groupby-to-distinct in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
commit b1c65e7746af7e310bf733127be5e84db407fdcd Author: Xiang Fu <fx19880...@gmail.com> AuthorDate: Wed Jul 8 21:29:41 2020 -0700 Rewrite non-aggregate group by query to distinct query --- .../apache/pinot/sql/parsers/CalciteSqlParser.java | 30 ++++++++++++ .../pinot/sql/parsers/CalciteSqlCompilerTest.java | 54 ++++++++++++++++++++++ .../tests/ClusterIntegrationTestUtils.java | 12 ++++- .../tests/OfflineClusterIntegrationTest.java | 41 ++++++++++++++++ 4 files changed, 136 insertions(+), 1 deletion(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java index de14a51..fffb5bd 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java @@ -19,6 +19,8 @@ package org.apache.pinot.sql.parsers; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -331,12 +333,40 @@ public class CalciteSqlParser { pinotQuery.setFilterExpression(updatedFilterExpression); } + // Rewrite GroupBy to Distinct + rewriteGroupByToDistinct(pinotQuery); + // Update alias Map<Identifier, Expression> aliasMap = extractAlias(pinotQuery.getSelectList()); applyAlias(aliasMap, pinotQuery); validate(aliasMap, pinotQuery); } + private static void rewriteGroupByToDistinct(PinotQuery pinotQuery) { + boolean hasAggregation = false; + for (Expression select : pinotQuery.getSelectList()) { + if (isAggregateExpression(select)) { + hasAggregation = true; + } + } + if (!hasAggregation && pinotQuery.getGroupByListSize() > 0) { + Set<String> selectIdentifiers = extractIdentifiers(pinotQuery.getSelectList()); + Set<String> groupByIdentifiers = extractIdentifiers(pinotQuery.getGroupByList()); + if (groupByIdentifiers.containsAll(selectIdentifiers)) { + Expression distinctExpression = RequestUtils.getFunctionExpression("DISTINCT"); + for (Expression select : pinotQuery.getSelectList()) { + distinctExpression.getFunctionCall().addToOperands(select); + } + pinotQuery.setSelectList(Arrays.asList(distinctExpression)); + pinotQuery.setGroupByList(Collections.emptyList()); + } else { + selectIdentifiers.removeAll(groupByIdentifiers); + throw new SqlCompilationException(String.format("For non-aggregation group by query, all the identifiers in select clause should be in groupBys. Found identifier: %s", + Arrays.toString(selectIdentifiers.toArray(new String[0])))); + } + } + } + private static void invokeCompileTimeFunctions(PinotQuery pinotQuery) { for (int i = 0; i < pinotQuery.getSelectListSize(); i++) { Expression expression = invokeCompileTimeFunctionExpression(pinotQuery.getSelectList().get(i)); diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java index 826741c..3b84937 100644 --- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java @@ -1724,4 +1724,58 @@ public class CalciteSqlCompilerTest { Assert.assertEquals(brokerRequest.getFilterQuery().getOperator(), FilterOperator.IS_NULL); Assert.assertEquals(brokerRequest.getFilterQuery().getColumn(), "col"); } + + @Test + public void testNonAggregationGroupByQuery() { + PinotQuery2BrokerRequestConverter converter = new PinotQuery2BrokerRequestConverter(); + String query = "SELECT col1 FROM foo GROUP BY col1"; + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + BrokerRequest brokerRequest = converter.convert(pinotQuery); + Assert.assertEquals(pinotQuery.getSelectListSize(), 1); + Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT"); + Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1"); + + Assert.assertEquals(brokerRequest.getAggregationsInfo().size(), 1); + Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationType().toUpperCase(), "DISTINCT"); + Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationParams().get("column"), "col1"); + + query = "SELECT col1, col2 FROM foo GROUP BY col1, col2"; + pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + brokerRequest = converter.convert(pinotQuery); + Assert.assertEquals(pinotQuery.getSelectListSize(), 1); + Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT"); + Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1"); + Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "col2"); + + Assert.assertEquals(brokerRequest.getAggregationsInfo().size(), 1); + Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationType().toUpperCase(), "DISTINCT"); + Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationParams().get("column"), "col1:col2"); + + query = "SELECT col1+col2*5 FROM foo GROUP BY col1, col2"; + pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + brokerRequest = converter.convert(pinotQuery); + Assert.assertEquals(pinotQuery.getSelectListSize(), 1); + Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT"); + Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperator(), "PLUS"); + Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1"); + Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1).getFunctionCall().getOperator(), "TIMES"); + Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col2"); + Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1).getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 5L); + + Assert.assertEquals(brokerRequest.getAggregationsInfo().size(), 1); + Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationType().toUpperCase(), "DISTINCT"); + Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationParams().get("column"), "plus(col1,times(col2,'5'))"); + } + + @Test(expectedExceptions = SqlCompilationException.class) + public void testInvalidNonAggregationGroupBy() { + // Not support Aggregation functions in case statements. + try { + CalciteSqlParser.compileToPinotQuery("SELECT col1+col2 FROM foo GROUP BY col1"); + } catch (SqlCompilationException e) { + Assert.assertEquals(e.getMessage(), + "For non-aggregation group by query, all the identifiers in select clause should be in groupBys. Found identifier: [col2]"); + throw e; + } + } } \ No newline at end of file diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java index 64b1c47..85c15e7 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java @@ -857,7 +857,7 @@ public class ClusterIntegrationTestUtils { // compare results BrokerRequest brokerRequest = PinotQueryParserFactory.get(CommonConstants.Broker.Request.SQL).compileToBrokerRequest(pinotQuery); - if (brokerRequest.getSelections() != null) { // selection + if (isSelectionQuery(brokerRequest)) { // selection // TODO: compare results for selection queries, w/o order by // Compare results for selection queries, with order by @@ -962,6 +962,16 @@ public class ClusterIntegrationTestUtils { } } + private static boolean isSelectionQuery(BrokerRequest brokerRequest) { + if (brokerRequest.getSelections() != null) { + return true; + } + if (brokerRequest.getAggregationsInfo() != null && brokerRequest.getAggregationsInfo().get(0).getAggregationType().equalsIgnoreCase("DISTINCT")) { + return true; + } + return false; + } + private static boolean fuzzyCompare(String h2Value, String brokerValue, String connectionValue) { // Fuzzy compare expected value and actual value boolean error = false; diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java index 3cde6b5..774f209 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java @@ -1074,18 +1074,59 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet String pql = "SELECT DISTINCT(Carrier) FROM mytable LIMIT 1000000"; String sql = "SELECT DISTINCT Carrier FROM mytable"; testQuery(pql, Collections.singletonList(sql)); + pql = "SELECT DISTINCT Carrier FROM mytable LIMIT 1000000"; + testSqlQuery(pql, Collections.singletonList(sql)); pql = "SELECT DISTINCT(Carrier, DestAirportID) FROM mytable LIMIT 1000000"; sql = "SELECT DISTINCT Carrier, DestAirportID FROM mytable"; testQuery(pql, Collections.singletonList(sql)); + pql = "SELECT DISTINCT Carrier, DestAirportID FROM mytable LIMIT 1000000"; + testSqlQuery(pql, Collections.singletonList(sql)); pql = "SELECT DISTINCT(Carrier, DestAirportID, DestStateName) FROM mytable LIMIT 1000000"; sql = "SELECT DISTINCT Carrier, DestAirportID, DestStateName FROM mytable"; testQuery(pql, Collections.singletonList(sql)); + pql = "SELECT DISTINCT Carrier, DestAirportID, DestStateName FROM mytable LIMIT 1000000"; + testSqlQuery(pql, Collections.singletonList(sql)); pql = "SELECT DISTINCT(Carrier, DestAirportID, DestCityName) FROM mytable LIMIT 1000000"; sql = "SELECT DISTINCT Carrier, DestAirportID, DestCityName FROM mytable"; testQuery(pql, Collections.singletonList(sql)); + pql = "SELECT DISTINCT Carrier, DestAirportID, DestCityName FROM mytable LIMIT 1000000"; + testSqlQuery(pql, Collections.singletonList(sql)); + } + + @Test + public void testNonAggregationGroupByQuery() + throws Exception { + // by default 10 rows will be returned, so use high limit + String pql = "SELECT Carrier FROM mytable GROUP BY Carrier LIMIT 1000000"; + String sql = "SELECT Carrier FROM mytable GROUP BY Carrier"; + testSqlQuery(pql, Collections.singletonList(sql)); + + pql = "SELECT Carrier, DestAirportID FROM mytable GROUP BY Carrier, DestAirportID LIMIT 1000000"; + sql = "SELECT Carrier, DestAirportID FROM mytable GROUP BY Carrier, DestAirportID"; + testSqlQuery(pql, Collections.singletonList(sql)); + + pql = "SELECT Carrier, DestAirportID, DestStateName FROM mytable GROUP BY Carrier, DestAirportID, DestStateName LIMIT 1000000"; + sql = "SELECT Carrier, DestAirportID, DestStateName FROM mytable GROUP BY Carrier, DestAirportID, DestStateName"; + testSqlQuery(pql, Collections.singletonList(sql)); + + pql = "SELECT Carrier, DestAirportID, DestCityName FROM mytable GROUP BY Carrier, DestAirportID, DestCityName LIMIT 1000000"; + sql = "SELECT Carrier, DestAirportID, DestCityName FROM mytable GROUP BY Carrier, DestAirportID, DestCityName"; + testSqlQuery(pql, Collections.singletonList(sql)); + + pql = "SELECT ArrTime-DepTime FROM mytable GROUP BY ArrTime, DepTime LIMIT 1000000"; + sql = "SELECT ArrTime-DepTime FROM mytable GROUP BY ArrTime, DepTime"; + testSqlQuery(pql, Collections.singletonList(sql)); + + pql = "SELECT ArrTime-DepTime,ArrTime/3,DepTime*2 FROM mytable GROUP BY ArrTime, DepTime LIMIT 1000000"; + sql = "SELECT ArrTime-DepTime,ArrTime/3,DepTime*2 FROM mytable GROUP BY ArrTime, DepTime"; + testSqlQuery(pql, Collections.singletonList(sql)); + + pql = "SELECT ArrTime+DepTime FROM mytable GROUP BY ArrTime + DepTime LIMIT 1000000"; + sql = "SELECT ArrTime+DepTime FROM mytable GROUP BY ArrTime + DepTime"; + testSqlQuery(pql, Collections.singletonList(sql)); } @Test --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org