This is an automated email from the ASF dual-hosted git repository. siddteotia pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new ea2f0aa [7867] Handle Select * with Extra Columns (#7959) ea2f0aa is described below commit ea2f0aa641e17301293662c8e79dfd94d8568438 Author: Prashant Pandey <84911643+suddend...@users.noreply.github.com> AuthorDate: Wed Feb 2 11:29:49 2022 +0530 [7867] Handle Select * with Extra Columns (#7959) * Expand '*' with other cols * added license header * Additional UTs * Additional UTs for happy case * linter * Checkstyle * Fix failing UT * Use imperative code instead of stream APIs for performance while expanding '*' * Expand * in natural ordering of columns * Linter fixes * Minor refactoring * Don't dedup columns, expand '*' request without other columns on broker * Add test description * Renamed from `expandStarExpressionToActualColumns` to `expandStarExpressionsToActualColumns` * Create new selections list instead of modifying the original * Update pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java Co-authored-by: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com> Co-authored-by: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com> --- .../requesthandler/BaseBrokerRequestHandler.java | 34 +++ .../SelectStarWithOtherColsRewriteTest.java | 276 +++++++++++++++++++++ 2 files changed, 310 insertions(+) diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java index 7d026af..192ddd1 100644 --- a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java +++ b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java @@ -106,6 +106,7 @@ public abstract class BaseBrokerRequestHandler implements BrokerRequestHandler { private static final String IN_SUBQUERY = "inSubquery"; private static final Expression FALSE = RequestUtils.getLiteralExpression(false); private static final Expression TRUE = RequestUtils.getLiteralExpression(true); + private static final Expression STAR = RequestUtils.getIdentifierExpression("*"); protected final PinotConfiguration _config; protected final RoutingManager _routingManager; @@ -1459,8 +1460,17 @@ public abstract class BaseBrokerRequestHandler implements BrokerRequestHandler { Map<String, String> columnNameMap) { Map<String, String> aliasMap = new HashMap<>(); if (pinotQuery != null) { + boolean hasStar = false; for (Expression expression : pinotQuery.getSelectList()) { fixColumnName(rawTableName, expression, columnNameMap, aliasMap, isCaseInsensitive); + //check if the select expression is '*' + if (!hasStar && expression.equals(STAR)) { + hasStar = true; + } + } + //if query has a '*' selection along with other columns + if (hasStar) { + expandStarExpressionsToActualColumns(pinotQuery, columnNameMap); } Expression filterExpression = pinotQuery.getFilterExpression(); if (filterExpression != null) { @@ -1487,6 +1497,30 @@ public abstract class BaseBrokerRequestHandler implements BrokerRequestHandler { } } + private static void expandStarExpressionsToActualColumns(PinotQuery pinotQuery, Map<String, String> columnNameMap) { + List<Expression> originalSelections = pinotQuery.getSelectList(); + //expand '*' + List<Expression> expandedSelections = new ArrayList<>(); + for (String tableCol : columnNameMap.values()) { + Expression newSelection = RequestUtils.createIdentifierExpression(tableCol); + //we exclude default virtual columns + if (tableCol.charAt(0) != '$') { + expandedSelections.add(newSelection); + } + } + //sort naturally + expandedSelections.sort(null); + List<Expression> newSelections = new ArrayList<>(); + for (Expression originalSelection : originalSelections) { + if (originalSelection.equals(STAR)) { + newSelections.addAll(expandedSelections); + } else { + newSelections.add(originalSelection); + } + } + pinotQuery.setSelectList(newSelections); + } + /** * Fixes the column names to the actual column names in the given broker request. */ diff --git a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/SelectStarWithOtherColsRewriteTest.java b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/SelectStarWithOtherColsRewriteTest.java new file mode 100644 index 0000000..90e0f12 --- /dev/null +++ b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/SelectStarWithOtherColsRewriteTest.java @@ -0,0 +1,276 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.pinot.broker.requesthandler; + +import com.google.common.collect.ImmutableMap; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.pinot.common.request.Expression; +import org.apache.pinot.common.request.PinotQuery; +import org.apache.pinot.sql.parsers.CalciteSqlParser; +import org.testng.Assert; +import org.testng.annotations.Test; + + +public class SelectStarWithOtherColsRewriteTest { + + private static final Map<String, String> COL_MAP; + + static { + //build table schema + ImmutableMap.Builder<String, String> builder = ImmutableMap.builder(); + builder.put("playerID", "playerID"); + builder.put("homeRuns", "homeRuns"); + builder.put("playerStint", "playerStint"); + builder.put("groundedIntoDoublePlays", "groundedIntoDoublePlays"); + builder.put("G_old", "G_old"); + builder.put("$segmentName", "$segmentName"); + builder.put("$docId", "$docId"); + builder.put("$hostName", "$hostName"); + COL_MAP = builder.build(); + } + + /** + * When the query contains only '*', it should be expanded into columns. + */ + @Test + public void testShouldExpandWhenOnlyStarIsSelected() { + String sql = "SELECT * FROM baseballStats"; + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); + List<Expression> newSelections = pinotQuery.getSelectList(); + Map<String, Integer> countMap = new HashMap<>(); + for (Expression selection : newSelections) { + String col = selection.getIdentifier().getName(); + countMap.put(col, countMap.getOrDefault(col, 0) + 1); + } + Assert.assertEquals(countMap.size(), 5, "More new selections than expected"); + Assert.assertTrue(countMap.keySet().containsAll(COL_MAP.keySet().stream().filter(a -> !a.startsWith("$")).collect( + Collectors.toList())), "New selections contain virtual columns"); + countMap.forEach( + (key, value) -> Assert.assertEquals((int) value, 1, key + " has more than one occurrences in new selection")); + } + + /** + * Expansion should not contain any virtual columns + */ + @Test + public void testShouldNotReturnExtraDefaultColumns() { + String sql = "SELECT $docId,*,$segmentName FROM baseballStats"; + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); + List<Expression> newSelections = pinotQuery.getSelectList(); + int docIdCnt = 0; + int segmentNameCnt = 0; + for (Expression newSelection : newSelections) { + String colName = newSelection.getIdentifier().getName(); + switch (colName) { + case "$docId": + docIdCnt++; + break; + case "$segmentName": + segmentNameCnt++; + break; + case "$hostName": + throw new RuntimeException("Extra default column returned"); + default: + } + } + Assert.assertEquals(docIdCnt, 1); + Assert.assertEquals(segmentNameCnt, 1); + } + + /** + * Columns should not be deduped + */ + @Test + public void testShouldNotDedupMultipleRequestedColumns() { + String sql = "SELECT playerID,*,G_old FROM baseballStats"; + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); + List<Expression> newSelections = pinotQuery.getSelectList(); + int playerIdCnt = 0; + int goldCount = 0; + for (Expression newSelection : newSelections) { + String colName = newSelection.getIdentifier().getName(); + switch (colName) { + case "playerID": + playerIdCnt++; + break; + case "G_old": + goldCount++; + break; + default: + } + } + Assert.assertEquals(playerIdCnt, 2, "playerID does not occur once"); + Assert.assertEquals(goldCount, 2, "G_old occurs does not occur once"); + } + + /** + * Selections should be returned in the requested order + */ + @Test + public void testSelectionOrder() { + String sql = "SELECT playerID,*,G_old FROM baseballStats"; + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); + List<Expression> newSelections = pinotQuery.getSelectList(); + Assert.assertEquals(newSelections.get(0).getIdentifier().getName(), "playerID"); + Assert.assertEquals(newSelections.get(newSelections.size() - 1).getIdentifier().getName(), "G_old"); + //the expanded list should be alphabetically sorted + List<Expression> expandedSelections = newSelections.subList(1, newSelections.size() - 1); + List<Expression> assumedUnsortedSelections = new ArrayList<>(expandedSelections); + //sort alphabetically + assumedUnsortedSelections.sort(null); + Assert.assertEquals(expandedSelections, assumedUnsortedSelections, "Expanded selections not sorted alphabetically"); + } + + /** + * When the same column is requested twice, once with an alias and once as part of *, then it should be returned twice + */ + @Test + public void testAliasing() { + String sql = "SELECT playerID as pid,* FROM baseballStats"; + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); + List<Expression> newSelections = pinotQuery.getSelectList(); + Assert.assertTrue(newSelections.get(0).isSetFunctionCall()); + Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "AS"); + Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), + "playerID"); + Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "pid"); + boolean playerIdPresent = false; + for (int i = 1; i < newSelections.size(); i++) { + if (newSelections.get(i).getIdentifier().getName().equals("playerID")) { + playerIdPresent = true; + break; + } + } + Assert.assertTrue(playerIdPresent, "playerID col is missing"); + } + + /** + * When a function is applied to a column, then that col is returned along with the original column + */ + @Test + public void testFuncOnColumns1() { + String sql = "SELECT sqrt(homeRuns),* FROM baseballStats"; + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); + List<Expression> newSelections = pinotQuery.getSelectList(); + Assert.assertTrue(newSelections.get(0).isSetFunctionCall()); + Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "SQRT"); + Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), + "homeRuns"); + //homeRuns is returned as well + int homeRunsCnt = 0; + for (Expression selection : newSelections) { + if (selection.isSetIdentifier() && selection.getIdentifier().getName().equals("homeRuns")) { + homeRunsCnt++; + } + } + Assert.assertEquals(homeRunsCnt, 1); + } + + /** + * When a function is applied to a column, then that col is returned along with the original column + */ + @Test + public void testFuncOnColumns2() { + String sql = "SELECT add(homeRuns,groundedIntoDoublePlays),* FROM baseballStats"; + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); + List<Expression> newSelections = pinotQuery.getSelectList(); + Assert.assertTrue(newSelections.get(0).isSetFunctionCall()); + Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "ADD"); + Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), + "homeRuns"); + Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), + "groundedIntoDoublePlays"); + //homeRuns is returned as well + int homeRunsCnt = 0; + int groundedIntoDoublePlaysCnt = 0; + for (Expression selection : newSelections) { + if (selection.isSetIdentifier() && selection.getIdentifier().getName().equals("homeRuns")) { + homeRunsCnt++; + } else if (selection.isSetIdentifier() && selection.getIdentifier().getName().equals("groundedIntoDoublePlays")) { + groundedIntoDoublePlaysCnt++; + } + } + Assert.assertEquals(homeRunsCnt, 1); + Assert.assertEquals(groundedIntoDoublePlaysCnt, 1); + } + + /** + * When 'n' no. of unqualified * are present, then each column is returned 'n' times. + */ + @Test + public void testMultipleUnqualifiedStars() { + String sql = "SELECT *,* FROM baseballStats"; + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); + List<Expression> newSelections = pinotQuery.getSelectList(); + Assert.assertEquals(newSelections.get(0).getIdentifier().getName(), "G_old"); + Assert.assertEquals(newSelections.get(1).getIdentifier().getName(), "groundedIntoDoublePlays"); + Assert.assertEquals(newSelections.get(2).getIdentifier().getName(), "homeRuns"); + Assert.assertEquals(newSelections.get(3).getIdentifier().getName(), "playerID"); + Assert.assertEquals(newSelections.get(4).getIdentifier().getName(), "playerStint"); + Assert.assertEquals(newSelections.get(5).getIdentifier().getName(), "G_old"); + Assert.assertEquals(newSelections.get(6).getIdentifier().getName(), "groundedIntoDoublePlays"); + Assert.assertEquals(newSelections.get(7).getIdentifier().getName(), "homeRuns"); + Assert.assertEquals(newSelections.get(8).getIdentifier().getName(), "playerID"); + Assert.assertEquals(newSelections.get(9).getIdentifier().getName(), "playerStint"); + } + + @Test + public void testAll() { + String sql = + "SELECT abs(homeRuns),sqrt(groundedIntoDoublePlays),*,$segmentName,$hostName,playerStint as pstint,playerID " + + "FROM baseballStats"; + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); + List<Expression> newSelections = pinotQuery.getSelectList(); + Assert.assertTrue(newSelections.get(0).isSetFunctionCall()); + Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "ABS"); + Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), + "homeRuns"); + Assert.assertTrue(newSelections.get(1).isSetFunctionCall()); + Assert.assertEquals(newSelections.get(1).getFunctionCall().getOperator(), "SQRT"); + Assert.assertEquals(newSelections.get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), + "groundedIntoDoublePlays"); + Assert.assertEquals(newSelections.get(2).getIdentifier().getName(), "G_old"); + Assert.assertEquals(newSelections.get(3).getIdentifier().getName(), "groundedIntoDoublePlays"); + Assert.assertEquals(newSelections.get(4).getIdentifier().getName(), "homeRuns"); + Assert.assertEquals(newSelections.get(5).getIdentifier().getName(), "playerID"); + Assert.assertEquals(newSelections.get(6).getIdentifier().getName(), "playerStint"); + Assert.assertEquals(newSelections.get(7).getIdentifier().getName(), "$segmentName"); + Assert.assertEquals(newSelections.get(8).getIdentifier().getName(), "$hostName"); + Assert.assertEquals(newSelections.get(9).getFunctionCall().getOperator(), "AS"); + Assert.assertEquals(newSelections.get(9).getFunctionCall().getOperands().get(0).getIdentifier().getName(), + "playerStint"); + Assert.assertEquals(newSelections.get(9).getFunctionCall().getOperands().get(1).getIdentifier().getName(), + "pstint"); + Assert.assertEquals(newSelections.get(10).getIdentifier().getName(), "playerID"); + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org