This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 1a701b7 Support NOT LIKE and NOT BETWEEN (#8331) 1a701b7 is described below commit 1a701b7d52c5d571689d4b2f8159f8c0cdda0acd Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com> AuthorDate: Wed Mar 9 19:30:08 2022 -0800 Support NOT LIKE and NOT BETWEEN (#8331) --- .../apache/pinot/sql/parsers/CalciteSqlParser.java | 32 ++-- .../core/operator/filter/FilterOperatorUtils.java | 2 +- .../core/operator/filter/NotFilterOperator.java | 2 +- .../org/apache/pinot/queries/BaseQueriesTest.java | 9 -- .../pinot/queries/NotOperatorQueriesTest.java | 167 +++++++-------------- 5 files changed, 78 insertions(+), 134 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java index 5c21ef0..11678f9 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java @@ -42,7 +42,9 @@ import org.apache.calcite.sql.SqlNumericLiteral; import org.apache.calcite.sql.SqlOrderBy; import org.apache.calcite.sql.SqlSelect; import org.apache.calcite.sql.SqlSelectKeyword; +import org.apache.calcite.sql.fun.SqlBetweenOperator; import org.apache.calcite.sql.fun.SqlCase; +import org.apache.calcite.sql.fun.SqlLikeOperator; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl; @@ -55,6 +57,7 @@ import org.apache.pinot.common.request.ExpressionType; import org.apache.pinot.common.request.Function; import org.apache.pinot.common.request.PinotQuery; import org.apache.pinot.common.utils.request.RequestUtils; +import org.apache.pinot.pql.parsers.pql2.ast.FilterKind; import org.apache.pinot.segment.spi.AggregationFunctionType; import org.apache.pinot.spi.utils.Pairs; import org.apache.pinot.sql.parsers.rewriter.QueryRewriter; @@ -152,7 +155,7 @@ public class CalciteSqlParser { if (isAggregateExpression(selectExpression)) { aggregateExprCount++; } else if (hasGroupByClause && expressionOutsideGroupByList(selectExpression, groupByExprs)) { - throw new SqlCompilationException( + throw new SqlCompilationException( "'" + RequestUtils.prettyPrint(selectExpression) + "' should appear in GROUP BY clause."); } } @@ -686,19 +689,21 @@ public class CalciteSqlParser { private static Expression compileFunctionExpression(SqlBasicCall functionNode) { SqlKind functionKind = functionNode.getKind(); + boolean negated = false; String functionName; switch (functionKind) { case AND: return compileAndExpression(functionNode); case OR: return compileOrExpression(functionNode); - case COUNT: - SqlLiteral functionQuantifier = functionNode.getFunctionQuantifier(); - if (functionQuantifier != null && functionQuantifier.toValue().equalsIgnoreCase("DISTINCT")) { - functionName = AggregationFunctionType.DISTINCTCOUNT.name(); - } else { - functionName = AggregationFunctionType.COUNT.name(); - } + // BETWEEN and LIKE might be negated (NOT BETWEEN, NOT LIKE) + case BETWEEN: + negated = ((SqlBetweenOperator) functionNode.getOperator()).isNegated(); + functionName = SqlKind.BETWEEN.name(); + break; + case LIKE: + negated = ((SqlLikeOperator) functionNode.getOperator()).isNegated(); + functionName = SqlKind.LIKE.name(); break; case OTHER: case OTHER_FUNCTION: @@ -740,7 +745,16 @@ public class CalciteSqlParser { validateFunction(functionName, operands); Expression functionExpression = RequestUtils.getFunctionExpression(functionName); functionExpression.getFunctionCall().setOperands(operands); - return functionExpression; + if (negated) { + Expression negatedFunctionExpression = RequestUtils.getFunctionExpression(FilterKind.NOT.name()); + // Do not use `Collections.singletonList()` because we might modify the operand later + List<Expression> negatedFunctionOperands = new ArrayList<>(1); + negatedFunctionOperands.add(functionExpression); + negatedFunctionExpression.getFunctionCall().setOperands(negatedFunctionOperands); + return negatedFunctionExpression; + } else { + return functionExpression; + } } /** diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java index 4b6feb0..4fa91aa 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java @@ -180,7 +180,7 @@ public class FilterOperatorUtils { return 4; } if (filterOperator instanceof NotFilterOperator) { - return getPriority((NotFilterOperator) ((NotFilterOperator) filterOperator).getChildFilterOperator()); + return getPriority(((NotFilterOperator) filterOperator).getChildFilterOperator()); } if (filterOperator instanceof ScanBasedFilterOperator) { return getScanBasedFilterPriority((ScanBasedFilterOperator) filterOperator, 5, debugOptions); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java index 50a03b8..f1aeb50 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java @@ -58,7 +58,7 @@ public class NotFilterOperator extends BaseFilterOperator { return new FilterBlock(new NotDocIdSet(_filterOperator.nextBlock().getBlockDocIdSet(), _numDocs)); } - public Operator getChildFilterOperator() { + public BaseFilterOperator getChildFilterOperator() { return _filterOperator; } } diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java index 0333985..e283978 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java @@ -241,15 +241,6 @@ public abstract class BaseQueriesTest { return brokerResponse; } - /** - * Run optimized SQL query on multiple index segments. - * <p>Use this to test the whole flow from server to broker. - * <p>The result should be equivalent to querying 4 identical index segments. - */ - protected BrokerResponseNative getBrokerResponseForOptimizedSqlQuery(String sqlQuery, @Nullable Schema schema) { - return getBrokerResponseForOptimizedSqlQuery(sqlQuery, null, schema, PLAN_MAKER); - } - protected BrokerResponseNative getBrokerResponseForOptimizedSqlQuery(String sqlQuery, @Nullable TableConfig config, @Nullable Schema schema) { return getBrokerResponseForOptimizedSqlQuery(sqlQuery, config, schema, PLAN_MAKER); diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java index ca00dd5..ccb9626 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java @@ -19,42 +19,38 @@ package org.apache.pinot.queries; import java.io.File; -import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashSet; import java.util.List; -import java.util.Set; import org.apache.commons.io.FileUtils; import org.apache.pinot.common.response.broker.BrokerResponseNative; import org.apache.pinot.common.response.broker.ResultTable; -import org.apache.pinot.core.common.Operator; -import org.apache.pinot.core.operator.blocks.IntermediateResultsBlock; +import org.apache.pinot.core.operator.query.AggregationOperator; import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader; import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl; -import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader; import org.apache.pinot.segment.spi.ImmutableSegment; import org.apache.pinot.segment.spi.IndexSegment; import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig; -import org.apache.pinot.spi.config.table.FSTType; -import org.apache.pinot.spi.config.table.FieldConfig; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.config.table.TableType; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.data.readers.GenericRow; import org.apache.pinot.spi.data.readers.RecordReader; +import org.apache.pinot.spi.utils.ReadMode; import org.apache.pinot.spi.utils.builder.TableConfigBuilder; -import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + public class NotOperatorQueriesTest extends BaseQueriesTest { private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "NotOperatorQueriesTest"); - private static final String TABLE_NAME = "MyTable"; + private static final String TABLE_NAME = "testTable"; private static final String SEGMENT_NAME = "testSegment"; private static final String FIRST_INT_COL_NAME = "FIRST_INT_COL"; private static final String SECOND_INT_COL_NAME = "SECOND_INT_COL"; @@ -85,21 +81,10 @@ public class NotOperatorQueriesTest extends BaseQueriesTest { throws Exception { FileUtils.deleteQuietly(INDEX_DIR); - List<IndexSegment> segments = new ArrayList<>(); buildSegment(); - IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(); - Set<String> invertedIndexCols = new HashSet<>(); - invertedIndexCols.add(FIRST_INT_COL_NAME); - indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols); - Set<String> fstIndexCols = new HashSet<>(); - fstIndexCols.add(DOMAIN_NAMES_COL); - indexLoadingConfig.setFSTIndexColumns(fstIndexCols); - indexLoadingConfig.setFSTIndexType(FSTType.LUCENE); - ImmutableSegment segment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig); - segments.add(segment); - - _indexSegment = segment; - _indexSegments = segments; + ImmutableSegment immutableSegment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), ReadMode.mmap); + _indexSegment = immutableSegment; + _indexSegments = Arrays.asList(immutableSegment, immutableSegment); } private List<String> getDomainNames() { @@ -109,15 +94,15 @@ public class NotOperatorQueriesTest extends BaseQueriesTest { "www.sd.domain2.co.bc", "www.sd.domain2.co.cd"); } - private List<GenericRow> createTestData(int numRows) { + private List<GenericRow> createTestData() { List<GenericRow> rows = new ArrayList<>(); List<String> domainNames = getDomainNames(); - for (int i = 0; i < numRows; i++) { + for (int i = 0; i < NUM_ROWS; i++) { String domain = domainNames.get(i % domainNames.size()); GenericRow row = new GenericRow(); - row.putField(FIRST_INT_COL_NAME, i); - row.putField(SECOND_INT_COL_NAME, INT_BASE_VALUE + i); - row.putField(DOMAIN_NAMES_COL, domain); + row.putValue(FIRST_INT_COL_NAME, i); + row.putValue(SECOND_INT_COL_NAME, INT_BASE_VALUE + i); + row.putValue(DOMAIN_NAMES_COL, domain); rows.add(row); } return rows; @@ -125,15 +110,8 @@ public class NotOperatorQueriesTest extends BaseQueriesTest { private void buildSegment() throws Exception { - List<GenericRow> rows = createTestData(NUM_ROWS); - List<FieldConfig> fieldConfigs = new ArrayList<>(); - fieldConfigs.add( - new FieldConfig(DOMAIN_NAMES_COL, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null, null)); - fieldConfigs.add( - new FieldConfig(FIRST_INT_COL_NAME, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.INVERTED, null, - null)); - TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) - .setInvertedIndexColumns(Arrays.asList(FIRST_INT_COL_NAME)).setFieldConfigList(fieldConfigs).build(); + List<GenericRow> rows = createTestData(); + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build(); Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME) .addSingleValueDimension(FIRST_INT_COL_NAME, FieldSpec.DataType.INT) .addSingleValueDimension(DOMAIN_NAMES_COL, FieldSpec.DataType.STRING) @@ -156,92 +134,53 @@ public class NotOperatorQueriesTest extends BaseQueriesTest { FileUtils.deleteQuietly(INDEX_DIR); } - private void testSelectionResults(String query, int expectedResultSize, List<Serializable[]> expectedResults) { - Operator<IntermediateResultsBlock> operator = getOperatorForSqlQuery(query); - IntermediateResultsBlock operatorResult = operator.nextBlock(); - List<Object[]> resultset = (List<Object[]>) operatorResult.getSelectionResult(); - Assert.assertNotNull(resultset); - Assert.assertEquals(resultset.size(), expectedResultSize); - if (expectedResults != null) { - for (int i = 0; i < expectedResultSize; i++) { - Object[] actualRow = resultset.get(i); - Object[] expectedRow = expectedResults.get(i); - Assert.assertEquals(actualRow.length, expectedRow.length); - for (int j = 0; j < actualRow.length; j++) { - Object actualColValue = actualRow[j]; - Object expectedColValue = expectedRow[j]; - Assert.assertEquals(actualColValue, expectedColValue); - } - } - } - } - - private void testOptimizedQueryHelper(String query, int expectedResultSize, List<Serializable[]> expectedResults) { - BrokerResponseNative brokerResponseNative = getBrokerResponseForOptimizedSqlQuery(query, null); - ResultTable resultTable = brokerResponseNative.getResultTable(); - List<Object[]> results = resultTable.getRows(); - Assert.assertEquals(results.size(), expectedResultSize); - - if (expectedResults != null) { - for (int i = 0; i < expectedResults.size(); i++) { - Object[] actualRow = results.get(i); - Serializable[] expectedRow = expectedResults.get(i); - Assert.assertEquals(actualRow, expectedRow); - } - } + private void testNotOperator(String filter, long expectedSegmentResult) { + String query = "SELECT COUNT(*) FROM testTable WHERE " + filter; + AggregationOperator aggregationOperator = getOperatorForSqlQuery(query); + List<Object> segmentResults = aggregationOperator.nextBlock().getAggregationResult(); + assertNotNull(segmentResults); + assertEquals((long) segmentResults.get(0), expectedSegmentResult); + + BrokerResponseNative brokerResponse = getBrokerResponseForSqlQuery(query); + ResultTable resultTable = brokerResponse.getResultTable(); + Object[] brokerResults = resultTable.getRows().get(0); + assertEquals((long) brokerResults[0], 4 * expectedSegmentResult); } @Test - public void testLikeBasedNotOperator() { - String query = - "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT REGEXP_LIKE(DOMAIN_NAMES, 'www.domain1.*') LIMIT " - + "50000"; - testSelectionResults(query, 768, null); - testOptimizedQueryHelper(query, 1536, null); - - query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT REGEXP_LIKE(DOMAIN_NAMES, 'www.sd.domain1.*') " - + "LIMIT 50000"; - testSelectionResults(query, 768, null); - testOptimizedQueryHelper(query, 1536, null); - - query = - "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT REGEXP_LIKE(DOMAIN_NAMES, '.*domain1.*') LIMIT " - + "50000"; - testSelectionResults(query, 512, null); - testOptimizedQueryHelper(query, 1024, null); - - query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT REGEXP_LIKE(DOMAIN_NAMES, '.*domain.*') LIMIT " - + "50000"; - testSelectionResults(query, 0, null); - testOptimizedQueryHelper(query, 0, null); - - query = - "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT REGEXP_LIKE(DOMAIN_NAMES, '.*com') LIMIT 50000"; - testSelectionResults(query, 768, null); - testOptimizedQueryHelper(query, 1536, null); + public void testLikePredicates() { + testNotOperator("DOMAIN_NAMES NOT LIKE 'www.domain1%'", 768); + testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, 'www.domain1.*')", 768); + + testNotOperator("DOMAIN_NAMES NOT LIKE 'www.sd.domain1%'", 768); + testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, 'www.sd.domain1.*')", 768); + + testNotOperator("DOMAIN_NAMES NOT LIKE '%domain1%'", 512); + testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, '.*domain1.*')", 512); + + testNotOperator("DOMAIN_NAMES NOT LIKE '%domain%'", 0); + testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, '.*domain.*')", 0); + + testNotOperator("DOMAIN_NAMES NOT LIKE '%com'", 768); + testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, '.*com')", 768); } @Test - public void testWeirdPredicates() { - String query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT FIRST_INT_COL = 5 LIMIT 50000"; - testSelectionResults(query, 1023, null); + public void testRangePredicates() { + testNotOperator("NOT FIRST_INT_COL = 5", 1023); + testNotOperator("NOT FIRST_INT_COL < 5", 1019); + testNotOperator("NOT FIRST_INT_COL > 5", 6); - query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT FIRST_INT_COL < 5 LIMIT " + "50000"; - testSelectionResults(query, 1019, null); - - query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT FIRST_INT_COL > 5 LIMIT " + "50000"; - testSelectionResults(query, 6, null); + testNotOperator("FIRST_INT_COL NOT BETWEEN 10 AND 20", 1013); + testNotOperator("NOT FIRST_INT_COL BETWEEN 10 AND 20", 1013); } @Test public void testCompositePredicates() { - String query = - "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT (FIRST_INT_COL > 5 AND SECOND_INT_COL < 1009) " - + "LIMIT " + "50000"; - testSelectionResults(query, 1021, null); - - query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT (FIRST_INT_COL < 5 OR SECOND_INT_COL > 2000) " - + "LIMIT " + "50000"; - testSelectionResults(query, 996, null); + testNotOperator("NOT (FIRST_INT_COL > 5 AND SECOND_INT_COL < 1009)", 1021); + testNotOperator("NOT FIRST_INT_COL > 5 OR NOT SECOND_INT_COL < 1009", 1021); + + testNotOperator("NOT (FIRST_INT_COL < 5 OR SECOND_INT_COL > 2000)", 996); + testNotOperator("NOT FIRST_INT_COL < 5 AND NOT SECOND_INT_COL > 2000", 996); } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org