This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 1a701b7  Support NOT LIKE and NOT BETWEEN (#8331)
1a701b7 is described below

commit 1a701b7d52c5d571689d4b2f8159f8c0cdda0acd
Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com>
AuthorDate: Wed Mar 9 19:30:08 2022 -0800

    Support NOT LIKE and NOT BETWEEN (#8331)
---
 .../apache/pinot/sql/parsers/CalciteSqlParser.java |  32 ++--
 .../core/operator/filter/FilterOperatorUtils.java  |   2 +-
 .../core/operator/filter/NotFilterOperator.java    |   2 +-
 .../org/apache/pinot/queries/BaseQueriesTest.java  |   9 --
 .../pinot/queries/NotOperatorQueriesTest.java      | 167 +++++++--------------
 5 files changed, 78 insertions(+), 134 deletions(-)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java 
b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
index 5c21ef0..11678f9 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
@@ -42,7 +42,9 @@ import org.apache.calcite.sql.SqlNumericLiteral;
 import org.apache.calcite.sql.SqlOrderBy;
 import org.apache.calcite.sql.SqlSelect;
 import org.apache.calcite.sql.SqlSelectKeyword;
+import org.apache.calcite.sql.fun.SqlBetweenOperator;
 import org.apache.calcite.sql.fun.SqlCase;
+import org.apache.calcite.sql.fun.SqlLikeOperator;
 import org.apache.calcite.sql.parser.SqlParseException;
 import org.apache.calcite.sql.parser.SqlParser;
 import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl;
@@ -55,6 +57,7 @@ import org.apache.pinot.common.request.ExpressionType;
 import org.apache.pinot.common.request.Function;
 import org.apache.pinot.common.request.PinotQuery;
 import org.apache.pinot.common.utils.request.RequestUtils;
+import org.apache.pinot.pql.parsers.pql2.ast.FilterKind;
 import org.apache.pinot.segment.spi.AggregationFunctionType;
 import org.apache.pinot.spi.utils.Pairs;
 import org.apache.pinot.sql.parsers.rewriter.QueryRewriter;
@@ -152,7 +155,7 @@ public class CalciteSqlParser {
       if (isAggregateExpression(selectExpression)) {
         aggregateExprCount++;
       } else if (hasGroupByClause && 
expressionOutsideGroupByList(selectExpression, groupByExprs)) {
-          throw new SqlCompilationException(
+        throw new SqlCompilationException(
             "'" + RequestUtils.prettyPrint(selectExpression) + "' should 
appear in GROUP BY clause.");
       }
     }
@@ -686,19 +689,21 @@ public class CalciteSqlParser {
 
   private static Expression compileFunctionExpression(SqlBasicCall 
functionNode) {
     SqlKind functionKind = functionNode.getKind();
+    boolean negated = false;
     String functionName;
     switch (functionKind) {
       case AND:
         return compileAndExpression(functionNode);
       case OR:
         return compileOrExpression(functionNode);
-      case COUNT:
-        SqlLiteral functionQuantifier = functionNode.getFunctionQuantifier();
-        if (functionQuantifier != null && 
functionQuantifier.toValue().equalsIgnoreCase("DISTINCT")) {
-          functionName = AggregationFunctionType.DISTINCTCOUNT.name();
-        } else {
-          functionName = AggregationFunctionType.COUNT.name();
-        }
+      // BETWEEN and LIKE might be negated (NOT BETWEEN, NOT LIKE)
+      case BETWEEN:
+        negated = ((SqlBetweenOperator) 
functionNode.getOperator()).isNegated();
+        functionName = SqlKind.BETWEEN.name();
+        break;
+      case LIKE:
+        negated = ((SqlLikeOperator) functionNode.getOperator()).isNegated();
+        functionName = SqlKind.LIKE.name();
         break;
       case OTHER:
       case OTHER_FUNCTION:
@@ -740,7 +745,16 @@ public class CalciteSqlParser {
     validateFunction(functionName, operands);
     Expression functionExpression = 
RequestUtils.getFunctionExpression(functionName);
     functionExpression.getFunctionCall().setOperands(operands);
-    return functionExpression;
+    if (negated) {
+      Expression negatedFunctionExpression = 
RequestUtils.getFunctionExpression(FilterKind.NOT.name());
+      // Do not use `Collections.singletonList()` because we might modify the 
operand later
+      List<Expression> negatedFunctionOperands = new ArrayList<>(1);
+      negatedFunctionOperands.add(functionExpression);
+      
negatedFunctionExpression.getFunctionCall().setOperands(negatedFunctionOperands);
+      return negatedFunctionExpression;
+    } else {
+      return functionExpression;
+    }
   }
 
   /**
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
index 4b6feb0..4fa91aa 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
@@ -180,7 +180,7 @@ public class FilterOperatorUtils {
           return 4;
         }
         if (filterOperator instanceof NotFilterOperator) {
-          return getPriority((NotFilterOperator) ((NotFilterOperator) 
filterOperator).getChildFilterOperator());
+          return getPriority(((NotFilterOperator) 
filterOperator).getChildFilterOperator());
         }
         if (filterOperator instanceof ScanBasedFilterOperator) {
           return getScanBasedFilterPriority((ScanBasedFilterOperator) 
filterOperator, 5, debugOptions);
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java
index 50a03b8..f1aeb50 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/NotFilterOperator.java
@@ -58,7 +58,7 @@ public class NotFilterOperator extends BaseFilterOperator {
     return new FilterBlock(new 
NotDocIdSet(_filterOperator.nextBlock().getBlockDocIdSet(), _numDocs));
   }
 
-  public Operator getChildFilterOperator() {
+  public BaseFilterOperator getChildFilterOperator() {
     return _filterOperator;
   }
 }
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java 
b/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java
index 0333985..e283978 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java
@@ -241,15 +241,6 @@ public abstract class BaseQueriesTest {
     return brokerResponse;
   }
 
-  /**
-   * Run optimized SQL query on multiple index segments.
-   * <p>Use this to test the whole flow from server to broker.
-   * <p>The result should be equivalent to querying 4 identical index segments.
-   */
-  protected BrokerResponseNative getBrokerResponseForOptimizedSqlQuery(String 
sqlQuery, @Nullable Schema schema) {
-    return getBrokerResponseForOptimizedSqlQuery(sqlQuery, null, schema, 
PLAN_MAKER);
-  }
-
   protected BrokerResponseNative getBrokerResponseForOptimizedSqlQuery(String 
sqlQuery, @Nullable TableConfig config,
       @Nullable Schema schema) {
     return getBrokerResponseForOptimizedSqlQuery(sqlQuery, config, schema, 
PLAN_MAKER);
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java 
b/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java
index ca00dd5..ccb9626 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/queries/NotOperatorQueriesTest.java
@@ -19,42 +19,38 @@
 package org.apache.pinot.queries;
 
 import java.io.File;
-import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 import org.apache.commons.io.FileUtils;
 import org.apache.pinot.common.response.broker.BrokerResponseNative;
 import org.apache.pinot.common.response.broker.ResultTable;
-import org.apache.pinot.core.common.Operator;
-import org.apache.pinot.core.operator.blocks.IntermediateResultsBlock;
+import org.apache.pinot.core.operator.query.AggregationOperator;
 import 
org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
 import 
org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
-import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
 import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
 import org.apache.pinot.segment.spi.ImmutableSegment;
 import org.apache.pinot.segment.spi.IndexSegment;
 import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
-import org.apache.pinot.spi.config.table.FSTType;
-import org.apache.pinot.spi.config.table.FieldConfig;
 import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.readers.GenericRow;
 import org.apache.pinot.spi.data.readers.RecordReader;
+import org.apache.pinot.spi.utils.ReadMode;
 import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
-import org.testng.Assert;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
+
 
 public class NotOperatorQueriesTest extends BaseQueriesTest {
   private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), 
"NotOperatorQueriesTest");
-  private static final String TABLE_NAME = "MyTable";
+  private static final String TABLE_NAME = "testTable";
   private static final String SEGMENT_NAME = "testSegment";
   private static final String FIRST_INT_COL_NAME = "FIRST_INT_COL";
   private static final String SECOND_INT_COL_NAME = "SECOND_INT_COL";
@@ -85,21 +81,10 @@ public class NotOperatorQueriesTest extends BaseQueriesTest 
{
       throws Exception {
     FileUtils.deleteQuietly(INDEX_DIR);
 
-    List<IndexSegment> segments = new ArrayList<>();
     buildSegment();
-    IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
-    Set<String> invertedIndexCols = new HashSet<>();
-    invertedIndexCols.add(FIRST_INT_COL_NAME);
-    indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
-    Set<String> fstIndexCols = new HashSet<>();
-    fstIndexCols.add(DOMAIN_NAMES_COL);
-    indexLoadingConfig.setFSTIndexColumns(fstIndexCols);
-    indexLoadingConfig.setFSTIndexType(FSTType.LUCENE);
-    ImmutableSegment segment = ImmutableSegmentLoader.load(new File(INDEX_DIR, 
SEGMENT_NAME), indexLoadingConfig);
-    segments.add(segment);
-
-    _indexSegment = segment;
-    _indexSegments = segments;
+    ImmutableSegment immutableSegment = ImmutableSegmentLoader.load(new 
File(INDEX_DIR, SEGMENT_NAME), ReadMode.mmap);
+    _indexSegment = immutableSegment;
+    _indexSegments = Arrays.asList(immutableSegment, immutableSegment);
   }
 
   private List<String> getDomainNames() {
@@ -109,15 +94,15 @@ public class NotOperatorQueriesTest extends 
BaseQueriesTest {
         "www.sd.domain2.co.bc", "www.sd.domain2.co.cd");
   }
 
-  private List<GenericRow> createTestData(int numRows) {
+  private List<GenericRow> createTestData() {
     List<GenericRow> rows = new ArrayList<>();
     List<String> domainNames = getDomainNames();
-    for (int i = 0; i < numRows; i++) {
+    for (int i = 0; i < NUM_ROWS; i++) {
       String domain = domainNames.get(i % domainNames.size());
       GenericRow row = new GenericRow();
-      row.putField(FIRST_INT_COL_NAME, i);
-      row.putField(SECOND_INT_COL_NAME, INT_BASE_VALUE + i);
-      row.putField(DOMAIN_NAMES_COL, domain);
+      row.putValue(FIRST_INT_COL_NAME, i);
+      row.putValue(SECOND_INT_COL_NAME, INT_BASE_VALUE + i);
+      row.putValue(DOMAIN_NAMES_COL, domain);
       rows.add(row);
     }
     return rows;
@@ -125,15 +110,8 @@ public class NotOperatorQueriesTest extends 
BaseQueriesTest {
 
   private void buildSegment()
       throws Exception {
-    List<GenericRow> rows = createTestData(NUM_ROWS);
-    List<FieldConfig> fieldConfigs = new ArrayList<>();
-    fieldConfigs.add(
-        new FieldConfig(DOMAIN_NAMES_COL, FieldConfig.EncodingType.DICTIONARY, 
FieldConfig.IndexType.FST, null, null));
-    fieldConfigs.add(
-        new FieldConfig(FIRST_INT_COL_NAME, 
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.INVERTED, null,
-            null));
-    TableConfig tableConfig = new 
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
-        
.setInvertedIndexColumns(Arrays.asList(FIRST_INT_COL_NAME)).setFieldConfigList(fieldConfigs).build();
+    List<GenericRow> rows = createTestData();
+    TableConfig tableConfig = new 
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build();
     Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
         .addSingleValueDimension(FIRST_INT_COL_NAME, FieldSpec.DataType.INT)
         .addSingleValueDimension(DOMAIN_NAMES_COL, FieldSpec.DataType.STRING)
@@ -156,92 +134,53 @@ public class NotOperatorQueriesTest extends 
BaseQueriesTest {
     FileUtils.deleteQuietly(INDEX_DIR);
   }
 
-  private void testSelectionResults(String query, int expectedResultSize, 
List<Serializable[]> expectedResults) {
-    Operator<IntermediateResultsBlock> operator = 
getOperatorForSqlQuery(query);
-    IntermediateResultsBlock operatorResult = operator.nextBlock();
-    List<Object[]> resultset = (List<Object[]>) 
operatorResult.getSelectionResult();
-    Assert.assertNotNull(resultset);
-    Assert.assertEquals(resultset.size(), expectedResultSize);
-    if (expectedResults != null) {
-      for (int i = 0; i < expectedResultSize; i++) {
-        Object[] actualRow = resultset.get(i);
-        Object[] expectedRow = expectedResults.get(i);
-        Assert.assertEquals(actualRow.length, expectedRow.length);
-        for (int j = 0; j < actualRow.length; j++) {
-          Object actualColValue = actualRow[j];
-          Object expectedColValue = expectedRow[j];
-          Assert.assertEquals(actualColValue, expectedColValue);
-        }
-      }
-    }
-  }
-
-  private void testOptimizedQueryHelper(String query, int expectedResultSize, 
List<Serializable[]> expectedResults) {
-    BrokerResponseNative brokerResponseNative = 
getBrokerResponseForOptimizedSqlQuery(query, null);
-    ResultTable resultTable = brokerResponseNative.getResultTable();
-    List<Object[]> results = resultTable.getRows();
-    Assert.assertEquals(results.size(), expectedResultSize);
-
-    if (expectedResults != null) {
-      for (int i = 0; i < expectedResults.size(); i++) {
-        Object[] actualRow = results.get(i);
-        Serializable[] expectedRow = expectedResults.get(i);
-        Assert.assertEquals(actualRow, expectedRow);
-      }
-    }
+  private void testNotOperator(String filter, long expectedSegmentResult) {
+    String query = "SELECT COUNT(*) FROM testTable WHERE " + filter;
+    AggregationOperator aggregationOperator = getOperatorForSqlQuery(query);
+    List<Object> segmentResults = 
aggregationOperator.nextBlock().getAggregationResult();
+    assertNotNull(segmentResults);
+    assertEquals((long) segmentResults.get(0), expectedSegmentResult);
+
+    BrokerResponseNative brokerResponse = getBrokerResponseForSqlQuery(query);
+    ResultTable resultTable = brokerResponse.getResultTable();
+    Object[] brokerResults = resultTable.getRows().get(0);
+    assertEquals((long) brokerResults[0], 4 * expectedSegmentResult);
   }
 
   @Test
-  public void testLikeBasedNotOperator() {
-    String query =
-        "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT 
REGEXP_LIKE(DOMAIN_NAMES, 'www.domain1.*') LIMIT "
-            + "50000";
-    testSelectionResults(query, 768, null);
-    testOptimizedQueryHelper(query, 1536, null);
-
-    query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT 
REGEXP_LIKE(DOMAIN_NAMES, 'www.sd.domain1.*') "
-        + "LIMIT 50000";
-    testSelectionResults(query, 768, null);
-    testOptimizedQueryHelper(query, 1536, null);
-
-    query =
-        "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT 
REGEXP_LIKE(DOMAIN_NAMES, '.*domain1.*') LIMIT "
-            + "50000";
-    testSelectionResults(query, 512, null);
-    testOptimizedQueryHelper(query, 1024, null);
-
-    query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT 
REGEXP_LIKE(DOMAIN_NAMES, '.*domain.*') LIMIT "
-        + "50000";
-    testSelectionResults(query, 0, null);
-    testOptimizedQueryHelper(query, 0, null);
-
-    query =
-        "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT 
REGEXP_LIKE(DOMAIN_NAMES, '.*com') LIMIT 50000";
-    testSelectionResults(query, 768, null);
-    testOptimizedQueryHelper(query, 1536, null);
+  public void testLikePredicates() {
+    testNotOperator("DOMAIN_NAMES NOT LIKE 'www.domain1%'", 768);
+    testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, 'www.domain1.*')", 768);
+
+    testNotOperator("DOMAIN_NAMES NOT LIKE 'www.sd.domain1%'", 768);
+    testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, 'www.sd.domain1.*')", 768);
+
+    testNotOperator("DOMAIN_NAMES NOT LIKE '%domain1%'", 512);
+    testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, '.*domain1.*')", 512);
+
+    testNotOperator("DOMAIN_NAMES NOT LIKE '%domain%'", 0);
+    testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, '.*domain.*')", 0);
+
+    testNotOperator("DOMAIN_NAMES NOT LIKE '%com'", 768);
+    testNotOperator("NOT REGEXP_LIKE(DOMAIN_NAMES, '.*com')", 768);
   }
 
   @Test
-  public void testWeirdPredicates() {
-    String query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE 
NOT FIRST_INT_COL = 5 LIMIT 50000";
-    testSelectionResults(query, 1023, null);
+  public void testRangePredicates() {
+    testNotOperator("NOT FIRST_INT_COL = 5", 1023);
+    testNotOperator("NOT FIRST_INT_COL < 5", 1019);
+    testNotOperator("NOT FIRST_INT_COL > 5", 6);
 
-    query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT 
FIRST_INT_COL < 5 LIMIT " + "50000";
-    testSelectionResults(query, 1019, null);
-
-    query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT 
FIRST_INT_COL > 5 LIMIT " + "50000";
-    testSelectionResults(query, 6, null);
+    testNotOperator("FIRST_INT_COL NOT BETWEEN 10 AND 20", 1013);
+    testNotOperator("NOT FIRST_INT_COL BETWEEN 10 AND 20", 1013);
   }
 
   @Test
   public void testCompositePredicates() {
-    String query =
-        "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT 
(FIRST_INT_COL > 5 AND SECOND_INT_COL < 1009) "
-            + "LIMIT " + "50000";
-    testSelectionResults(query, 1021, null);
-
-    query = "SELECT FIRST_INT_COL, SECOND_INT_COL FROM MyTable WHERE NOT 
(FIRST_INT_COL < 5 OR SECOND_INT_COL > 2000) "
-        + "LIMIT " + "50000";
-    testSelectionResults(query, 996, null);
+    testNotOperator("NOT (FIRST_INT_COL > 5 AND SECOND_INT_COL < 1009)", 1021);
+    testNotOperator("NOT FIRST_INT_COL > 5 OR NOT SECOND_INT_COL < 1009", 
1021);
+
+    testNotOperator("NOT (FIRST_INT_COL < 5 OR SECOND_INT_COL > 2000)", 996);
+    testNotOperator("NOT FIRST_INT_COL < 5 AND NOT SECOND_INT_COL > 2000", 
996);
   }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to