This is an automated email from the ASF dual-hosted git repository.

rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 61fc9190bc [multistage] Support IN and NOT-IN Clauses (#9374)
61fc9190bc is described below

commit 61fc9190bc1c2f15b3775acf892689a30da6df5e
Author: Ankit Sultana <ankitsult...@uber.com>
AuthorDate: Thu Sep 15 07:26:28 2022 +0530

    [multistage] Support IN and NOT-IN Clauses (#9374)
    
    * [multistage] Support IN Clause With 1 Argument
    
    * Working in/not-in
    * Refactor RexExpressionUtils after rebasing with master
---
 .../pinot/query/planner/logical/RexExpression.java | 26 +++----
 .../query/planner/logical/RexExpressionUtils.java  | 84 ++++++++++++++++++++++
 .../apache/pinot/query/QueryCompilationTest.java   |  4 ++
 .../pinot/query/QueryEnvironmentTestBase.java      |  2 +
 .../query/runtime/operator/AggregateOperator.java  |  4 +-
 .../pinot/query/runtime/QueryRunnerTest.java       | 17 +++++
 6 files changed, 117 insertions(+), 20 deletions(-)

diff --git 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java
 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java
index 32a3608c86..a8f51036ca 100644
--- 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java
+++ 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java
@@ -18,7 +18,6 @@
  */
 package org.apache.pinot.query.planner.logical;
 
-import com.google.common.base.Preconditions;
 import java.math.BigDecimal;
 import java.util.List;
 import java.util.stream.Collectors;
@@ -29,7 +28,6 @@ import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.sql.SqlKind;
-import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.util.NlsString;
 import org.apache.pinot.common.utils.PinotDataType;
 import org.apache.pinot.query.planner.serde.ProtoProperties;
@@ -52,30 +50,24 @@ public interface RexExpression {
     } else if (rexNode instanceof RexLiteral) {
       RexLiteral rexLiteral = ((RexLiteral) rexNode);
       FieldSpec.DataType dataType = toDataType(rexLiteral.getType());
-      return new RexExpression.Literal(dataType, rexLiteral.getTypeName(),
-          toRexValue(dataType, rexLiteral.getValue()));
+      return new RexExpression.Literal(dataType, toRexValue(dataType, 
rexLiteral.getValue()));
     } else if (rexNode instanceof RexCall) {
       RexCall rexCall = (RexCall) rexNode;
-      List<RexExpression> operands = 
rexCall.getOperands().stream().map(RexExpression::toRexExpression)
-          .collect(Collectors.toList());
-      return toRexExpression(rexCall, operands);
+      return toRexExpression(rexCall);
     } else {
       throw new IllegalArgumentException("Unsupported RexNode type with 
SqlKind: " + rexNode.getKind());
     }
   }
 
-  static RexExpression toRexExpression(RexCall rexCall, List<RexExpression> 
operands) {
+  static RexExpression toRexExpression(RexCall rexCall) {
     switch (rexCall.getKind()) {
       case CAST:
-        // CAST is being rewritten into "rexCall.CAST<targetType>(inputValue)",
-        //   - e.g. result type has already been converted into the CAST 
RexCall, so we assert single operand.
-        Preconditions.checkState(operands.size() == 1, "CAST takes exactly 2 
arguments");
-        RelDataType castType = rexCall.getType();
-        // add the 2nd argument as the source type info.
-        operands.add(new Literal(FieldSpec.DataType.STRING, 
rexCall.getOperands().get(0).getType().getSqlTypeName(),
-            toPinotDataType(rexCall.getOperands().get(0).getType()).name()));
-        return new RexExpression.FunctionCall(rexCall.getKind(), 
toDataType(rexCall.getType()), "CAST", operands);
+        return RexExpressionUtils.handleCast(rexCall);
+      case SEARCH:
+        return RexExpressionUtils.handleSearch(rexCall);
       default:
+        List<RexExpression> operands =
+            
rexCall.getOperands().stream().map(RexExpression::toRexExpression).collect(Collectors.toList());
         return new RexExpression.FunctionCall(rexCall.getKind(), 
toDataType(rexCall.getType()),
             rexCall.getOperator().getName(), operands);
     }
@@ -186,7 +178,7 @@ public interface RexExpression {
     public Literal() {
     }
 
-    public Literal(FieldSpec.DataType dataType, SqlTypeName sqlTypeName, 
@Nullable Object value) {
+    public Literal(FieldSpec.DataType dataType, @Nullable Object value) {
       _sqlKind = SqlKind.LITERAL;
       _dataType = dataType;
       _value = value;
diff --git 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java
 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java
new file mode 100644
index 0000000000..92d246b904
--- /dev/null
+++ 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.query.planner.logical;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Range;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.util.Sarg;
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.pinot.spi.data.FieldSpec;
+
+
+public class RexExpressionUtils {
+
+  private RexExpressionUtils() {
+  }
+
+  static RexExpression handleCast(RexCall rexCall) {
+    // CAST is being rewritten into "rexCall.CAST<targetType>(inputValue)",
+    //   - e.g. result type has already been converted into the CAST RexCall, 
so we assert single operand.
+    List<RexExpression> operands =
+        
rexCall.getOperands().stream().map(RexExpression::toRexExpression).collect(Collectors.toList());
+    Preconditions.checkState(operands.size() == 1, "CAST takes exactly 2 
arguments");
+    RelDataType castType = rexCall.getType();
+    // add the 2nd argument as the source type info.
+    operands.add(new RexExpression.Literal(FieldSpec.DataType.STRING,
+        
RexExpression.toPinotDataType(rexCall.getOperands().get(0).getType()).name()));
+    return new RexExpression.FunctionCall(rexCall.getKind(), 
RexExpression.toDataType(rexCall.getType()), "CAST",
+        operands);
+  }
+
+  // TODO: Add support for range filter expressions (e.g. a > 0 and a < 30)
+  static RexExpression handleSearch(RexCall rexCall) {
+    List<RexNode> operands = rexCall.getOperands();
+    RexInputRef rexInputRef = (RexInputRef) operands.get(0);
+    RexLiteral rexLiteral = (RexLiteral) operands.get(1);
+    FieldSpec.DataType dataType = 
RexExpression.toDataType(rexLiteral.getType());
+    Sarg sarg = rexLiteral.getValueAs(Sarg.class);
+    if (sarg.isPoints()) {
+      return new RexExpression.FunctionCall(SqlKind.IN, dataType, 
SqlKind.IN.name(), toFunctionOperands(rexInputRef,
+          sarg.rangeSet.asRanges(), dataType));
+    } else if (sarg.isComplementedPoints()) {
+      return new RexExpression.FunctionCall(SqlKind.NOT_IN, dataType, 
SqlKind.NOT_IN.name(),
+          toFunctionOperands(rexInputRef, 
sarg.rangeSet.complement().asRanges(), dataType));
+    } else {
+      throw new NotImplementedException("Range is not implemented yet");
+    }
+  }
+
+  private static List<RexExpression> toFunctionOperands(RexInputRef 
rexInputRef, Set<Range> ranges,
+      FieldSpec.DataType dataType) {
+    List<RexExpression> result = new ArrayList<>(ranges.size() + 1);
+    result.add(RexExpression.toRexExpression(rexInputRef));
+    for (Range range : ranges) {
+      result.add(new RexExpression.Literal(dataType, 
RexExpression.toRexValue(dataType, range.lowerEndpoint())));
+    }
+    return result;
+  }
+}
diff --git 
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
index bace47a3ed..4223a1717a 100644
--- 
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
+++ 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
@@ -255,6 +255,10 @@ public class QueryCompilationTest extends 
QueryEnvironmentTestBase {
         new Object[]{"SELECT b.col1 - a.col3 FROM a JOIN c ON a.col1 = 
c.col3", "Table 'b' not found"},
         // non-agg column not being grouped
         new Object[]{"SELECT a.col1, SUM(a.col3) FROM a", "'a.col1' is not 
being grouped"},
+        // empty IN clause fails compilation
+        new Object[]{"SELECT a.col1 FROM a WHERE a.col1 IN ()", "Encountered 
\"\" at line"},
+        // range filter queries are not supported right now
+        new Object[]{"SELECT a.col1 FROM a WHERE a.col1 > 'x' AND a.col1 < 
'y'", "Range is not implemented yet"}
     };
   }
 
diff --git 
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java
 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java
index 3d633e932f..799d0ef380 100644
--- 
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java
+++ 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java
@@ -67,6 +67,8 @@ public class QueryEnvironmentTestBase {
         new Object[]{"SELECT dateTrunc('DAY', a.ts + b.ts) FROM a JOIN b on 
a.col1 = b.col1 AND a.col2 = b.col2"},
         new Object[]{"SELECT a.col2, a.col3 FROM a JOIN b ON a.col1 = b.col1 "
             + " WHERE a.col3 >= 0 GROUP BY a.col2, a.col3"},
+        new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1 
WHERE a.col2 IN ('foo', 'bar') AND"
+            + " b.col2 NOT IN ('alice', 'charlie')"},
     };
   }
 }
diff --git 
a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java
 
b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java
index 67c5edfa99..cd184fe89f 100644
--- 
a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java
+++ 
b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java
@@ -25,7 +25,6 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import javax.annotation.Nullable;
-import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.pinot.common.request.context.ExpressionContext;
 import org.apache.pinot.common.utils.DataSchema;
 import org.apache.pinot.core.common.Operator;
@@ -98,8 +97,7 @@ public class AggregateOperator extends 
BaseOperator<TransferableBlock> {
   private RexExpression toAggregationFunctionOperand(RexExpression 
rexExpression) {
     List<RexExpression> functionOperands = ((RexExpression.FunctionCall) 
rexExpression).getFunctionOperands();
     Preconditions.checkState(functionOperands.size() < 2);
-    return functionOperands.size() > 0 ? functionOperands.get(0)
-        : new RexExpression.Literal(FieldSpec.DataType.INT, 
SqlTypeName.INTEGER, 1);
+    return functionOperands.size() > 0 ? functionOperands.get(0) : new 
RexExpression.Literal(FieldSpec.DataType.INT, 1);
   }
 
   @Override
diff --git 
a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java
 
b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java
index 7ae65076b8..71656f7817 100644
--- 
a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java
+++ 
b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java
@@ -109,6 +109,23 @@ public class QueryRunnerTest extends QueryRunnerTestBase {
         new Object[]{"SELECT a.col1, a.ts, b.col2, b.col3 FROM a JOIN b ON 
a.col1 = b.col2 "
             + " WHERE a.col3 >= 0 AND a.col2 = 'alice' AND b.col3 >= 0", 3},
 
+        // Join query with IN and Not-IN clause. Table A's side of join will 
return 9 rows and Table B's side will
+        // return 2 rows. Join will be only on col1=bar and since A will 
return 3 rows with that value and B will return
+        // 1 row, the final output will have 3 rows.
+        new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1 "
+            + " WHERE a.col1 IN ('foo', 'bar', 'alice') AND b.col2 NOT IN 
('foo', 'alice')", 3},
+
+        // Same query as above but written using OR/AND instead of IN.
+        new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1 "
+            + " WHERE (a.col1 = 'foo' OR a.col1 = 'bar' OR a.col1 = 'alice') 
AND b.col2 != 'foo'"
+            + " AND b.col2 != 'alice'", 3},
+
+        // Same as above but with single argument IN clauses. Left side of the 
join returns 3 rows, and the right side
+        // returns 5 rows. Only key where join succeeds is col1=foo, and since 
table B has only 1 row with that value,
+        // the number of rows should be 3.
+        new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1 "
+            + " WHERE a.col1 IN ('foo') AND b.col2 NOT IN ('')", 3},
+
         // Projection pushdown
         new Object[]{"SELECT a.col1, a.col3 + a.col3 FROM a WHERE a.col3 >= 0 
AND a.col2 = 'alice'", 3},
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to