This is an automated email from the ASF dual-hosted git repository. rongr pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 61fc9190bc [multistage] Support IN and NOT-IN Clauses (#9374) 61fc9190bc is described below commit 61fc9190bc1c2f15b3775acf892689a30da6df5e Author: Ankit Sultana <ankitsult...@uber.com> AuthorDate: Thu Sep 15 07:26:28 2022 +0530 [multistage] Support IN and NOT-IN Clauses (#9374) * [multistage] Support IN Clause With 1 Argument * Working in/not-in * Refactor RexExpressionUtils after rebasing with master --- .../pinot/query/planner/logical/RexExpression.java | 26 +++---- .../query/planner/logical/RexExpressionUtils.java | 84 ++++++++++++++++++++++ .../apache/pinot/query/QueryCompilationTest.java | 4 ++ .../pinot/query/QueryEnvironmentTestBase.java | 2 + .../query/runtime/operator/AggregateOperator.java | 4 +- .../pinot/query/runtime/QueryRunnerTest.java | 17 +++++ 6 files changed, 117 insertions(+), 20 deletions(-) diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java index 32a3608c86..a8f51036ca 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java @@ -18,7 +18,6 @@ */ package org.apache.pinot.query.planner.logical; -import com.google.common.base.Preconditions; import java.math.BigDecimal; import java.util.List; import java.util.stream.Collectors; @@ -29,7 +28,6 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.NlsString; import org.apache.pinot.common.utils.PinotDataType; import org.apache.pinot.query.planner.serde.ProtoProperties; @@ -52,30 +50,24 @@ public interface RexExpression { } else if (rexNode 
instanceof RexLiteral) { RexLiteral rexLiteral = ((RexLiteral) rexNode); FieldSpec.DataType dataType = toDataType(rexLiteral.getType()); - return new RexExpression.Literal(dataType, rexLiteral.getTypeName(), - toRexValue(dataType, rexLiteral.getValue())); + return new RexExpression.Literal(dataType, toRexValue(dataType, rexLiteral.getValue())); } else if (rexNode instanceof RexCall) { RexCall rexCall = (RexCall) rexNode; - List<RexExpression> operands = rexCall.getOperands().stream().map(RexExpression::toRexExpression) - .collect(Collectors.toList()); - return toRexExpression(rexCall, operands); + return toRexExpression(rexCall); } else { throw new IllegalArgumentException("Unsupported RexNode type with SqlKind: " + rexNode.getKind()); } } - static RexExpression toRexExpression(RexCall rexCall, List<RexExpression> operands) { + static RexExpression toRexExpression(RexCall rexCall) { switch (rexCall.getKind()) { case CAST: - // CAST is being rewritten into "rexCall.CAST<targetType>(inputValue)", - // - e.g. result type has already been converted into the CAST RexCall, so we assert single operand. - Preconditions.checkState(operands.size() == 1, "CAST takes exactly 2 arguments"); - RelDataType castType = rexCall.getType(); - // add the 2nd argument as the source type info. 
- operands.add(new Literal(FieldSpec.DataType.STRING, rexCall.getOperands().get(0).getType().getSqlTypeName(), - toPinotDataType(rexCall.getOperands().get(0).getType()).name())); - return new RexExpression.FunctionCall(rexCall.getKind(), toDataType(rexCall.getType()), "CAST", operands); + return RexExpressionUtils.handleCast(rexCall); + case SEARCH: + return RexExpressionUtils.handleSearch(rexCall); default: + List<RexExpression> operands = + rexCall.getOperands().stream().map(RexExpression::toRexExpression).collect(Collectors.toList()); return new RexExpression.FunctionCall(rexCall.getKind(), toDataType(rexCall.getType()), rexCall.getOperator().getName(), operands); } @@ -186,7 +178,7 @@ public interface RexExpression { public Literal() { } - public Literal(FieldSpec.DataType dataType, SqlTypeName sqlTypeName, @Nullable Object value) { + public Literal(FieldSpec.DataType dataType, @Nullable Object value) { _sqlKind = SqlKind.LITERAL; _dataType = dataType; _value = value; diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java new file mode 100644 index 0000000000..92d246b904 --- /dev/null +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.query.planner.logical; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Range; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.util.Sarg; +import org.apache.commons.lang3.NotImplementedException; +import org.apache.pinot.spi.data.FieldSpec; + + +public class RexExpressionUtils { + + private RexExpressionUtils() { + } + + static RexExpression handleCast(RexCall rexCall) { + // CAST is being rewritten into "rexCall.CAST<targetType>(inputValue)", + // - e.g. result type has already been converted into the CAST RexCall, so we assert single operand. + List<RexExpression> operands = + rexCall.getOperands().stream().map(RexExpression::toRexExpression).collect(Collectors.toList()); + Preconditions.checkState(operands.size() == 1, "CAST takes exactly 2 arguments"); + RelDataType castType = rexCall.getType(); + // add the 2nd argument as the source type info. 
+ operands.add(new RexExpression.Literal(FieldSpec.DataType.STRING, + RexExpression.toPinotDataType(rexCall.getOperands().get(0).getType()).name())); + return new RexExpression.FunctionCall(rexCall.getKind(), RexExpression.toDataType(rexCall.getType()), "CAST", + operands); + } + + // TODO: Add support for range filter expressions (e.g. a > 0 and a < 30) + static RexExpression handleSearch(RexCall rexCall) { + List<RexNode> operands = rexCall.getOperands(); + RexInputRef rexInputRef = (RexInputRef) operands.get(0); + RexLiteral rexLiteral = (RexLiteral) operands.get(1); + FieldSpec.DataType dataType = RexExpression.toDataType(rexLiteral.getType()); + Sarg sarg = rexLiteral.getValueAs(Sarg.class); + if (sarg.isPoints()) { + return new RexExpression.FunctionCall(SqlKind.IN, dataType, SqlKind.IN.name(), toFunctionOperands(rexInputRef, + sarg.rangeSet.asRanges(), dataType)); + } else if (sarg.isComplementedPoints()) { + return new RexExpression.FunctionCall(SqlKind.NOT_IN, dataType, SqlKind.NOT_IN.name(), + toFunctionOperands(rexInputRef, sarg.rangeSet.complement().asRanges(), dataType)); + } else { + throw new NotImplementedException("Range is not implemented yet"); + } + } + + private static List<RexExpression> toFunctionOperands(RexInputRef rexInputRef, Set<Range> ranges, + FieldSpec.DataType dataType) { + List<RexExpression> result = new ArrayList<>(ranges.size() + 1); + result.add(RexExpression.toRexExpression(rexInputRef)); + for (Range range : ranges) { + result.add(new RexExpression.Literal(dataType, RexExpression.toRexValue(dataType, range.lowerEndpoint()))); + } + return result; + } +} diff --git a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java index bace47a3ed..4223a1717a 100644 --- a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java +++ 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java @@ -255,6 +255,10 @@ public class QueryCompilationTest extends QueryEnvironmentTestBase { new Object[]{"SELECT b.col1 - a.col3 FROM a JOIN c ON a.col1 = c.col3", "Table 'b' not found"}, // non-agg column not being grouped new Object[]{"SELECT a.col1, SUM(a.col3) FROM a", "'a.col1' is not being grouped"}, + // empty IN clause fails compilation + new Object[]{"SELECT a.col1 FROM a WHERE a.col1 IN ()", "Encountered \"\" at line"}, + // range filter queries are not supported right now + new Object[]{"SELECT a.col1 FROM a WHERE a.col1 > 'x' AND a.col1 < 'y'", "Range is not implemented yet"} }; } diff --git a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java index 3d633e932f..799d0ef380 100644 --- a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java +++ b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java @@ -67,6 +67,8 @@ public class QueryEnvironmentTestBase { new Object[]{"SELECT dateTrunc('DAY', a.ts + b.ts) FROM a JOIN b on a.col1 = b.col1 AND a.col2 = b.col2"}, new Object[]{"SELECT a.col2, a.col3 FROM a JOIN b ON a.col1 = b.col1 " + " WHERE a.col3 >= 0 GROUP BY a.col2, a.col3"}, + new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1 WHERE a.col2 IN ('foo', 'bar') AND" + + " b.col2 NOT IN ('alice', 'charlie')"}, }; } } diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java index 67c5edfa99..cd184fe89f 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java @@ -25,7 
+25,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import javax.annotation.Nullable; -import org.apache.calcite.sql.type.SqlTypeName; import org.apache.pinot.common.request.context.ExpressionContext; import org.apache.pinot.common.utils.DataSchema; import org.apache.pinot.core.common.Operator; @@ -98,8 +97,7 @@ public class AggregateOperator extends BaseOperator<TransferableBlock> { private RexExpression toAggregationFunctionOperand(RexExpression rexExpression) { List<RexExpression> functionOperands = ((RexExpression.FunctionCall) rexExpression).getFunctionOperands(); Preconditions.checkState(functionOperands.size() < 2); - return functionOperands.size() > 0 ? functionOperands.get(0) - : new RexExpression.Literal(FieldSpec.DataType.INT, SqlTypeName.INTEGER, 1); + return functionOperands.size() > 0 ? functionOperands.get(0) : new RexExpression.Literal(FieldSpec.DataType.INT, 1); } @Override diff --git a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java index 7ae65076b8..71656f7817 100644 --- a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java +++ b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java @@ -109,6 +109,23 @@ public class QueryRunnerTest extends QueryRunnerTestBase { new Object[]{"SELECT a.col1, a.ts, b.col2, b.col3 FROM a JOIN b ON a.col1 = b.col2 " + " WHERE a.col3 >= 0 AND a.col2 = 'alice' AND b.col3 >= 0", 3}, + // Join query with IN and Not-IN clause. Table A's side of join will return 9 rows and Table B's side will + // return 2 rows. Join will be only on col1=bar and since A will return 3 rows with that value and B will return + // 1 row, the final output will have 3 rows. 
+ new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1 " + " WHERE a.col1 IN ('foo', 'bar', 'alice') AND b.col2 NOT IN ('foo', 'alice')", 3}, + + // Same query as above but written using OR/AND instead of IN. + new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1 " + " WHERE (a.col1 = 'foo' OR a.col1 = 'bar' OR a.col1 = 'alice') AND b.col2 != 'foo'" + " AND b.col2 != 'alice'", 3}, + + // Same as above but with single argument IN clauses. Left side of the join returns 3 rows, and the right side + // returns 5 rows. Only key where join succeeds is col1=foo, and since table B has only 1 row with that value, + // the number of rows should be 3. + new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1 " + " WHERE a.col1 IN ('foo') AND b.col2 NOT IN ('')", 3}, + // Projection pushdown new Object[]{"SELECT a.col1, a.col3 + a.col3 FROM a WHERE a.col3 >= 0 AND a.col2 = 'alice'", 3}, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org For additional commands, e-mail: commits-help@pinot.apache.org