This is an automated email from the ASF dual-hosted git repository.

yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 1515f49c4c Fix semi-join / pipeline breaker on BYTES and BIG_DECIMAL (#15531) 1515f49c4c is described below commit 1515f49c4cc5c761e07bc7c4bf914173ae3e018e Author: Yash Mayya <yash.ma...@gmail.com> AuthorDate: Mon Apr 14 15:12:05 2025 +0100 Fix semi-join / pipeline breaker on BYTES and BIG_DECIMAL (#15531) --- .../plan/server/ServerPlanRequestUtils.java | 24 ++++++- .../src/test/resources/queries/SemiJoins.json | 75 ++++++++++++++++++++++ 2 files changed, 98 insertions(+), 1 deletion(-) diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java index 7e6f925e30..adcae1f30b 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java @@ -20,6 +20,7 @@ package org.apache.pinot.query.runtime.plan.server; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -54,6 +55,7 @@ import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.spi.config.table.TableType; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.query.QueryThreadContext; +import org.apache.pinot.spi.utils.ByteArray; import org.apache.pinot.spi.utils.CommonConstants; import org.apache.pinot.spi.utils.builder.TableNameBuilder; import org.apache.pinot.sql.FilterKind; @@ -357,8 +359,28 @@ public class ServerPlanRequestUtils { expressions.add(RequestUtils.getLiteralExpression(arrString[rowIdx])); } break; + case BIG_DECIMAL: + BigDecimal[] arrBigDecimal = new BigDecimal[numRows]; + for (int rowIdx = 
0; rowIdx < numRows; rowIdx++) { + arrBigDecimal[rowIdx] = (BigDecimal) dataContainer.get(rowIdx)[colIdx]; + } + Arrays.sort(arrBigDecimal); + for (int rowIdx = 0; rowIdx < numRows; rowIdx++) { + expressions.add(RequestUtils.getLiteralExpression(arrBigDecimal[rowIdx])); + } + break; + case BYTES: + ByteArray[] arrBytes = new ByteArray[numRows]; + for (int rowIdx = 0; rowIdx < numRows; rowIdx++) { + arrBytes[rowIdx] = (ByteArray) dataContainer.get(rowIdx)[colIdx]; + } + Arrays.sort(arrBytes); + for (int rowIdx = 0; rowIdx < numRows; rowIdx++) { + expressions.add(RequestUtils.getLiteralExpression(arrBytes[rowIdx].getBytes())); + } + break; default: - throw new IllegalStateException("Illegal SV data type for ID_SET aggregation function: " + storedType); + throw new IllegalStateException("Illegal SV data type for IN filter: " + storedType); } return expressions; } diff --git a/pinot-query-runtime/src/test/resources/queries/SemiJoins.json b/pinot-query-runtime/src/test/resources/queries/SemiJoins.json new file mode 100644 index 0000000000..4dc851aca9 --- /dev/null +++ b/pinot-query-runtime/src/test/resources/queries/SemiJoins.json @@ -0,0 +1,75 @@ +{ + "semi_join_queries": { + "tables": { + "tbl": { + "schema": [ + {"name": "int_col", "type": "INT"}, + {"name": "long_col", "type": "LONG"}, + {"name": "float_col", "type": "FLOAT"}, + {"name": "double_col", "type": "DOUBLE"}, + {"name": "string_col", "type": "STRING"}, + {"name": "big_decimal_col", "type": "BIG_DECIMAL"}, + {"name": "bytes_col", "type": "BYTES"} + ], + "inputs": [ + [1, 1, 1.0, 1.0, "a", 1.0, "AA"], + [2, 2, 2.0, 2.0, "b", 2.0, "BB"], + [3, 3, 3.0, 3.0, "c", 3.0, "CC"], + [4, 4, 4.0, 4.0, "d", 4.0, "DD"], + [5, 5, 5.0, 5.0, "e", 5.0, "EE"] + ] + } + }, + "queries": [ + { + "description": "Semi join using IN clause with INT", + "sql": "SELECT int_col FROM {tbl} WHERE int_col IN (SELECT int_col FROM {tbl} WHERE string_col = 'c')", + "outputs": [ + [3] + ] + }, + { + "description": "Semi join using IN clause 
with LONG", + "sql": "SELECT long_col FROM {tbl} WHERE long_col IN (SELECT long_col FROM {tbl} WHERE string_col = 'c')", + "outputs": [ + [3] + ] + }, + { + "description": "Semi join using IN clause with FLOAT", + "sql": "SELECT float_col FROM {tbl} WHERE float_col IN (SELECT float_col FROM {tbl} WHERE string_col = 'c')", + "outputs": [ + [3.0] + ] + }, + { + "description": "Semi join using IN clause with DOUBLE", + "sql": "SELECT double_col FROM {tbl} WHERE double_col IN (SELECT double_col FROM {tbl} WHERE string_col = 'c')", + "outputs": [ + [3.0] + ] + }, + { + "description": "Semi join using IN clause with STRING", + "sql": "SELECT string_col FROM {tbl} WHERE string_col IN (SELECT string_col FROM {tbl} WHERE int_col = 3)", + "outputs": [ + ["c"] + ] + }, + { + "description": "Semi join using IN clause with BIG_DECIMAL", + "sql": "SELECT big_decimal_col FROM {tbl} WHERE big_decimal_col IN (SELECT big_decimal_col FROM {tbl} WHERE string_col = 'c')", + "outputs": [ + ["3.0"] + ] + }, + { + "description": "Semi join using IN clause with BYTES", + "sql": "SELECT bytes_col FROM {tbl} WHERE bytes_col IN (SELECT bytes_col FROM {tbl} WHERE string_col = 'c')", + "outputs": [ + ["cc"] + ] + } + ] + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org