This is an automated email from the ASF dual-hosted git repository.

yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 1515f49c4c Fix semi-join / pipeline breaker on BYTES and BIG_DECIMAL (#15531)
1515f49c4c is described below

commit 1515f49c4cc5c761e07bc7c4bf914173ae3e018e
Author: Yash Mayya <yash.ma...@gmail.com>
AuthorDate: Mon Apr 14 15:12:05 2025 +0100

    Fix semi-join / pipeline breaker on BYTES and BIG_DECIMAL (#15531)
---
 .../plan/server/ServerPlanRequestUtils.java        | 24 ++++++-
 .../src/test/resources/queries/SemiJoins.json      | 75 ++++++++++++++++++++++
 2 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java
index 7e6f925e30..adcae1f30b 100644
--- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java
+++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java
@@ -20,6 +20,7 @@ package org.apache.pinot.query.runtime.plan.server;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
+import java.math.BigDecimal;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -54,6 +55,7 @@ import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
 import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.query.QueryThreadContext;
+import org.apache.pinot.spi.utils.ByteArray;
 import org.apache.pinot.spi.utils.CommonConstants;
 import org.apache.pinot.spi.utils.builder.TableNameBuilder;
 import org.apache.pinot.sql.FilterKind;
@@ -357,8 +359,28 @@ public class ServerPlanRequestUtils {
           expressions.add(RequestUtils.getLiteralExpression(arrString[rowIdx]));
         }
         break;
+      case BIG_DECIMAL:
+        BigDecimal[] arrBigDecimal = new BigDecimal[numRows];
+        for (int rowIdx = 0; rowIdx < numRows; rowIdx++) {
+          arrBigDecimal[rowIdx] = (BigDecimal) dataContainer.get(rowIdx)[colIdx];
+        }
+        Arrays.sort(arrBigDecimal);
+        for (int rowIdx = 0; rowIdx < numRows; rowIdx++) {
+          expressions.add(RequestUtils.getLiteralExpression(arrBigDecimal[rowIdx]));
+        }
+        break;
+      case BYTES:
+        ByteArray[] arrBytes = new ByteArray[numRows];
+        for (int rowIdx = 0; rowIdx < numRows; rowIdx++) {
+          arrBytes[rowIdx] = (ByteArray) dataContainer.get(rowIdx)[colIdx];
+        }
+        Arrays.sort(arrBytes);
+        for (int rowIdx = 0; rowIdx < numRows; rowIdx++) {
+          expressions.add(RequestUtils.getLiteralExpression(arrBytes[rowIdx].getBytes()));
+        }
+        break;
       default:
-        throw new IllegalStateException("Illegal SV data type for ID_SET aggregation function: " + storedType);
+        throw new IllegalStateException("Illegal SV data type for IN filter: " + storedType);
     }
     return expressions;
   }
diff --git a/pinot-query-runtime/src/test/resources/queries/SemiJoins.json b/pinot-query-runtime/src/test/resources/queries/SemiJoins.json
new file mode 100644
index 0000000000..4dc851aca9
--- /dev/null
+++ b/pinot-query-runtime/src/test/resources/queries/SemiJoins.json
@@ -0,0 +1,75 @@
+{
+  "semi_join_queries": {
+    "tables": {
+      "tbl": {
+        "schema": [
+          {"name": "int_col", "type": "INT"},
+          {"name": "long_col", "type": "LONG"},
+          {"name": "float_col", "type": "FLOAT"},
+          {"name": "double_col", "type": "DOUBLE"},
+          {"name": "string_col", "type": "STRING"},
+          {"name": "big_decimal_col", "type": "BIG_DECIMAL"},
+          {"name": "bytes_col", "type": "BYTES"}
+        ],
+        "inputs": [
+          [1, 1, 1.0, 1.0, "a", 1.0, "AA"],
+          [2, 2, 2.0, 2.0, "b", 2.0, "BB"],
+          [3, 3, 3.0, 3.0, "c", 3.0, "CC"],
+          [4, 4, 4.0, 4.0, "d", 4.0, "DD"],
+          [5, 5, 5.0, 5.0, "e", 5.0, "EE"]
+        ]
+      }
+    },
+    "queries": [
+      {
+        "description": "Semi join using IN clause with INT",
+        "sql": "SELECT int_col FROM {tbl} WHERE int_col IN (SELECT int_col FROM {tbl} WHERE string_col = 'c')",
+        "outputs": [
+          [3]
+        ]
+      },
+      {
+        "description": "Semi join using IN clause with LONG",
+        "sql": "SELECT long_col FROM {tbl} WHERE long_col IN (SELECT long_col FROM {tbl} WHERE string_col = 'c')",
+        "outputs": [
+          [3]
+        ]
+      },
+      {
+        "description": "Semi join using IN clause with FLOAT",
+        "sql": "SELECT float_col FROM {tbl} WHERE float_col IN (SELECT float_col FROM {tbl} WHERE string_col = 'c')",
+        "outputs": [
+          [3.0]
+        ]
+      },
+      {
+        "description": "Semi join using IN clause with DOUBLE",
+        "sql": "SELECT double_col FROM {tbl} WHERE double_col IN (SELECT double_col FROM {tbl} WHERE string_col = 'c')",
+        "outputs": [
+          [3.0]
+        ]
+      },
+      {
+        "description": "Semi join using IN clause with STRING",
+        "sql": "SELECT string_col FROM {tbl} WHERE string_col IN (SELECT string_col FROM {tbl} WHERE int_col = 3)",
+        "outputs": [
+          ["c"]
+        ]
+      },
+      {
+        "description": "Semi join using IN clause with BIG_DECIMAL",
+        "sql": "SELECT big_decimal_col FROM {tbl} WHERE big_decimal_col IN (SELECT big_decimal_col FROM {tbl} WHERE string_col = 'c')",
+        "outputs": [
+          ["3.0"]
+        ]
+      },
+      {
+        "description": "Semi join using IN clause with BYTES",
+        "sql": "SELECT bytes_col FROM {tbl} WHERE bytes_col IN (SELECT bytes_col FROM {tbl} WHERE string_col = 'c')",
+        "outputs": [
+          ["cc"]
+        ]
+      }
+    ]
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to