This is an automated email from the ASF dual-hosted git repository.
yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 15759e91916 Introduce query option to exclude virtual columns from
schema for an MSE query (#17047)
15759e91916 is described below
commit 15759e919165e7a15372167d94cc305cd7b7d890
Author: Chaitanya Deepthi <[email protected]>
AuthorDate: Tue Oct 28 11:33:58 2025 -0700
Introduce query option to exclude virtual columns from schema for an MSE
query (#17047)
---
.../tests/MultiStageEngineIntegrationTest.java | 43 ++++++++++++++++------
.../org/apache/pinot/query/QueryEnvironment.java | 5 +++
.../apache/pinot/query/catalog/PinotCatalog.java | 12 +++++-
.../org/apache/pinot/query/catalog/PinotTable.java | 19 +++++++++-
.../org/apache/pinot/query/type/TypeFactory.java | 10 ++++-
.../org/apache/pinot/query/validate/Validator.java | 2 +-
.../apache/pinot/spi/utils/CommonConstants.java | 5 +++
7 files changed, 81 insertions(+), 15 deletions(-)
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java
index d91e879289a..7656ce92eb4 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java
@@ -94,6 +94,7 @@ public class MultiStageEngineIntegrationTest extends
BaseClusterIntegrationTestS
private static final String DIM_TABLE_SCHEMA_PATH =
"dimDayOfWeek_schema.json";
private static final String DIM_TABLE_TABLE_CONFIG_PATH =
"dimDayOfWeek_config.json";
private static final Integer DIM_NUMBER_OF_RECORDS = 7;
+ private static final String DIM_TABLE = "daysOfWeek";
@Override
protected String getSchemaFileName() {
@@ -144,6 +145,7 @@ public class MultiStageEngineIntegrationTest extends
BaseClusterIntegrationTestS
waitForAllDocsLoaded(600_000L);
setupTableWithNonDefaultDatabase(avroFiles);
+ setupDimensionTable();
}
@Override
@@ -1640,15 +1642,6 @@ public class MultiStageEngineIntegrationTest extends
BaseClusterIntegrationTestS
public void testLookupJoin()
throws Exception {
- Schema lookupTableSchema = createSchema(DIM_TABLE_SCHEMA_PATH);
- addSchema(lookupTableSchema);
- TableConfig tableConfig = createTableConfig(DIM_TABLE_TABLE_CONFIG_PATH);
- TenantConfig tenantConfig = new TenantConfig(getBrokerTenant(),
getServerTenant(), null);
- tableConfig.setTenantConfig(tenantConfig);
- addTableConfig(tableConfig);
- createAndUploadSegmentFromClasspath(tableConfig, lookupTableSchema,
DIM_TABLE_DATA_PATH, FileFormat.CSV,
- DIM_NUMBER_OF_RECORDS, 60_000);
-
// Compare total rows in the primary table with number of rows in the
result of the join with lookup table
String query = "select count(*) from " + getTableName();
JsonNode jsonNode = postQuery(query);
@@ -1671,8 +1664,6 @@ public class MultiStageEngineIntegrationTest extends
BaseClusterIntegrationTestS
}
assertTrue(stages.contains("LOOKUP_JOIN"), "Could not find LOOKUP_JOIN
stage in the query plan");
assertFalse(stages.contains("HASH_JOIN"), "HASH_JOIN stage should not be
present in the query plan");
-
- dropOfflineTable(tableConfig.getTableName());
}
public void testSearchLiteralFilter()
@@ -2109,11 +2100,41 @@ public class MultiStageEngineIntegrationTest extends
BaseClusterIntegrationTestS
return postQuery(query, headers);
}
+ private void setupDimensionTable() throws Exception {
+ // Set up the dimension table for JOIN tests
+ Schema lookupTableSchema = createSchema(DIM_TABLE_SCHEMA_PATH);
+ addSchema(lookupTableSchema);
+ TableConfig tableConfig = createTableConfig(DIM_TABLE_TABLE_CONFIG_PATH);
+ TenantConfig tenantConfig = new TenantConfig(getBrokerTenant(),
getServerTenant(), null);
+ tableConfig.setTenantConfig(tenantConfig);
+ addTableConfig(tableConfig);
+ createAndUploadSegmentFromClasspath(tableConfig, lookupTableSchema,
DIM_TABLE_DATA_PATH, FileFormat.CSV,
+ DIM_NUMBER_OF_RECORDS, 60_000);
+ }
+
+ @Test
+ public void testNaturalJoinWithVirtualColumns()
+ throws Exception {
+ String query = "SET excludeVirtualColumns=false; SELECT * FROM mytable a
NATURAL JOIN daysOfWeek b LIMIT 5";
+ JsonNode response = postQuery(query);
+ assertNotNull(response.get("exceptions").get(0).get("message"), "Should
have an error message");
+ }
+
+ @Test
+ public void testNaturalJoinWithNoVirtualColumns()
+ throws Exception {
+ String query = "SET excludeVirtualColumns=true; SELECT * FROM mytable
NATURAL JOIN daysOfWeek LIMIT 5";
+ JsonNode response = postQuery(query);
+ assertEquals(response.get("exceptions").get(0), null);
+ assertNotNull(response.get("resultTable"), "Should have result table");
+ }
+
@AfterClass
public void tearDown()
throws Exception {
dropOfflineTable(DEFAULT_TABLE_NAME);
dropOfflineTable(TABLE_NAME_WITH_DATABASE);
+ dropOfflineTable(DIM_TABLE);
stopServer();
stopBroker();
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/QueryEnvironment.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/QueryEnvironment.java
index 5f881322f72..5d1a7ddc232 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/QueryEnvironment.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/QueryEnvironment.java
@@ -92,6 +92,7 @@ import org.apache.pinot.query.validate.BytesCastVisitor;
import org.apache.pinot.spi.exception.QueryErrorCode;
import org.apache.pinot.spi.exception.QueryException;
import org.apache.pinot.spi.utils.CommonConstants;
+import
org.apache.pinot.spi.utils.CommonConstants.Broker.Request.QueryOptionKey;
import org.apache.pinot.sql.parsers.CalciteSqlParser;
import org.apache.pinot.sql.parsers.SqlNodeAndOptions;
import org.apache.pinot.sql.parsers.parser.SqlPhysicalExplain;
@@ -180,6 +181,10 @@ public class QueryEnvironment {
private PlannerContext getPlannerContext(SqlNodeAndOptions
sqlNodeAndOptions) {
WorkerManager workerManager = getWorkerManager(sqlNodeAndOptions);
Map<String, String> options = sqlNodeAndOptions.getOptions();
+ if
(Boolean.parseBoolean(options.get(QueryOptionKey.EXCLUDE_VIRTUAL_COLUMNS))) {
+ _catalog.configureVirtualColumnExclusion(true);
+ }
+
HepProgram optProgram = _optProgram;
Set<String> useRuleSet = QueryOptionsUtils.getUsePlannerRules(options);
if (MapUtils.isNotEmpty(options)) {
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/catalog/PinotCatalog.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/catalog/PinotCatalog.java
index c4ba328cee3..981a4e3aa17 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/catalog/PinotCatalog.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/catalog/PinotCatalog.java
@@ -48,6 +48,7 @@ public class PinotCatalog implements Schema {
private final TableCache _tableCache;
private final String _databaseName;
+ private boolean _excludeVirtualColumns = false;
/**
* PinotCatalog needs have access to the actual {@link TableCache} object
because TableCache hosts the actual
@@ -58,6 +59,15 @@ public class PinotCatalog implements Schema {
_databaseName = databaseName;
}
+ /**
+ * Configures whether virtual columns should be excluded from table schemas.
+ * This is typically used for NATURAL JOIN operations where virtual columns
+ * should not participate in join condition matching.
+ */
+ public void configureVirtualColumnExclusion(boolean excludeVirtualColumns) {
+ _excludeVirtualColumns = excludeVirtualColumns;
+ }
+
/**
* Acquire a table by its name.
* @param name name of the table.
@@ -82,7 +92,7 @@ public class PinotCatalog implements Schema {
return null;
}
- return new PinotTable(schema);
+ return new PinotTable(schema, _excludeVirtualColumns);
}
/**
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/catalog/PinotTable.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/catalog/PinotTable.java
index aee8ea85733..5b186dd6d52 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/catalog/PinotTable.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/catalog/PinotTable.java
@@ -25,6 +25,7 @@ import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.schema.ScannableTable;
import org.apache.calcite.schema.impl.AbstractTable;
import org.apache.pinot.query.type.TypeFactory;
+import org.apache.pinot.query.validate.Validator;
import org.apache.pinot.spi.data.Schema;
@@ -36,9 +37,20 @@ import org.apache.pinot.spi.data.Schema;
*/
public class PinotTable extends AbstractTable implements ScannableTable {
private Schema _schema;
+ private boolean _excludeVirtualColumns = false;
public PinotTable(Schema schema) {
+ this(schema, false);
+ }
+
+ /**
+ * Constructor with option to exclude virtual columns.
+ * This is typically used for NATURAL JOIN operations where virtual columns
+ * should not participate in join condition matching.
+ */
+ public PinotTable(Schema schema, boolean excludeVirtualColumns) {
_schema = schema;
+ _excludeVirtualColumns = excludeVirtualColumns;
}
@Override
@@ -49,7 +61,12 @@ public class PinotTable extends AbstractTable implements
ScannableTable {
} else { // this can happen when using Frameworks.withPrepare, which wraps
our factory in a JavaTypeFactoryImpl
typeFactory = TypeFactory.INSTANCE;
}
- return typeFactory.createRelDataTypeFromSchema(_schema);
+
+ if (_excludeVirtualColumns) {
+ return typeFactory.createRelDataTypeFromSchema(_schema,
Validator::isVirtualColumn);
+ } else {
+ return typeFactory.createRelDataTypeFromSchema(_schema);
+ }
}
@Override
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
index 8a1450b5842..7a6c120b82d 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/type/TypeFactory.java
@@ -21,6 +21,7 @@ package org.apache.pinot.query.type;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Map;
+import java.util.function.Predicate;
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.sql.type.SqlTypeName;
@@ -53,10 +54,17 @@ public class TypeFactory extends JavaTypeFactoryImpl {
}
public RelDataType createRelDataTypeFromSchema(Schema schema) {
+ return createRelDataTypeFromSchema(schema, column -> false);
+ }
+
+ public RelDataType createRelDataTypeFromSchema(Schema schema,
Predicate<String> shouldExclude) {
Builder builder = new Builder(this);
boolean enableNullHandling = schema.isEnableColumnBasedNullHandling();
for (Map.Entry<String, FieldSpec> entry :
schema.getFieldSpecMap().entrySet()) {
- builder.add(entry.getKey(), toRelDataType(entry.getValue(),
enableNullHandling));
+ String columnName = entry.getKey();
+ if (!shouldExclude.test(columnName)) {
+ builder.add(columnName, toRelDataType(entry.getValue(),
enableNullHandling));
+ }
}
return builder.build();
}
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/Validator.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/Validator.java
index 4fad84cd836..55b70607867 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/Validator.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/Validator.java
@@ -120,7 +120,7 @@ public class Validator extends SqlValidatorImpl {
* @param columnName column name
* @return true if the column is a virtual column
*/
- private static boolean isVirtualColumn(String columnName) {
+ public static boolean isVirtualColumn(String columnName) {
return columnName.length() > 0 && columnName.charAt(0) == '$';
}
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
index 5c7a84f4e12..10953f52893 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
@@ -733,6 +733,11 @@ public class CommonConstants {
public static final String DROP_RESULTS = "dropResults";
+ // Exclude virtual columns (columns starting with '$') from the table schema.
+ // This is typically used for NATURAL JOIN operations where virtual columns
+ // should not participate in join condition matching. Can only be used in MSE as of now.
+ public static final String EXCLUDE_VIRTUAL_COLUMNS =
"excludeVirtualColumns";
+
// Maximum number of pending results blocks allowed in the streaming
operator
public static final String MAX_STREAMING_PENDING_BLOCKS =
"maxStreamingPendingBlocks";
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]