This is an automated email from the ASF dual-hosted git repository. somandal pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new ba2013728b Add validations for MV columns: reject MV primary-keys for upsert/dedup, BIG_DECIMAL, and JSON (#16079) ba2013728b is described below commit ba2013728b29178df2033fe5af75d79a75f9e30e Author: Sonam Mandal <sonam.man...@startree.ai> AuthorDate: Wed Jun 11 16:02:23 2025 -0700 Add validations for MV columns: reject MV primary-keys for upsert/dedup, BIG_DECIMAL, and JSON (#16079) * Add validations for MV columns: reject MV primary-keys for upsert/dedup, BIG_DECIMAL, and JSON * Address review comments --- .../apache/pinot/core/util/SchemaUtilsTest.java | 29 +++++++++++++++ .../pinot/segment/local/utils/SchemaUtils.java | 13 +++++++ .../segment/local/utils/TableConfigUtils.java | 8 +++++ .../segment/local/utils/TableConfigUtilsTest.java | 41 ++++++++++++++++++++++ 4 files changed, 91 insertions(+) diff --git a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java index 69f58aa5eb..a37f9fe635 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java @@ -290,6 +290,35 @@ public class SchemaUtilsTest { checkValidationFails(pinotSchema); } + @Test + public void testValidateMultiValueFieldSpec() { + Schema pinotSchema; + + pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME) + .addSingleValueDimension("myCol", FieldSpec.DataType.STRING) + .addMultiValueDimension("myJsonCol", FieldSpec.DataType.JSON).build(); + + checkValidationFails(pinotSchema); + + pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME) + .addSingleValueDimension("myCol", FieldSpec.DataType.STRING) + .addMultiValueDimension("myBigDecimalCol", FieldSpec.DataType.BIG_DECIMAL).build(); + + checkValidationFails(pinotSchema); + + pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME) + .addSingleValueDimension("myCol", FieldSpec.DataType.STRING) + .addSingleValueDimension("myJsonCol", FieldSpec.DataType.JSON).build(); + + SchemaUtils.validate(pinotSchema); + + pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME) + .addSingleValueDimension("myCol", FieldSpec.DataType.STRING) + .addSingleValueDimension("myBigDecimalCol", FieldSpec.DataType.BIG_DECIMAL).build(); + + SchemaUtils.validate(pinotSchema); + } + @Test public void testValidateCaseInsensitive() { Schema pinotSchema = new Schema.SchemaBuilder() diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java index 09ebf24063..45e6754052 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java @@ -148,6 +148,9 @@ public class SchemaUtils { validateDefaultIsNotNaN(fieldSpec); } } + if (!fieldSpec.isSingleValueField()) { + validateMultiValueCompatibility(fieldSpec); + } } Preconditions.checkState(Collections.disjoint(transformedColumns, argumentColumns), "Columns: %s are a result of transformations, and cannot be used as arguments to other transform functions", @@ -166,6 +169,16 @@ public class SchemaUtils { fieldSpec.getName()); } + /** + * Validations for MV type columns + */ + private static void validateMultiValueCompatibility(FieldSpec fieldSpec) { + Preconditions.checkState(!fieldSpec.getDataType().equals(FieldSpec.DataType.JSON), + "JSON columns cannot be of multi-value type"); + Preconditions.checkState(!fieldSpec.getDataType().equals(FieldSpec.DataType.BIG_DECIMAL), + "BIG_DECIMAL columns cannot be of multi-value type"); + } + /** * Validates that the schema is compatible with the given table config */ diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java index c1edee398f..3901c4e5a8 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java @@ -707,6 +707,14 @@ public final class TableConfigUtils { // primary key exists Preconditions.checkState(CollectionUtils.isNotEmpty(schema.getPrimaryKeyColumns()), "Upsert/Dedup table must have primary key columns in the schema"); + // primary key columns are not of multi-value type + for (String primaryKeyColumn : schema.getPrimaryKeyColumns()) { + FieldSpec fieldSpec = schema.getFieldSpecFor(primaryKeyColumn); + if (fieldSpec != null) { + Preconditions.checkState(fieldSpec.isSingleValueField(), + String.format("Upsert/Dedup primary key column: %s cannot be of multi-value type", primaryKeyColumn)); + } + } // replica group is configured for routing Preconditions.checkState( tableConfig.getRoutingConfig() != null && isRoutingStrategyAllowedForUpsert(tableConfig.getRoutingConfig()), diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java index 33c6737cc9..ce14be01ef 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java @@ -1709,6 +1709,20 @@ public class TableConfigUtilsTest { Assert.assertEquals(e.getMessage(), "Upsert/Dedup table must have primary key columns in the schema"); } + schema = + new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).addMultiValueDimension("myCol", FieldSpec.DataType.STRING) + .setPrimaryKeyColumns(Lists.newArrayList("myCol")).build(); + tableConfig = new TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME) + .setDedupConfig(new DedupConfig()) + .build(); + try { + TableConfigUtils.validateUpsertAndDedupConfig(tableConfig, schema); + Assert.fail(); + } catch (IllegalStateException e) { + Assert.assertEquals(e.getMessage(), + "Upsert/Dedup primary key column: myCol cannot be of multi-value type"); + } + schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).addSingleValueDimension("myCol", FieldSpec.DataType.STRING) .setPrimaryKeyColumns(Lists.newArrayList("myCol")).build(); @@ -1932,6 +1946,7 @@ public class TableConfigUtilsTest { String invalidCol = "invalidCol"; schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol")) .addSingleValueDimension("myCol", FieldSpec.DataType.STRING) + .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING) .addSingleValueDimension(stringTypeDelCol, FieldSpec.DataType.STRING) .addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN) .addSingleValueDimension(timestampCol, FieldSpec.DataType.TIMESTAMP) @@ -2001,10 +2016,33 @@ public class TableConfigUtilsTest { Assert.assertEquals(e.getMessage(), "The deleteRecordColumn - mvCol must be a single-valued column"); } + schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol")) + .addSingleValueDimension("myCol", FieldSpec.DataType.STRING) + .addMultiValueDimension("myPkCol", FieldSpec.DataType.STRING) + .addSingleValueDimension(stringTypeDelCol, FieldSpec.DataType.STRING) + .addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN) + .addSingleValueDimension(timestampCol, FieldSpec.DataType.TIMESTAMP) + .addMultiValueDimension(mvCol, FieldSpec.DataType.STRING).build(); + streamConfigs = getStreamConfigs(); + + upsertConfig = new UpsertConfig(UpsertConfig.Mode.FULL); + upsertConfig.setDeleteRecordColumn(stringTypeDelCol); + tableConfig = new TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).setStreamConfigs(streamConfigs) + .setUpsertConfig(upsertConfig).setRoutingConfig( + new RoutingConfig(null, null, RoutingConfig.STRICT_REPLICA_GROUP_INSTANCE_SELECTOR_TYPE, false)) + .build(); + try { + TableConfigUtils.validateUpsertAndDedupConfig(tableConfig, schema); + Assert.fail("Should fail table creation when PK column type is multi-valued."); + } catch (IllegalStateException e) { + Assert.assertEquals(e.getMessage(), "Upsert/Dedup primary key column: myPkCol cannot be of multi-value type"); + } + // upsert deleted-keys-ttl configs with no deleted column schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol")) .addDateTime(TIME_COLUMN, FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS") .addSingleValueDimension("myCol", FieldSpec.DataType.STRING) + .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING) .addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN).build(); upsertConfig = new UpsertConfig(UpsertConfig.Mode.FULL); upsertConfig.setDeletedKeysTTL(3600); @@ -2021,6 +2059,7 @@ public class TableConfigUtilsTest { // multiple comparison columns set for deleted-keys-ttl schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol")) .addSingleValueDimension("myCol", FieldSpec.DataType.STRING) + .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING) .addDateTime(TIME_COLUMN, FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS") .addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN).build(); upsertConfig.setComparisonColumns(Lists.newArrayList(TIME_COLUMN, "myCol")); @@ -2058,6 +2097,7 @@ public class TableConfigUtilsTest { boolean dropOutOfOrderRecord = true; schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol")) .addSingleValueDimension("myCol", FieldSpec.DataType.STRING) + .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING) .addSingleValueDimension(outOfOrderRecordColumn, FieldSpec.DataType.BOOLEAN).build(); streamConfigs = getStreamConfigs(); @@ -2078,6 +2118,7 @@ public class TableConfigUtilsTest { // outOfOrderRecordColumn not of type BOOLEAN schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol")) .addSingleValueDimension("myCol", FieldSpec.DataType.STRING) + .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING) .addSingleValueDimension(outOfOrderRecordColumn, FieldSpec.DataType.STRING).build(); streamConfigs = getStreamConfigs(); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org