This is an automated email from the ASF dual-hosted git repository.

somandal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new ba2013728b Add validations for MV columns: reject MV primary-keys for 
upsert/dedup, BIG_DECIMAL, and JSON (#16079)
ba2013728b is described below

commit ba2013728b29178df2033fe5af75d79a75f9e30e
Author: Sonam Mandal <sonam.man...@startree.ai>
AuthorDate: Wed Jun 11 16:02:23 2025 -0700

    Add validations for MV columns: reject MV primary-keys for upsert/dedup, 
BIG_DECIMAL, and JSON (#16079)
    
    * Add validations for MV columns: reject MV primary-keys for upsert/dedup, 
BIG_DECIMAL, and JSON
    
    * Address review comments
---
 .../apache/pinot/core/util/SchemaUtilsTest.java    | 29 +++++++++++++++
 .../pinot/segment/local/utils/SchemaUtils.java     | 13 +++++++
 .../segment/local/utils/TableConfigUtils.java      |  8 +++++
 .../segment/local/utils/TableConfigUtilsTest.java  | 41 ++++++++++++++++++++++
 4 files changed, 91 insertions(+)

diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java 
b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
index 69f58aa5eb..a37f9fe635 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
@@ -290,6 +290,35 @@ public class SchemaUtilsTest {
     checkValidationFails(pinotSchema);
   }
 
+  @Test
+  public void testValidateMultiValueFieldSpec() {
+    Schema pinotSchema;
+
+    pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+        .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+        .addMultiValueDimension("myJsonCol", FieldSpec.DataType.JSON).build();
+
+    checkValidationFails(pinotSchema);
+
+    pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+        .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+        .addMultiValueDimension("myBigDecimalCol", 
FieldSpec.DataType.BIG_DECIMAL).build();
+
+    checkValidationFails(pinotSchema);
+
+    pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+        .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+        .addSingleValueDimension("myJsonCol", FieldSpec.DataType.JSON).build();
+
+    SchemaUtils.validate(pinotSchema);
+
+    pinotSchema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+        .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+        .addSingleValueDimension("myBigDecimalCol", 
FieldSpec.DataType.BIG_DECIMAL).build();
+
+    SchemaUtils.validate(pinotSchema);
+  }
+
   @Test
   public void testValidateCaseInsensitive() {
     Schema pinotSchema = new Schema.SchemaBuilder()
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java
index 09ebf24063..45e6754052 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java
@@ -148,6 +148,9 @@ public class SchemaUtils {
           validateDefaultIsNotNaN(fieldSpec);
         }
       }
+      if (!fieldSpec.isSingleValueField()) {
+        validateMultiValueCompatibility(fieldSpec);
+      }
     }
     Preconditions.checkState(Collections.disjoint(transformedColumns, 
argumentColumns),
         "Columns: %s are a result of transformations, and cannot be used as 
arguments to other transform functions",
@@ -166,6 +169,16 @@ public class SchemaUtils {
             fieldSpec.getName());
   }
 
+  /**
+   * Validations for MV type columns
+   */
+  private static void validateMultiValueCompatibility(FieldSpec fieldSpec) {
+    
Preconditions.checkState(!fieldSpec.getDataType().equals(FieldSpec.DataType.JSON),
+        "JSON columns cannot be of multi-value type");
+    
Preconditions.checkState(!fieldSpec.getDataType().equals(FieldSpec.DataType.BIG_DECIMAL),
+        "BIG_DECIMAL columns cannot be of multi-value type");
+  }
+
   /**
    * Validates that the schema is compatible with the given table config
    */
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
index c1edee398f..3901c4e5a8 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
@@ -707,6 +707,14 @@ public final class TableConfigUtils {
     // primary key exists
     
Preconditions.checkState(CollectionUtils.isNotEmpty(schema.getPrimaryKeyColumns()),
         "Upsert/Dedup table must have primary key columns in the schema");
+    // primary key columns are not of multi-value type
+    for (String primaryKeyColumn : schema.getPrimaryKeyColumns()) {
+      FieldSpec fieldSpec = schema.getFieldSpecFor(primaryKeyColumn);
+      if (fieldSpec != null) {
+        Preconditions.checkState(fieldSpec.isSingleValueField(),
+            String.format("Upsert/Dedup primary key column: %s cannot be of 
multi-value type", primaryKeyColumn));
+      }
+    }
     // replica group is configured for routing
     Preconditions.checkState(
         tableConfig.getRoutingConfig() != null && 
isRoutingStrategyAllowedForUpsert(tableConfig.getRoutingConfig()),
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
index 33c6737cc9..ce14be01ef 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
@@ -1709,6 +1709,20 @@ public class TableConfigUtilsTest {
       Assert.assertEquals(e.getMessage(), "Upsert/Dedup table must have 
primary key columns in the schema");
     }
 
+    schema =
+        new 
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).addMultiValueDimension("myCol",
 FieldSpec.DataType.STRING)
+            .setPrimaryKeyColumns(Lists.newArrayList("myCol")).build();
+    tableConfig = new 
TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME)
+        .setDedupConfig(new DedupConfig())
+        .build();
+    try {
+      TableConfigUtils.validateUpsertAndDedupConfig(tableConfig, schema);
+      Assert.fail();
+    } catch (IllegalStateException e) {
+      Assert.assertEquals(e.getMessage(),
+          "Upsert/Dedup primary key column: myCol cannot be of multi-value 
type");
+    }
+
     schema =
         new 
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).addSingleValueDimension("myCol",
 FieldSpec.DataType.STRING)
             .setPrimaryKeyColumns(Lists.newArrayList("myCol")).build();
@@ -1932,6 +1946,7 @@ public class TableConfigUtilsTest {
     String invalidCol = "invalidCol";
     schema = new 
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
         .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+        .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING)
         .addSingleValueDimension(stringTypeDelCol, FieldSpec.DataType.STRING)
         .addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN)
         .addSingleValueDimension(timestampCol, FieldSpec.DataType.TIMESTAMP)
@@ -2001,10 +2016,33 @@ public class TableConfigUtilsTest {
       Assert.assertEquals(e.getMessage(), "The deleteRecordColumn - mvCol must 
be a single-valued column");
     }
 
+    schema = new 
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
+        .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+        .addMultiValueDimension("myPkCol", FieldSpec.DataType.STRING)
+        .addSingleValueDimension(stringTypeDelCol, FieldSpec.DataType.STRING)
+        .addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN)
+        .addSingleValueDimension(timestampCol, FieldSpec.DataType.TIMESTAMP)
+        .addMultiValueDimension(mvCol, FieldSpec.DataType.STRING).build();
+    streamConfigs = getStreamConfigs();
+
+    upsertConfig = new UpsertConfig(UpsertConfig.Mode.FULL);
+    upsertConfig.setDeleteRecordColumn(stringTypeDelCol);
+    tableConfig = new 
TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).setStreamConfigs(streamConfigs)
+        .setUpsertConfig(upsertConfig).setRoutingConfig(
+            new RoutingConfig(null, null, 
RoutingConfig.STRICT_REPLICA_GROUP_INSTANCE_SELECTOR_TYPE, false))
+        .build();
+    try {
+      TableConfigUtils.validateUpsertAndDedupConfig(tableConfig, schema);
+      Assert.fail("Should fail table creation when PK column type is 
multi-valued.");
+    } catch (IllegalStateException e) {
+      Assert.assertEquals(e.getMessage(), "Upsert/Dedup primary key column: 
myPkCol cannot be of multi-value type");
+    }
+
     // upsert deleted-keys-ttl configs with no deleted column
     schema = new 
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
         .addDateTime(TIME_COLUMN, FieldSpec.DataType.LONG, 
"1:MILLISECONDS:EPOCH", "1:MILLISECONDS")
         .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+        .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING)
         .addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN).build();
     upsertConfig = new UpsertConfig(UpsertConfig.Mode.FULL);
     upsertConfig.setDeletedKeysTTL(3600);
@@ -2021,6 +2059,7 @@ public class TableConfigUtilsTest {
     // multiple comparison columns set for deleted-keys-ttl
     schema = new 
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
         .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+        .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING)
         .addDateTime(TIME_COLUMN, FieldSpec.DataType.LONG, 
"1:MILLISECONDS:EPOCH", "1:MILLISECONDS")
         .addSingleValueDimension(delCol, FieldSpec.DataType.BOOLEAN).build();
     upsertConfig.setComparisonColumns(Lists.newArrayList(TIME_COLUMN, 
"myCol"));
@@ -2058,6 +2097,7 @@ public class TableConfigUtilsTest {
     boolean dropOutOfOrderRecord = true;
     schema = new 
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
         .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+        .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING)
         .addSingleValueDimension(outOfOrderRecordColumn, 
FieldSpec.DataType.BOOLEAN).build();
     streamConfigs = getStreamConfigs();
 
@@ -2078,6 +2118,7 @@ public class TableConfigUtilsTest {
     // outOfOrderRecordColumn not of type BOOLEAN
     schema = new 
Schema.SchemaBuilder().setSchemaName(TABLE_NAME).setPrimaryKeyColumns(Lists.newArrayList("myPkCol"))
         .addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+        .addSingleValueDimension("myPkCol", FieldSpec.DataType.STRING)
         .addSingleValueDimension(outOfOrderRecordColumn, 
FieldSpec.DataType.STRING).build();
     streamConfigs = getStreamConfigs();
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to