This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new ff0db32e49 Enhance index and field config validation to block adding 
bloomfilter on boolean column (#15283)
ff0db32e49 is described below

commit ff0db32e491aee6d89e4cdd500006f0532ec5ea5
Author: ayesheepatra07 <ayeshee.pa...@startree.ai>
AuthorDate: Wed Mar 26 17:34:21 2025 -0700

    Enhance index and field config validation to block adding bloomfilter on 
boolean column (#15283)
---
 .../src/test/resources/TableIndexingTest.csv       |  4 +--
 .../segment/local/utils/TableConfigUtils.java      | 11 ++++++++
 .../segment/local/utils/TableConfigUtilsTest.java  | 30 ++++++++++++++++++++++
 3 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/pinot-core/src/test/resources/TableIndexingTest.csv 
b/pinot-core/src/test/resources/TableIndexingTest.csv
index 0c3891be1c..d3b046a908 100644
--- a/pinot-core/src/test/resources/TableIndexingTest.csv
+++ b/pinot-core/src/test/resources/TableIndexingTest.csv
@@ -198,7 +198,7 @@ DECIMAL;sv_BIG;dict;range_index;true;
 DECIMAL;sv_BIG;dict;startree_index;true;
 DECIMAL;sv_BIG;dict;vector_index;false;Vector index is currently only 
supported on float array columns
 BOOLEAN;sv;raw;timestamp_index;true;
-BOOLEAN;sv;raw;bloom_filter;true;
+BOOLEAN;sv;raw;bloom_filter;false;Cannot create a bloom filter on boolean 
column col
 BOOLEAN;sv;raw;fst_index;false;Cannot create FST index on column: col, it can 
only be applied to dictionary encoded single value string columns
 BOOLEAN;sv;raw;h3_index;false;H3 index is currently only supported on BYTES 
columns
 BOOLEAN;sv;raw;inverted_index;false;Cannot create inverted index for raw index 
column: col
@@ -209,7 +209,7 @@ BOOLEAN;sv;raw;range_index;false;Unsupported data type 
BOOLEAN for range index
 BOOLEAN;sv;raw;startree_index;false;Dimension: col does not have dictionary
 BOOLEAN;sv;raw;vector_index;false;Vector index is currently only supported on 
float array columns
 BOOLEAN;mv;raw;timestamp_index;false;Caught exception while reading data
-BOOLEAN;mv;raw;bloom_filter;true;
+BOOLEAN;mv;raw;bloom_filter;false;Cannot create a bloom filter on boolean 
column col
 BOOLEAN;mv;raw;fst_index;false;Cannot create FST index on column: col, it can 
only be applied to dictionary encoded single value string columns
 BOOLEAN;mv;raw;h3_index;false;H3 index is currently only supported on 
single-value columns
 BOOLEAN;mv;raw;inverted_index;false;Cannot create inverted index for raw index 
column: col
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
index 9b1c484003..8e17162312 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
@@ -1094,6 +1094,11 @@ public final class TableConfigUtils {
       }
     }
 
+    for (String bloomFilterColumn : bloomFilterColumns) {
+      
Preconditions.checkState(schema.getFieldSpecFor(bloomFilterColumn).getDataType()
 != FieldSpec.DataType.BOOLEAN,
+          "Cannot create bloom filter on BOOLEAN column: " + 
bloomFilterColumn);
+    }
+
     for (String jsonIndexColumn : jsonIndexColumns) {
       FieldSpec fieldSpec = schema.getFieldSpecFor(jsonIndexColumn);
       Preconditions.checkState(
@@ -1235,6 +1240,12 @@ public final class TableConfigUtils {
       // Validate the forward index disabled compatibility with other indexes 
if enabled for this column
       validateForwardIndexDisabledIndexCompatibility(columnName, fieldConfig, 
indexingConfig, schema, tableType);
 
+      // Validate bloom filter is not added to boolean column
+      if (fieldConfig.getIndexes() != null && 
fieldConfig.getIndexes().has("bloom")) {
+        Preconditions.checkState(fieldSpec.getDataType() != 
FieldSpec.DataType.BOOLEAN,
+          "Cannot create a bloom filter on boolean column " + columnName);
+      }
+
       if (CollectionUtils.isNotEmpty(fieldConfig.getIndexTypes())) {
         for (FieldConfig.IndexType indexType : fieldConfig.getIndexTypes()) {
           switch (indexType) {
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
index b3bf9d3e1a..12489e38fd 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
@@ -18,6 +18,8 @@
  */
 package org.apache.pinot.segment.local.utils;
 
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import java.util.Arrays;
@@ -30,6 +32,7 @@ import org.apache.pinot.common.tier.TierFactory;
 import org.apache.pinot.segment.spi.AggregationFunctionType;
 import org.apache.pinot.segment.spi.Constants;
 import 
org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair;
+import org.apache.pinot.spi.config.table.BloomFilterConfig;
 import org.apache.pinot.spi.config.table.ColumnPartitionConfig;
 import org.apache.pinot.spi.config.table.DedupConfig;
 import org.apache.pinot.spi.config.table.FieldConfig;
@@ -73,6 +76,8 @@ import org.mockito.Mockito;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
+import static org.testng.Assert.assertThrows;
+
 
 /**
  * Tests for the validations in {@link TableConfigUtils}
@@ -1321,6 +1326,31 @@ public class TableConfigUtilsTest {
     }
   }
 
+  /** A bloom filter on the BOOLEAN column "myCol" must be rejected however it is configured. */
+  @Test
+  public void testValidateBFOnBoolean() {
+    Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+        .addSingleValueDimension("myCol", FieldSpec.DataType.BOOLEAN)
+        .addSingleValueDimension("mycol2", FieldSpec.DataType.STRING).build();
+    // Case 1: bloomFilterColumns. Use the exact schema name "myCol" so the BOOLEAN column is the one under test.
+    TableConfig tableconfig1 = new TableConfigBuilder(TableType.REALTIME)
+        .setTableName(TABLE_NAME).setBloomFilterColumns(Arrays.asList("myCol")).build();
+    assertThrows(IllegalStateException.class, () -> TableConfigUtils.validate(tableconfig1, schema));
+    // Case 2: bloomFilterConfigs keyed by the BOOLEAN column.
+    TableConfig tableconfig2 = new TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).build();
+    tableconfig2.getIndexingConfig().setBloomFilterConfigs(
+        Collections.singletonMap("myCol", new BloomFilterConfig(0.01, 1000, true)));
+    assertThrows(IllegalStateException.class, () -> TableConfigUtils.validate(tableconfig2, schema));
+    // Case 3: FieldConfig for the BOOLEAN column whose indexes node contains a "bloom" entry.
+    TableConfig tableconfig3 = new TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).build();
+    ObjectNode indexesNode = JsonNodeFactory.instance.objectNode();
+    indexesNode.putObject("bloom");
+    FieldConfig fieldConfig = new FieldConfig("myCol", FieldConfig.EncodingType.DICTIONARY, null, null, null, null,
+        indexesNode, null, null);
+    tableconfig3.setFieldConfigList(Arrays.asList(fieldConfig));
+    assertThrows(IllegalStateException.class, () -> TableConfigUtils.validate(tableconfig3, schema));
+  }
+
   @Test
   public void testValidateIndexingConfig() {
     Schema schema =


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to