This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new ff0db32e49 Enhance index and field config validation to block adding bloomfilter on boolean column (#15283) ff0db32e49 is described below commit ff0db32e491aee6d89e4cdd500006f0532ec5ea5 Author: ayesheepatra07 <ayeshee.pa...@startree.ai> AuthorDate: Wed Mar 26 17:34:21 2025 -0700 Enhance index and field config validation to block adding bloomfilter on boolean column (#15283) --- .../src/test/resources/TableIndexingTest.csv | 4 +-- .../segment/local/utils/TableConfigUtils.java | 11 ++++++++ .../segment/local/utils/TableConfigUtilsTest.java | 30 ++++++++++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/pinot-core/src/test/resources/TableIndexingTest.csv b/pinot-core/src/test/resources/TableIndexingTest.csv index 0c3891be1c..d3b046a908 100644 --- a/pinot-core/src/test/resources/TableIndexingTest.csv +++ b/pinot-core/src/test/resources/TableIndexingTest.csv @@ -198,7 +198,7 @@ DECIMAL;sv_BIG;dict;range_index;true; DECIMAL;sv_BIG;dict;startree_index;true; DECIMAL;sv_BIG;dict;vector_index;false;Vector index is currently only supported on float array columns BOOLEAN;sv;raw;timestamp_index;true; -BOOLEAN;sv;raw;bloom_filter;true; +BOOLEAN;sv;raw;bloom_filter;false;Cannot create a bloom filter on boolean column col BOOLEAN;sv;raw;fst_index;false;Cannot create FST index on column: col, it can only be applied to dictionary encoded single value string columns BOOLEAN;sv;raw;h3_index;false;H3 index is currently only supported on BYTES columns BOOLEAN;sv;raw;inverted_index;false;Cannot create inverted index for raw index column: col @@ -209,7 +209,7 @@ BOOLEAN;sv;raw;range_index;false;Unsupported data type BOOLEAN for range index BOOLEAN;sv;raw;startree_index;false;Dimension: col does not have dictionary BOOLEAN;sv;raw;vector_index;false;Vector index is currently only supported on float array columns BOOLEAN;mv;raw;timestamp_index;false;Caught exception while reading data 
-BOOLEAN;mv;raw;bloom_filter;true; +BOOLEAN;mv;raw;bloom_filter;false;Cannot create a bloom filter on boolean column col BOOLEAN;mv;raw;fst_index;false;Cannot create FST index on column: col, it can only be applied to dictionary encoded single value string columns BOOLEAN;mv;raw;h3_index;false;H3 index is currently only supported on single-value columns BOOLEAN;mv;raw;inverted_index;false;Cannot create inverted index for raw index column: col diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java index 9b1c484003..8e17162312 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java @@ -1094,6 +1094,11 @@ public final class TableConfigUtils { } } + for (String bloomFilterColumn : bloomFilterColumns) { + Preconditions.checkState(schema.getFieldSpecFor(bloomFilterColumn).getDataType() != FieldSpec.DataType.BOOLEAN, + "Cannot create bloom filter on BOOLEAN column: " + bloomFilterColumn); + } + for (String jsonIndexColumn : jsonIndexColumns) { FieldSpec fieldSpec = schema.getFieldSpecFor(jsonIndexColumn); Preconditions.checkState( @@ -1235,6 +1240,12 @@ public final class TableConfigUtils { // Validate the forward index disabled compatibility with other indexes if enabled for this column validateForwardIndexDisabledIndexCompatibility(columnName, fieldConfig, indexingConfig, schema, tableType); + // Validate bloom filter is not added to boolean column + if (fieldConfig.getIndexes() != null && fieldConfig.getIndexes().has("bloom")) { + Preconditions.checkState(fieldSpec.getDataType() != FieldSpec.DataType.BOOLEAN, + "Cannot create a bloom filter on boolean column " + columnName); + } + if (CollectionUtils.isNotEmpty(fieldConfig.getIndexTypes())) { for (FieldConfig.IndexType indexType 
: fieldConfig.getIndexTypes()) { switch (indexType) { diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java index b3bf9d3e1a..12489e38fd 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java @@ -18,6 +18,8 @@ */ package org.apache.pinot.segment.local.utils; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import java.util.Arrays; @@ -30,6 +32,7 @@ import org.apache.pinot.common.tier.TierFactory; import org.apache.pinot.segment.spi.AggregationFunctionType; import org.apache.pinot.segment.spi.Constants; import org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair; +import org.apache.pinot.spi.config.table.BloomFilterConfig; import org.apache.pinot.spi.config.table.ColumnPartitionConfig; import org.apache.pinot.spi.config.table.DedupConfig; import org.apache.pinot.spi.config.table.FieldConfig; @@ -73,6 +76,8 @@ import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.Test; +import static org.testng.Assert.assertThrows; + /** * Tests for the validations in {@link TableConfigUtils} @@ -1321,6 +1326,31 @@ public class TableConfigUtilsTest { } } + @Test + public void testValidateBFOnBoolean() { + Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME) + .addSingleValueDimension("myCol", FieldSpec.DataType.BOOLEAN) + .addSingleValueDimension("mycol2", FieldSpec.DataType.STRING).build(); + + TableConfig tableconfig1 = new TableConfigBuilder(TableType.REALTIME) + .setTableName(TABLE_NAME).setBloomFilterColumns(Arrays.asList("mycol")).build(); + 
assertThrows(IllegalStateException.class, () -> TableConfigUtils.validate(tableconfig1, schema)); + + TableConfig tableconfig2 = new TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).build(); + tableconfig2.getIndexingConfig().setBloomFilterConfigs( + Collections.singletonMap("myCol", new BloomFilterConfig(0.01, 1000, true))); + assertThrows(IllegalStateException.class, () -> TableConfigUtils.validate(tableconfig2, schema)); + + TableConfig tableconfig3 = new TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).build(); + ObjectNode indexesNode = JsonNodeFactory.instance.objectNode(); + indexesNode.putObject("bloom"); + FieldConfig fieldConfig = new FieldConfig( + "MyCol", FieldConfig.EncodingType.DICTIONARY, null, null, null, + null, indexesNode, null, null); + tableconfig3.setFieldConfigList(Arrays.asList(fieldConfig)); + assertThrows(IllegalStateException.class, () -> TableConfigUtils.validate(tableconfig3, schema)); + } + @Test public void testValidateIndexingConfig() { Schema schema = --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org