This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new f5f1f8c5aab branch-3.0: [fix](ngram bloomfilter) fix narrow conversion for ngram bf_size (#43645) f5f1f8c5aab is described below commit f5f1f8c5aab17bd44df01e8303ccd7d2c99da2ac Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> AuthorDate: Tue Nov 12 11:25:09 2024 +0800 branch-3.0: [fix](ngram bloomfilter) fix narrow conversion for ngram bf_size (#43645) Cherry-picked from #43480 Co-authored-by: airborne12 <jiang...@selectdb.com> --- be/src/olap/rowset/segment_v2/segment_writer.cpp | 15 ++++++- .../rowset/segment_v2/vertical_segment_writer.cpp | 15 ++++++- .../java/org/apache/doris/analysis/IndexDef.java | 4 +- .../trees/plans/commands/info/IndexDefinition.java | 4 +- .../index_p0/test_ngram_bloomfilter_index.groovy | 47 ++++++++++++++++++++++ 5 files changed, 77 insertions(+), 8 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 5957a555ba7..c532969baa4 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -192,8 +192,19 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co if (tablet_index) { opts.need_bloom_filter = true; opts.is_ngram_bf_index = true; - opts.gram_size = tablet_index->get_gram_size(); - opts.gram_bf_size = tablet_index->get_gram_bf_size(); + //narrow convert from int32_t to uint8_t and uint16_t which is dangerous + auto gram_size = tablet_index->get_gram_size(); + auto gram_bf_size = tablet_index->get_gram_bf_size(); + if (gram_size > 256 || gram_size < 1) { + return Status::NotSupported("Do not support ngram bloom filter for ngram_size: ", + gram_size); + } + if (gram_bf_size > 65535 || gram_bf_size < 64) { + return Status::NotSupported("Do not support ngram bloom filter for bf_size: ", + gram_bf_size); + } + opts.gram_size = gram_size; + opts.gram_bf_size = gram_bf_size; } opts.need_bitmap_index = column.has_bitmap_index(); diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 9ade9c1bfcc..2cea4c86c09 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -183,8 +183,19 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo if (tablet_index) { opts.need_bloom_filter = true; opts.is_ngram_bf_index = true; - opts.gram_size = tablet_index->get_gram_size(); - opts.gram_bf_size = tablet_index->get_gram_bf_size(); + //narrow convert from int32_t to uint8_t and uint16_t which is dangerous + auto gram_size = tablet_index->get_gram_size(); + auto gram_bf_size = tablet_index->get_gram_bf_size(); + if (gram_size > 256 || gram_size < 1) { + return Status::NotSupported("Do not support ngram bloom filter for ngram_size: ", + gram_size); + } + if (gram_bf_size > 65535 || gram_bf_size < 64) { + return Status::NotSupported("Do not support ngram bloom filter for bf_size: ", + gram_bf_size); + } + opts.gram_size = gram_size; + opts.gram_bf_size = gram_bf_size; } opts.need_bitmap_index = column.has_bitmap_index(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java index b2ee4537297..d98a3b93e45 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java @@ -254,8 +254,8 @@ public class IndexDef { if (ngramSize > 256 || ngramSize < 1) { throw new AnalysisException("gram_size should be integer and less than 256"); } - if (bfSize > 65536 || bfSize < 64) { - throw new AnalysisException("bf_size should be integer and between 64 and 65536"); + if (bfSize > 65535 || bfSize < 64) { + throw new AnalysisException("bf_size should be integer and between 64 and 65535"); } } catch (NumberFormatException e) { throw new AnalysisException("invalid ngram properties:" + e.getMessage(), e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java index 340ea581504..61f2c874fd7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java @@ -141,9 +141,9 @@ public class IndexDefinition { throw new AnalysisException( "gram_size should be integer and less than 256"); } - if (bfSize > 65536 || bfSize < 64) { + if (bfSize > 65535 || bfSize < 64) { throw new AnalysisException( - "bf_size should be integer and between 64 and 65536"); + "bf_size should be integer and between 64 and 65535"); } } catch (NumberFormatException e) { throw new AnalysisException("invalid ngram properties:" + e.getMessage(), e); diff --git a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy index c56eed967a0..e2ab9b9c117 100644 --- a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy +++ b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy @@ -59,4 +59,51 @@ suite("test_ngram_bloomfilter_index") { qt_select_eq_3 "SELECT * FROM ${tableName} WHERE http_url = '/%/7212503657802320699%' ORDER BY key_id" qt_select_in_3 "SELECT * FROM ${tableName} WHERE http_url IN ('/%/7212503657802320699%') ORDER BY key_id" qt_select_like_3 "SELECT * FROM ${tableName} WHERE http_url like '/%/7212503657802320699%' ORDER BY key_id" + + //case for bf_size 65536 + def tableName2 = 'test_ngram_bloomfilter_index2' + sql "DROP TABLE IF EXISTS ${tableName2}" + test { + sql """ + CREATE TABLE IF NOT EXISTS ${tableName2} ( + `key_id` bigint(20) NULL COMMENT '', + `category` varchar(200) NULL COMMENT '', + `https_url` varchar(300) NULL COMMENT '', + `hostname` varchar(300) NULL, + `http_url` text NULL COMMENT '', + `url_path` varchar(2000) NULL COMMENT '', + `cnt` bigint(20) NULL COMMENT '', + `host_flag` boolean NULL COMMENT '', + INDEX idx_ngrambf (`http_url`) USING NGRAM_BF PROPERTIES("gram_size" = "2", "bf_size" = "65536") + ) ENGINE=OLAP + DUPLICATE KEY(`key_id`, `category`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`key_id`) BUCKETS 3 + PROPERTIES("replication_num" = "1"); + """ + exception "bf_size should be integer and between 64 and 65535" + } + + def tableName3 = 'test_ngram_bloomfilter_index3' + sql "DROP TABLE IF EXISTS ${tableName3}" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName3} ( + `key_id` bigint(20) NULL COMMENT '', + `category` varchar(200) NULL COMMENT '', + `https_url` varchar(300) NULL COMMENT '', + `hostname` varchar(300) NULL, + `http_url` text NULL COMMENT '', + `url_path` varchar(2000) NULL COMMENT '', + `cnt` bigint(20) NULL COMMENT '', + `host_flag` boolean NULL COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`key_id`, `category`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`key_id`) BUCKETS 3 + PROPERTIES("replication_num" = "1"); + """ + test { + sql """ALTER TABLE ${tableName3} ADD INDEX idx_http_url(http_url) USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="65536") COMMENT 'http_url ngram_bf index'""" + exception "bf_size should be integer and between 64 and 65535" + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org