This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new f5f1f8c5aab branch-3.0: [fix](ngram bloomfilter) fix narrow conversion for ngram bf_size (#43645)
f5f1f8c5aab is described below

commit f5f1f8c5aab17bd44df01e8303ccd7d2c99da2ac
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 12 11:25:09 2024 +0800

    branch-3.0: [fix](ngram bloomfilter) fix narrow conversion for ngram bf_size (#43645)
    
    Cherry-picked from #43480
    
    Co-authored-by: airborne12 <jiang...@selectdb.com>
---
 be/src/olap/rowset/segment_v2/segment_writer.cpp   | 15 ++++++-
 .../rowset/segment_v2/vertical_segment_writer.cpp  | 15 ++++++-
 .../java/org/apache/doris/analysis/IndexDef.java   |  4 +-
 .../trees/plans/commands/info/IndexDefinition.java |  4 +-
 .../index_p0/test_ngram_bloomfilter_index.groovy   | 47 ++++++++++++++++++++++
 5 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 5957a555ba7..c532969baa4 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -192,8 +192,19 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, 
const TabletColumn& co
     if (tablet_index) {
         opts.need_bloom_filter = true;
         opts.is_ngram_bf_index = true;
-        opts.gram_size = tablet_index->get_gram_size();
-        opts.gram_bf_size = tablet_index->get_gram_bf_size();
+        //narrow convert from int32_t to uint8_t and uint16_t which is 
dangerous
+        auto gram_size = tablet_index->get_gram_size();
+        auto gram_bf_size = tablet_index->get_gram_bf_size();
+        if (gram_size > 256 || gram_size < 1) {
+            return Status::NotSupported("Do not support ngram bloom filter for 
ngram_size: ",
+                                        gram_size);
+        }
+        if (gram_bf_size > 65535 || gram_bf_size < 64) {
+            return Status::NotSupported("Do not support ngram bloom filter for 
bf_size: ",
+                                        gram_bf_size);
+        }
+        opts.gram_size = gram_size;
+        opts.gram_bf_size = gram_bf_size;
     }
 
     opts.need_bitmap_index = column.has_bitmap_index();
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 9ade9c1bfcc..2cea4c86c09 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -183,8 +183,19 @@ Status 
VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
     if (tablet_index) {
         opts.need_bloom_filter = true;
         opts.is_ngram_bf_index = true;
-        opts.gram_size = tablet_index->get_gram_size();
-        opts.gram_bf_size = tablet_index->get_gram_bf_size();
+        //narrow convert from int32_t to uint8_t and uint16_t which is 
dangerous
+        auto gram_size = tablet_index->get_gram_size();
+        auto gram_bf_size = tablet_index->get_gram_bf_size();
+        if (gram_size > 256 || gram_size < 1) {
+            return Status::NotSupported("Do not support ngram bloom filter for 
ngram_size: ",
+                                        gram_size);
+        }
+        if (gram_bf_size > 65535 || gram_bf_size < 64) {
+            return Status::NotSupported("Do not support ngram bloom filter for 
bf_size: ",
+                                        gram_bf_size);
+        }
+        opts.gram_size = gram_size;
+        opts.gram_bf_size = gram_bf_size;
     }
 
     opts.need_bitmap_index = column.has_bitmap_index();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
index b2ee4537297..d98a3b93e45 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
@@ -254,8 +254,8 @@ public class IndexDef {
                     if (ngramSize > 256 || ngramSize < 1) {
                         throw new AnalysisException("gram_size should be 
integer and less than 256");
                     }
-                    if (bfSize > 65536 || bfSize < 64) {
-                        throw new AnalysisException("bf_size should be integer 
and between 64 and 65536");
+                    if (bfSize > 65535 || bfSize < 64) {
+                        throw new AnalysisException("bf_size should be integer 
and between 64 and 65535");
                     }
                 } catch (NumberFormatException e) {
                     throw new AnalysisException("invalid ngram properties:" + 
e.getMessage(), e);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
index 340ea581504..61f2c874fd7 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
@@ -141,9 +141,9 @@ public class IndexDefinition {
                         throw new AnalysisException(
                                 "gram_size should be integer and less than 
256");
                     }
-                    if (bfSize > 65536 || bfSize < 64) {
+                    if (bfSize > 65535 || bfSize < 64) {
                         throw new AnalysisException(
-                                "bf_size should be integer and between 64 and 
65536");
+                                "bf_size should be integer and between 64 and 
65535");
                     }
                 } catch (NumberFormatException e) {
                     throw new AnalysisException("invalid ngram properties:" + 
e.getMessage(), e);
diff --git 
a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy 
b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
index c56eed967a0..e2ab9b9c117 100644
--- a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
+++ b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
@@ -59,4 +59,51 @@ suite("test_ngram_bloomfilter_index") {
     qt_select_eq_3 "SELECT * FROM ${tableName} WHERE http_url = 
'/%/7212503657802320699%' ORDER BY key_id"
     qt_select_in_3 "SELECT * FROM ${tableName} WHERE http_url IN 
('/%/7212503657802320699%') ORDER BY key_id"
     qt_select_like_3 "SELECT * FROM ${tableName} WHERE http_url like 
'/%/7212503657802320699%' ORDER BY key_id"
+
+    //case for bf_size 65536
+    def tableName2 = 'test_ngram_bloomfilter_index2'
+    sql "DROP TABLE IF EXISTS ${tableName2}"
+    test {
+        sql """
+        CREATE TABLE IF NOT EXISTS ${tableName2} (
+            `key_id` bigint(20) NULL COMMENT '',
+            `category` varchar(200) NULL COMMENT '',
+            `https_url` varchar(300) NULL COMMENT '',
+            `hostname` varchar(300) NULL,
+            `http_url` text NULL COMMENT '',
+            `url_path` varchar(2000) NULL COMMENT '',
+            `cnt` bigint(20) NULL COMMENT '',
+            `host_flag` boolean NULL COMMENT '',
+            INDEX idx_ngrambf (`http_url`) USING NGRAM_BF 
PROPERTIES("gram_size" = "2", "bf_size" = "65536")
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`key_id`, `category`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
+        PROPERTIES("replication_num" = "1");
+        """
+        exception "bf_size should be integer and between 64 and 65535"
+    }
+
+    def tableName3 = 'test_ngram_bloomfilter_index3'
+    sql "DROP TABLE IF EXISTS ${tableName3}"
+    sql """
+        CREATE TABLE IF NOT EXISTS ${tableName3} (
+            `key_id` bigint(20) NULL COMMENT '',
+            `category` varchar(200) NULL COMMENT '',
+            `https_url` varchar(300) NULL COMMENT '',
+            `hostname` varchar(300) NULL,
+            `http_url` text NULL COMMENT '',
+            `url_path` varchar(2000) NULL COMMENT '',
+            `cnt` bigint(20) NULL COMMENT '',
+            `host_flag` boolean NULL COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`key_id`, `category`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
+        PROPERTIES("replication_num" = "1");
+        """
+    test {
+        sql """ALTER TABLE  ${tableName3} ADD INDEX idx_http_url(http_url) 
USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="65536") COMMENT 'http_url 
ngram_bf index'"""
+        exception "bf_size should be integer and between 64 and 65535"
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to