This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new c3c6a4fc4c6 [pick](Variant) variant fallthrough with inverted index 
(#40070)
c3c6a4fc4c6 is described below

commit c3c6a4fc4c6eebb1aefb4681aa628f5b77113154
Author: lihangyu <15605149...@163.com>
AuthorDate: Thu Aug 29 00:30:37 2024 +0800

    [pick](Variant) variant fallthrough with inverted index (#40070)
    
    backport #40069
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp            |  7 ++++++-
 regression-test/data/variant_github_events_p2/load.out        | 10 ++++++++++
 regression-test/suites/variant_github_events_p2/load.groovy   | 10 ++++++----
 regression-test/suites/variant_p0/with_index/load.groovy      |  1 +
 regression-test/suites/variant_p0/with_index/var_index.groovy |  2 ++
 5 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index c9eec10e0c4..86476139a44 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1412,10 +1412,15 @@ Status 
SegmentIterator::_init_inverted_index_iterators() {
     }
     for (auto cid : _schema->column_ids()) {
         if (_inverted_index_iterators[cid] == nullptr) {
+            // Do not check type validity here, since we need the inverted index 
for the related variant type when reading the segment.
+            // If type validity were checked, we could not get the inverted index 
for the variant type and the result would be nullptr. Calling
+            // get_inverted_index with a variant suffix should return the 
corresponding inverted index meta.
+            bool check_inverted_index_by_type = false;
             // Use segment’s own index_meta, for compatibility with future 
indexing needs to default to lowercase.
             RETURN_IF_ERROR(_segment->new_inverted_index_iterator(
                     _opts.tablet_schema->column(cid),
-                    
_segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid)),
+                    
_segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid),
+                                                                 
check_inverted_index_by_type),
                     _opts, &_inverted_index_iterators[cid]));
         }
     }
diff --git a/regression-test/data/variant_github_events_p2/load.out 
b/regression-test/data/variant_github_events_p2/load.out
index 8d5e3327e3c..4bee99a71a9 100644
--- a/regression-test/data/variant_github_events_p2/load.out
+++ b/regression-test/data/variant_github_events_p2/load.out
@@ -11,3 +11,13 @@
 5451   
{"actor":{"avatar_url":"https://avatars.githubusercontent.com/u/3437916?","gravatar_id":"","id":3437916,"login":"misol","url":"https://api.github.com/users/misol"},"created_at":"2015-01-01T02:48:28Z","id":"2489433218","org":{"avatar_url":"https://avatars.githubusercontent.com/u/1429259?","gravatar_id":"","id":1429259,"login":"xpressengine","url":"https://api.github.com/orgs/xpressengine"},"payload":{"action":"created","comment":{"body":"Html5
 도 같이 지원하는 업로더였으면 좋겠어요! 구글링 해보면 꽤 나와요 :)" [...]
 5995   
{"actor":{"avatar_url":"https://avatars.githubusercontent.com/u/3437916?","gravatar_id":"","id":3437916,"login":"misol","url":"https://api.github.com/users/misol"},"created_at":"2015-01-01T01:47:44Z","id":"2489414108","org":{"avatar_url":"https://avatars.githubusercontent.com/u/1429259?","gravatar_id":"","id":1429259,"login":"xpressengine","url":"https://api.github.com/orgs/xpressengine"},"payload":{"action":"opened","number":1120,"pull_request":{"_links":{"comments":{"href":"https:
 [...]
 
+-- !sql --
+\N
+\N
+\N
+\N
+4748
+
+-- !sql --
+135
+
diff --git a/regression-test/suites/variant_github_events_p2/load.groovy 
b/regression-test/suites/variant_github_events_p2/load.groovy
index 20f9d9bbe21..808b9acd63e 100644
--- a/regression-test/suites/variant_github_events_p2/load.groovy
+++ b/regression-test/suites/variant_github_events_p2/load.groovy
@@ -169,7 +169,7 @@ suite("regression_test_variant_github_events_p2", 
"nonConcurrent,p2"){
 
     // build inverted index at middle of loading the data
     // ADD INDEX
-    sql """ ALTER TABLE github_events ADD INDEX idx_var (`v`) USING INVERTED 
PROPERTIES("parser" = "chinese", "parser_mode" = "fine_grained", 
"support_phrase" = "true") """
+    sql """ ALTER TABLE github_events ADD INDEX idx_var (`v`) USING INVERTED 
PROPERTIES("parser" = "english", "support_phrase" = "true") """
     wait_for_latest_op_on_table_finish("github_events", timeout)
 
     // 2022
@@ -212,9 +212,11 @@ suite("regression_test_variant_github_events_p2", 
"nonConcurrent,p2"){
         } while (running)
     }
 
-    
-    // TODO fix compaction issue, this case could be stable
+    sql """set enable_match_without_inverted_index = false""" 
+    // filter by bloom filter
     qt_sql """select cast(v["payload"]["pull_request"]["additions"] as int)  
from github_events where cast(v["repo"]["name"] as string) = 
'xpressengine/xe-core' order by 1;"""
     qt_sql """select * from github_events where  cast(v["repo"]["name"] as 
string) = 'xpressengine/xe-core' order by 1 limit 10"""
-    // TODO add test case that some certain columns are materialized in some 
file while others are not materilized(sparse)
+    // query with inverted index
+    qt_sql """select cast(v["payload"]["pull_request"]["additions"] as int)  
from github_events where v["repo"]["name"] match 'xpressengine' order by 1;"""
+    qt_sql """select count()  from github_events where v["repo"]["name"] match 
'apache' order by 1;"""
 }
\ No newline at end of file
diff --git a/regression-test/suites/variant_p0/with_index/load.groovy 
b/regression-test/suites/variant_p0/with_index/load.groovy
index 2ff781a2008..93737e8a5b9 100644
--- a/regression-test/suites/variant_p0/with_index/load.groovy
+++ b/regression-test/suites/variant_p0/with_index/load.groovy
@@ -61,6 +61,7 @@ suite("regression_test_variant_with_index", "nonConcurrent"){
         properties("replication_num" = "1", "disable_auto_compaction" = 
"true");
     """
     sql """insert into var_with_index values(1, '{"a" : 0, "b": 3}', 'hello 
world'), (2, '{"a" : 123}', 'world'),(3, '{"a" : 123}', 'hello world')"""
+    sql """set enable_match_without_inverted_index = false""" 
     qt_sql_inv_1 """select v["a"] from var_with_index where inv match 'hello' 
order by k"""
     qt_sql_inv_2 """select v["a"] from var_with_index where inv match 'hello' 
and cast(v['a'] as int) > 0 order by k"""
     qt_sql_inv_3 """select * from var_with_index where inv match 'hello' and 
cast(v["a"] as int) > 0 order by k"""
diff --git a/regression-test/suites/variant_p0/with_index/var_index.groovy 
b/regression-test/suites/variant_p0/with_index/var_index.groovy
index 8c7afaa4a26..67f7236c260 100644
--- a/regression-test/suites/variant_p0/with_index/var_index.groovy
+++ b/regression-test/suites/variant_p0/with_index/var_index.groovy
@@ -33,7 +33,9 @@ suite("regression_test_variant_var_index", "p0"){
     sql """insert into var_index values(2, '{"a" : 18811, "b" : "hello world", 
"c" : 1181111}')"""
     sql """insert into var_index values(3, '{"a" : 18811, "b" : "hello 
wworld", "c" : 11111}')"""
     sql """insert into var_index values(4, '{"a" : 1234, "b" : "hello xxx 
world", "c" : 8181111}')"""
+    sql """set enable_match_without_inverted_index = false""" 
     qt_sql """select * from var_index where cast(v["a"] as smallint) > 123 and 
cast(v["b"] as string) match 'hello' and cast(v["c"] as int) > 1024 order by 
k"""
+    sql """set enable_match_without_inverted_index = true""" 
     sql """insert into var_index values(5, '{"a" : 123456789, "b" : 123456, 
"c" : 8181111}')"""
     qt_sql """select * from var_index where cast(v["a"] as int) > 123 and 
cast(v["b"] as string) match 'hello' and cast(v["c"] as int) > 11111 order by 
k"""
     // insert double/float/array/json


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to