This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 00fca3eb24b [fix](inverted index) disable range query in StringTypeInvertedIndexReader (#38218)
00fca3eb24b is described below

commit 00fca3eb24b9646c585b404d79586c62d67d50e2
Author: Sun Chenyang <csun5...@gmail.com>
AuthorDate: Thu Jul 25 17:24:47 2024 +0800

    [fix](inverted index) disable range query in StringTypeInvertedIndexReader (#38218)
    
    ## Proposed changes
    disable range query in StringTypeInvertedIndexReader
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 13 +++++-
 .../test_ignore_above_in_index.out                 |  3 ++
 .../test_ignore_above_in_index.groovy              | 47 ++++++++++++++++++++++
 3 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 51245b57022..ce8b3d30c72 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -920,7 +920,8 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
     if (_opts.runtime_state && 
!_opts.runtime_state->query_options().enable_inverted_index_query) {
         return false;
     }
-    if (_inverted_index_iterators[pred->column_id()] == nullptr) {
+    auto pred_column_id = pred->column_id();
+    if (_inverted_index_iterators[pred_column_id] == nullptr) {
         //this column without inverted index
         return false;
     }
@@ -935,13 +936,21 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
         return false;
     }
 
+    // UNTOKENIZED strings exceed ignore_above, they are written as null, causing range query errors
+    if (PredicateTypeTraits::is_range(pred->type()) &&
+        _inverted_index_iterators[pred_column_id] != nullptr &&
+        
_inverted_index_iterators[pred_column_id]->get_inverted_index_reader_type() ==
+                InvertedIndexReaderType::STRING_TYPE) {
+        return false;
+    }
+
     // Function filter no apply inverted index
     if (dynamic_cast<LikeColumnPredicate<TYPE_CHAR>*>(pred) != nullptr ||
         dynamic_cast<LikeColumnPredicate<TYPE_STRING>*>(pred) != nullptr) {
         return false;
     }
 
-    bool handle_by_fulltext = _column_has_fulltext_index(pred->column_id());
+    bool handle_by_fulltext = _column_has_fulltext_index(pred_column_id);
     if (handle_by_fulltext) {
         // when predicate in compound condition which except leafNode of andNode,
         // only can apply match query for fulltext index,
diff --git a/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out b/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out
index f88a155567e..718bd29e5d9 100644
--- a/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out
+++ b/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out
@@ -2,3 +2,6 @@
 -- !sql --
 3
 
+-- !sql --
+772
+
diff --git a/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy b/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy
index de508d9d263..d6b42661441 100644
--- a/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy
+++ b/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy
@@ -39,4 +39,51 @@ suite("test_ignore_above_in_index", "p0") {
     sql "insert into ${tableName} values (20, '1234567890');"
     sql "insert into ${tableName} values (20, '1234567890');"
     qt_sql "select count() from ${tableName} where c = '1234567890';"
+
+    def tableName2 = "test_ignore_above_in_index2"
+    sql "DROP TABLE IF EXISTS ${tableName2}"
+    sql """
+        CREATE TABLE ${tableName2} (
+          `@timestamp` int(11) NULL COMMENT "",
+          `clientip` string NULL COMMENT "",
+          `request` string NULL COMMENT "",
+          `status` int NULL COMMENT "",
+          `size` int NULL COMMENT "",
+          INDEX clientip_idx (`clientip`) USING INVERTED 
PROPERTIES("ignore_above"="5") COMMENT '',
+          INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = 
"unicode", "support_phrase" = "true") COMMENT '',
+          INDEX status_idx (`status`) USING INVERTED COMMENT '',
+          INDEX size_idx (`size`) USING INVERTED COMMENT ''
+          ) ENGINE=OLAP
+          DUPLICATE KEY(`@timestamp`)
+          COMMENT "OLAP"
+          DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
+          PROPERTIES (
+          "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    // load the json data
+    streamLoad {
+        table "${tableName2}"
+        
+        set 'read_json_by_line', 'true'
+        set 'format', 'json'
+        file 'documents-1000.json' // import json file
+        time 10000 // limit inflight 10s
+
+        // if declared a check callback, the default check condition will ignore.
+        // So you must check all condition
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals("success", json.Status.toLowerCase())
+            assertEquals(json.NumberTotalRows, json.NumberLoadedRows + 
json.NumberUnselectedRows)
+            assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+        }
+    }
+
+    qt_sql "select count() from ${tableName2} where clientip > '17.0';"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to