This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 00fca3eb24b [fix](inverted index) disable range query in StringTypeInvertedIndexReader (#38218) 00fca3eb24b is described below commit 00fca3eb24b9646c585b404d79586c62d67d50e2 Author: Sun Chenyang <csun5...@gmail.com> AuthorDate: Thu Jul 25 17:24:47 2024 +0800 [fix](inverted index) disable range query in StringTypeInvertedIndexReader (#38218) ## Proposed changes disable range query in StringTypeInvertedIndexReader --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 13 +++++- .../test_ignore_above_in_index.out | 3 ++ .../test_ignore_above_in_index.groovy | 47 ++++++++++++++++++++++ 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 51245b57022..ce8b3d30c72 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -920,7 +920,8 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) { return false; } - if (_inverted_index_iterators[pred->column_id()] == nullptr) { + auto pred_column_id = pred->column_id(); + if (_inverted_index_iterators[pred_column_id] == nullptr) { //this column without inverted index return false; } @@ -935,13 +936,21 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool return false; } + // UNTOKENIZED strings exceed ignore_above, they are written as null, causing range query errors + if (PredicateTypeTraits::is_range(pred->type()) && + _inverted_index_iterators[pred_column_id] != nullptr && + _inverted_index_iterators[pred_column_id]->get_inverted_index_reader_type() == + InvertedIndexReaderType::STRING_TYPE) { + return false; + } + // Function filter no apply inverted index if (dynamic_cast<LikeColumnPredicate<TYPE_CHAR>*>(pred) != nullptr || dynamic_cast<LikeColumnPredicate<TYPE_STRING>*>(pred) != nullptr) { return false; } - bool handle_by_fulltext = _column_has_fulltext_index(pred->column_id()); + bool handle_by_fulltext = _column_has_fulltext_index(pred_column_id); if (handle_by_fulltext) { // when predicate in compound condition which except leafNode of andNode, // only can apply match query for fulltext index, diff --git a/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out b/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out index f88a155567e..718bd29e5d9 100644 --- a/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out +++ b/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out @@ -2,3 +2,6 @@ -- !sql -- 3 +-- !sql -- +772 + diff --git a/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy b/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy index de508d9d263..d6b42661441 100644 --- a/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy +++ b/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy @@ -39,4 +39,51 @@ suite("test_ignore_above_in_index", "p0") { sql "insert into ${tableName} values (20, '1234567890');" sql "insert into ${tableName} values (20, '1234567890');" qt_sql "select count() from ${tableName} where c = '1234567890';" + + def tableName2 = "test_ignore_above_in_index2" + sql "DROP TABLE IF EXISTS ${tableName2}" + sql """ + CREATE TABLE ${tableName2} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` string NULL COMMENT "", + `request` string NULL COMMENT "", + `status` int NULL COMMENT "", + `size` int NULL COMMENT "", + INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("ignore_above"="5") COMMENT '', + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '', + INDEX status_idx (`status`) USING INVERTED COMMENT '', + INDEX size_idx (`size`) USING INVERTED COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + // load the json data + streamLoad { + table "${tableName2}" + + set 'read_json_by_line', 'true' + set 'format', 'json' + file 'documents-1000.json' // import json file + time 10000 // limit inflight 10s + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + + qt_sql "select count() from ${tableName2} where clientip > '17.0';" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org