This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 8edb0189692 [Fix](inverted index) fix comparison for string after refactor compound #40338 (#40585) 8edb0189692 is described below commit 8edb01896920ab218306af6766fff8fa67275ab0 Author: airborne12 <airborn...@gmail.com> AuthorDate: Tue Sep 10 14:27:28 2024 +0800 [Fix](inverted index) fix comparison for string after refactor compound #40338 (#40585) cherry pick from #40338 --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 25 +++++----------------- be/src/olap/rowset/segment_v2/segment_iterator.h | 3 +-- be/src/vec/functions/functions_comparison.h | 23 ++++++++++++-------- 3 files changed, 20 insertions(+), 31 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 288a5df19ff..fdb0b929306 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -751,18 +751,7 @@ Status SegmentIterator::_extract_common_expr_columns(const vectorized::VExprSPtr return Status::OK(); } -bool SegmentIterator::_check_apply_by_inverted_index(ColumnId col_id) { - if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) { - return false; - } - if (_inverted_index_iterators[col_id] == nullptr) { - //this column without inverted index - return false; - } - return true; -} - -bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound) { +bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred) { if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) { return false; } @@ -798,15 +787,11 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool bool handle_by_fulltext = _column_has_fulltext_index(pred_column_id); if (handle_by_fulltext) { - // when predicate in compound condition which except leafNode of 
andNode, - // only can apply match query for fulltext index, // when predicate is leafNode of andNode, - // can apply 'match qeury' and 'equal query' and 'list query' for fulltext index. - return (pred_in_compound ? pred->type() == PredicateType::MATCH - : (pred->type() == PredicateType::MATCH || - pred->type() == PredicateType::IS_NULL || - pred->type() == PredicateType::IS_NOT_NULL || - PredicateTypeTraits::is_equal_or_list(pred->type()))); + // can apply 'match query' and 'equal query' and 'list query' for fulltext index. + return pred->type() == PredicateType::MATCH || pred->type() == PredicateType::IS_NULL || + pred->type() == PredicateType::IS_NOT_NULL || + PredicateTypeTraits::is_equal_or_list(pred->type()); } return true; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 6ffd1666f60..f5c133485aa 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -298,8 +298,7 @@ private: void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate* predicate); - bool _check_apply_by_inverted_index(ColumnId col_id); - bool _check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound = false); + bool _check_apply_by_inverted_index(ColumnPredicate* pred); void _output_index_result_column_for_expr(uint16_t* sel_rowid_idx, uint16_t select_size, vectorized::Block* block); diff --git a/be/src/vec/functions/functions_comparison.h b/be/src/vec/functions/functions_comparison.h index f6aa46fe1c4..bb1666ab864 100644 --- a/be/src/vec/functions/functions_comparison.h +++ b/be/src/vec/functions/functions_comparison.h @@ -546,14 +546,6 @@ public: //NOT support comparison predicate when parser is FULLTEXT for expr inverted index evaluate. 
return Status::OK(); } - std::string column_name = data_type_with_name.first; - Field param_value; - arguments[0].column->get(0, param_value); - auto param_type = arguments[0].type->get_type_as_type_descriptor().type; - - std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr; - RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( - param_type, &param_value, query_param)); segment_v2::InvertedIndexQueryType query_type; std::string_view name_view(name); if (name_view == NameEquals::name || name_view == NameNotEquals::name) { @@ -570,6 +562,19 @@ public: return Status::InvalidArgument("invalid comparison op type {}", Name::name); } + if (segment_v2::is_range_query(query_type) && + iter->get_inverted_index_reader_type() == + segment_v2::InvertedIndexReaderType::STRING_TYPE) { + // untokenized strings exceed ignore_above, they are written as null, causing range query errors + return Status::OK(); + } + std::string column_name = data_type_with_name.first; + Field param_value; + arguments[0].column->get(0, param_value); + auto param_type = arguments[0].type->get_type_as_type_descriptor().type; + std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr; + RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( + param_type, &param_value, query_param)); std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>(); RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( param_type, &param_value, query_param)); @@ -585,7 +590,7 @@ public: bitmap_result = result; bitmap_result.mask_out_null(); - if (name == "ne") { + if (name_view == NameNotEquals::name) { roaring::Roaring full_result; full_result.addRange(0, num_rows); bitmap_result.op_not(&full_result); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: 
commits-h...@doris.apache.org