This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 5b7d93df5e1 [Pick](Variant) pick 2 PRs to correct tmp column name to go fast execute #36277 #36313 (#36527) 5b7d93df5e1 is described below commit 5b7d93df5e1dade1c0b1a3ce6a08fbc4ea06bd97 Author: lihangyu <15605149...@163.com> AuthorDate: Wed Jun 19 19:07:47 2024 +0800 [Pick](Variant) pick 2 PRs to correct tmp column name to go fast execute #36277 #36313 (#36527) --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 34 +++++++++++++++++--- be/src/olap/rowset/segment_v2/segment_iterator.h | 1 + regression-test/data/variant_p0/load.out | 15 ++++++++- regression-test/suites/variant_p0/load.groovy | 36 ++++++++++++++++++++++ 4 files changed, 81 insertions(+), 5 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index c95f0610562..c31ac3c659a 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -754,7 +754,9 @@ Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode( auto node_type = expr->node_type(); if (node_type == TExprNodeType::SLOT_REF) { + auto slot_expr = std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr); _column_predicate_info->column_name = expr->expr_name(); + _column_predicate_info->column_id = slot_expr->column_id(); } else if (_is_literal_node(node_type)) { auto v_literal_expr = std::dynamic_pointer_cast<doris::vectorized::VLiteral>(expr); _column_predicate_info->query_values.insert(v_literal_expr->value()); @@ -774,6 +776,7 @@ Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode( // get child condition result in compound conditions auto pred_result_sign = _gen_predicate_result_sign(_column_predicate_info.get()); _column_predicate_info.reset(new ColumnPredicateInfo()); + VLOG_DEBUG << "_gen_predicate_result_sign " << pred_result_sign; if (_rowid_result_for_index.count(pred_result_sign) > 0 && _rowid_result_for_index[pred_result_sign].first) { auto apply_result = _rowid_result_for_index[pred_result_sign].second; @@ -974,18 +977,38 @@ std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicate* predica auto column_desc = _schema->column(predicate->column_id()); auto pred_type = predicate->type(); auto predicate_params = predicate->predicate_params(); - pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_desc->name() + "_" + + + std::string col_name = column_desc->name(); + + if (column_desc->path() != nullptr) { + const static std::string pattern = "(CAST {}(Nullable(Variant)) TO {})"; + // indicate a subcolumn access for variant, using the expression pattern as pred result sign name + col_name = fmt::format(pattern, col_name, + _storage_name_and_type[predicate->column_id()].second->get_name()); + } + + pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + col_name + "_" + predicate->pred_type_string(pred_type) + "_" + join(predicate_params->values, ","); - + VLOG_DEBUG << "_gen_predicate_result_sign: " << pred_result_sign; return pred_result_sign; } std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicateInfo* predicate_info) { + auto column_desc = _schema->column(_schema->column_id(predicate_info->column_id)); + std::string col_name = predicate_info->column_name; + if (column_desc->path() != nullptr) { + const static std::string pattern = "(CAST {}(Nullable(Variant)) TO {})"; + // indicate a subcolumn access for variant, using the expression pattern as pred result sign name + col_name = fmt::format(pattern, col_name, + _storage_name_and_type[_schema->column_id(predicate_info->column_id)] + .second->get_name()); + } std::string pred_result_sign; - pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + predicate_info->column_name + "_" + + pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + col_name + "_" + predicate_info->query_op + "_" + boost::join(predicate_info->query_values, ","); + VLOG_DEBUG << "_gen_predicate_result_sign: " << pred_result_sign; return pred_result_sign; } @@ -2631,7 +2654,7 @@ bool SegmentIterator::_check_column_pred_all_push_down(const std::string& column auto preds_in_remaining_vconjuct = _column_pred_in_remaining_vconjunct[column_name]; for (auto pred_info : preds_in_remaining_vconjuct) { auto column_sign = _gen_predicate_result_sign(&pred_info); - if (_rowid_result_for_index.count(column_sign) < 1) { + if (!_rowid_result_for_index.contains(column_sign)) { return false; } } @@ -2656,13 +2679,16 @@ void SegmentIterator::_calculate_pred_in_remaining_conjunct_root( auto node_type = expr->node_type(); if (node_type == TExprNodeType::SLOT_REF) { + auto slot_expr = std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr); if (_column_predicate_info->column_name.empty()) { _column_predicate_info->column_name = expr->expr_name(); + _column_predicate_info->column_id = slot_expr->column_id(); } else { // If column name already exists, create a new ColumnPredicateInfo // if expr is columnA > columnB, then column name will exist, in this situation, we need to add it to _column_pred_in_remaining_vconjunct auto new_column_pred_info = std::make_shared<ColumnPredicateInfo>(); new_column_pred_info->column_name = expr->expr_name(); + new_column_pred_info->column_id = slot_expr->column_id(); _column_pred_in_remaining_vconjunct[new_column_pred_info->column_name].push_back( *new_column_pred_info); } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index a0ef11ece48..6383a9435e8 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -104,6 +104,7 @@ struct ColumnPredicateInfo { // use set to ensure the consistent order of predicate_result_sign generated by inlist. std::set<std::string> query_values; std::string query_op; + int32_t column_id; }; class SegmentIterator : public RowwiseIterator { diff --git a/regression-test/data/variant_p0/load.out b/regression-test/data/variant_p0/load.out index d8f145d5fc4..3cbbd432b0e 100644 --- a/regression-test/data/variant_p0/load.out +++ b/regression-test/data/variant_p0/load.out @@ -212,7 +212,7 @@ [123] -- !sql_25 -- -50000 54999.9999999998 6150000 +50000 54999.99999999684 6150000 -- !sql_26 -- 5000 @@ -337,3 +337,16 @@ kaana -- !sql_39 -- [1] +-- !sql_records1 -- +{"id":"85321037218054145","id0":"8301","id12":"32030","id16":"39960","id20":"17202","id24":"24592","id28":"42035","id32":"29819","id36":"4680","id4":"4848","id40":"47892","id44":"29400","id48":"7799","id52":"49678","id56":"40585","id60":"23572","id64":"28579","id68":"11477","id72":"35416","id76":"9577","id8":"25758","id80":"45204","id84":"16132","id88":"1007","id92":"32630","id96":"15443","num10":310671794,"num14":317675907,"num18":173663246,"num2":68835462,"num22":919923967,"num26":9891 [...] + +-- !sql_records2 -- + +-- !sql_records3 -- +{"id":"85321037218054145","id0":"8301","id12":"32030","id16":"39960","id20":"17202","id24":"24592","id28":"42035","id32":"29819","id36":"4680","id4":"4848","id40":"47892","id44":"29400","id48":"7799","id52":"49678","id56":"40585","id60":"23572","id64":"28579","id68":"11477","id72":"35416","id76":"9577","id8":"25758","id80":"45204","id84":"16132","id88":"1007","id92":"32630","id96":"15443","num10":310671794,"num14":317675907,"num18":173663246,"num2":68835462,"num22":919923967,"num26":9891 [...] + +-- !sql_records4 -- + +-- !sql_records5 -- +{"id":"85321037218054145","id0":"8301","id12":"32030","id16":"39960","id20":"17202","id24":"24592","id28":"42035","id32":"29819","id36":"4680","id4":"4848","id40":"47892","id44":"29400","id48":"7799","id52":"49678","id56":"40585","id60":"23572","id64":"28579","id68":"11477","id72":"35416","id76":"9577","id8":"25758","id80":"45204","id84":"16132","id88":"1007","id92":"32630","id96":"15443","num10":310671794,"num14":317675907,"num18":173663246,"num2":68835462,"num22":919923967,"num26":9891 [...] + diff --git a/regression-test/suites/variant_p0/load.groovy b/regression-test/suites/variant_p0/load.groovy index 77a975687ee..899f7218b8e 100644 --- a/regression-test/suites/variant_p0/load.groovy +++ b/regression-test/suites/variant_p0/load.groovy @@ -404,6 +404,42 @@ suite("regression_test_variant", "nonConcurrent"){ qt_sql_39 "select k, json_type(cast(v as json), '\$') from variant_cast order by k" qt_sql_39 "select cast(v as array<text>) from variant_cast where k = 1 order by k" qt_sql_39 "select cast(v as string) from variant_cast where k = 2 order by k" + + sql "DROP TABLE IF EXISTS records" + sql """ + CREATE TABLE `records` ( + `id` VARCHAR(20) NOT NULL, + `entity_id` VARCHAR(20) NOT NULL, + `value` VARIANT NOT NULL, + INDEX idx_value (`value`) USING INVERTED PROPERTIES("parser" = "unicode", "lower_case" = "true") COMMENT 'inverted index for value' + ) ENGINE=OLAP + UNIQUE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "is_being_synced" = "false", + "storage_medium" = "hdd", + "storage_format" = "V2", + "inverted_index_storage_format" = "V1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "store_row_column" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false", + "group_commit_interval_ms" = "10000", + "group_commit_data_bytes" = "134217728" + ); + """ + sql """ + insert into records values ('85321037218054145', 'A100', '{"id":"85321037218054145","id0":"8301","id12":"32030","id16":"39960","id20":"17202","id24":"24592","id28":"42035","id32":"29819","id36":"4680","id4":"4848","id40":"47892","id44":"29400","id48":"7799","id52":"49678","id56":"40585","id60":"23572","id64":"28579","id68":"11477","id72":"35416","id76":"9577","id8":"25758","id80":"45204","id84":"16132","id88":"1007","id92":"32630","id96":"15443","num10":310671794,"num14":3176 [...] + """ + qt_sql_records1 """SELECT value FROM records WHERE value['text3'] MATCH_ALL '刘伊心 是 来 搞笑 的' OR ( value['text83'] MATCH_ALL '攻略 吧 转帖楼主 救' ) OR ( value['text15'] MATCH_ALL '个 天秤 女 攻略 吧 转帖楼主 ' ) LIMIT 0, 100""" + qt_sql_records2 """SELECT value FROM records WHERE entity_id = 'A100' and value['id16'] = '39960' AND ( value['text59'] = '非 明显 是 一 付 很 嫌') AND ( value['text99'] = '来 广州 但是嗯嗯 还 不能 在') LIMIT 0, 100;""" + qt_sql_records3 """SELECT value FROM records WHERE value['text99'] MATCH_ALL '来 广州 但是嗯嗯 还 不能 在' OR ( value['text47'] MATCH_ALL '你 觉得 超 好看 的 动' ) OR ( value['text43'] MATCH_ALL ' 楼主 拒绝 了 一个 女生 我 傻逼 吗手' ) LIMIT 0, 100""" + qt_sql_records4 """SELECT value FROM records WHERE value['id16'] = '39960' AND ( value['text59'] = '非 明显 是 一 付 很 嫌') AND ( value['text99'] = '来 广州 但是嗯嗯 还 不能 在 ') """ + qt_sql_records5 """SELECT value FROM records WHERE value['text3'] MATCH_ALL '伊心 是 来 搞笑 的' LIMIT 0, 100""" } finally { // reset flags set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org