This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 5b7d93df5e1 [Pick](Variant) pick 2 PRs to correct tmp column name to go fast execute #36277 #36313  (#36527)
5b7d93df5e1 is described below

commit 5b7d93df5e1dade1c0b1a3ce6a08fbc4ea06bd97
Author: lihangyu <15605149...@163.com>
AuthorDate: Wed Jun 19 19:07:47 2024 +0800

    [Pick](Variant) pick 2 PRs to correct tmp column name to go fast execute #36277 #36313  (#36527)
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 34 +++++++++++++++++---
 be/src/olap/rowset/segment_v2/segment_iterator.h   |  1 +
 regression-test/data/variant_p0/load.out           | 15 ++++++++-
 regression-test/suites/variant_p0/load.groovy      | 36 ++++++++++++++++++++++
 4 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index c95f0610562..c31ac3c659a 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -754,7 +754,9 @@ Status 
SegmentIterator::_execute_predicates_except_leafnode_of_andnode(
 
     auto node_type = expr->node_type();
     if (node_type == TExprNodeType::SLOT_REF) {
+        auto slot_expr = 
std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr);
         _column_predicate_info->column_name = expr->expr_name();
+        _column_predicate_info->column_id = slot_expr->column_id();
     } else if (_is_literal_node(node_type)) {
         auto v_literal_expr = 
std::dynamic_pointer_cast<doris::vectorized::VLiteral>(expr);
         _column_predicate_info->query_values.insert(v_literal_expr->value());
@@ -774,6 +776,7 @@ Status 
SegmentIterator::_execute_predicates_except_leafnode_of_andnode(
         // get child condition result in compound conditions
         auto pred_result_sign = 
_gen_predicate_result_sign(_column_predicate_info.get());
         _column_predicate_info.reset(new ColumnPredicateInfo());
+        VLOG_DEBUG << "_gen_predicate_result_sign " << pred_result_sign;
         if (_rowid_result_for_index.count(pred_result_sign) > 0 &&
             _rowid_result_for_index[pred_result_sign].first) {
             auto apply_result = 
_rowid_result_for_index[pred_result_sign].second;
@@ -974,18 +977,38 @@ std::string 
SegmentIterator::_gen_predicate_result_sign(ColumnPredicate* predica
     auto column_desc = _schema->column(predicate->column_id());
     auto pred_type = predicate->type();
     auto predicate_params = predicate->predicate_params();
-    pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + 
column_desc->name() + "_" +
+
+    std::string col_name = column_desc->name();
+
+    if (column_desc->path() != nullptr) {
+        const static std::string pattern = "(CAST {}(Nullable(Variant)) TO 
{})";
+        // indicate a subcolumn access for variant, using the expression 
pattern as pred result sign name
+        col_name = fmt::format(pattern, col_name,
+                               
_storage_name_and_type[predicate->column_id()].second->get_name());
+    }
+
+    pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + col_name + "_" +
                        predicate->pred_type_string(pred_type) + "_" +
                        join(predicate_params->values, ",");
-
+    VLOG_DEBUG << "_gen_predicate_result_sign: " << pred_result_sign;
     return pred_result_sign;
 }
 
 std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicateInfo* 
predicate_info) {
+    auto column_desc = 
_schema->column(_schema->column_id(predicate_info->column_id));
+    std::string col_name = predicate_info->column_name;
+    if (column_desc->path() != nullptr) {
+        const static std::string pattern = "(CAST {}(Nullable(Variant)) TO 
{})";
+        // indicate a subcolumn access for variant, using the expression 
pattern as pred result sign name
+        col_name = fmt::format(pattern, col_name,
+                               
_storage_name_and_type[_schema->column_id(predicate_info->column_id)]
+                                       .second->get_name());
+    }
     std::string pred_result_sign;
-    pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + 
predicate_info->column_name + "_" +
+    pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + col_name + "_" +
                        predicate_info->query_op + "_" +
                        boost::join(predicate_info->query_values, ",");
+    VLOG_DEBUG << "_gen_predicate_result_sign: " << pred_result_sign;
     return pred_result_sign;
 }
 
@@ -2631,7 +2654,7 @@ bool 
SegmentIterator::_check_column_pred_all_push_down(const std::string& column
         auto preds_in_remaining_vconjuct = 
_column_pred_in_remaining_vconjunct[column_name];
         for (auto pred_info : preds_in_remaining_vconjuct) {
             auto column_sign = _gen_predicate_result_sign(&pred_info);
-            if (_rowid_result_for_index.count(column_sign) < 1) {
+            if (!_rowid_result_for_index.contains(column_sign)) {
                 return false;
             }
         }
@@ -2656,13 +2679,16 @@ void 
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
 
     auto node_type = expr->node_type();
     if (node_type == TExprNodeType::SLOT_REF) {
+        auto slot_expr = 
std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr);
         if (_column_predicate_info->column_name.empty()) {
             _column_predicate_info->column_name = expr->expr_name();
+            _column_predicate_info->column_id = slot_expr->column_id();
         } else {
             // If column name already exists, create a new ColumnPredicateInfo
             // if expr is columnA > columnB, then column name will exist, in 
this situation, we need to add it to _column_pred_in_remaining_vconjunct
             auto new_column_pred_info = 
std::make_shared<ColumnPredicateInfo>();
             new_column_pred_info->column_name = expr->expr_name();
+            new_column_pred_info->column_id = slot_expr->column_id();
             
_column_pred_in_remaining_vconjunct[new_column_pred_info->column_name].push_back(
                     *new_column_pred_info);
         }
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h 
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index a0ef11ece48..6383a9435e8 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -104,6 +104,7 @@ struct ColumnPredicateInfo {
     // use set to ensure the consistent order of predicate_result_sign 
generated by inlist.
     std::set<std::string> query_values;
     std::string query_op;
+    int32_t column_id;
 };
 
 class SegmentIterator : public RowwiseIterator {
diff --git a/regression-test/data/variant_p0/load.out 
b/regression-test/data/variant_p0/load.out
index d8f145d5fc4..3cbbd432b0e 100644
--- a/regression-test/data/variant_p0/load.out
+++ b/regression-test/data/variant_p0/load.out
@@ -212,7 +212,7 @@
 [123]
 
 -- !sql_25 --
-50000  54999.9999999998        6150000
+50000  54999.99999999684       6150000
 
 -- !sql_26 --
 5000
@@ -337,3 +337,16 @@ kaana
 -- !sql_39 --
 [1]
 
+-- !sql_records1 --
+{"id":"85321037218054145","id0":"8301","id12":"32030","id16":"39960","id20":"17202","id24":"24592","id28":"42035","id32":"29819","id36":"4680","id4":"4848","id40":"47892","id44":"29400","id48":"7799","id52":"49678","id56":"40585","id60":"23572","id64":"28579","id68":"11477","id72":"35416","id76":"9577","id8":"25758","id80":"45204","id84":"16132","id88":"1007","id92":"32630","id96":"15443","num10":310671794,"num14":317675907,"num18":173663246,"num2":68835462,"num22":919923967,"num26":9891
 [...]
+
+-- !sql_records2 --
+
+-- !sql_records3 --
+{"id":"85321037218054145","id0":"8301","id12":"32030","id16":"39960","id20":"17202","id24":"24592","id28":"42035","id32":"29819","id36":"4680","id4":"4848","id40":"47892","id44":"29400","id48":"7799","id52":"49678","id56":"40585","id60":"23572","id64":"28579","id68":"11477","id72":"35416","id76":"9577","id8":"25758","id80":"45204","id84":"16132","id88":"1007","id92":"32630","id96":"15443","num10":310671794,"num14":317675907,"num18":173663246,"num2":68835462,"num22":919923967,"num26":9891
 [...]
+
+-- !sql_records4 --
+
+-- !sql_records5 --
+{"id":"85321037218054145","id0":"8301","id12":"32030","id16":"39960","id20":"17202","id24":"24592","id28":"42035","id32":"29819","id36":"4680","id4":"4848","id40":"47892","id44":"29400","id48":"7799","id52":"49678","id56":"40585","id60":"23572","id64":"28579","id68":"11477","id72":"35416","id76":"9577","id8":"25758","id80":"45204","id84":"16132","id88":"1007","id92":"32630","id96":"15443","num10":310671794,"num14":317675907,"num18":173663246,"num2":68835462,"num22":919923967,"num26":9891
 [...]
+
diff --git a/regression-test/suites/variant_p0/load.groovy 
b/regression-test/suites/variant_p0/load.groovy
index 77a975687ee..899f7218b8e 100644
--- a/regression-test/suites/variant_p0/load.groovy
+++ b/regression-test/suites/variant_p0/load.groovy
@@ -404,6 +404,42 @@ suite("regression_test_variant", "nonConcurrent"){
         qt_sql_39 "select k, json_type(cast(v as json), '\$')  from 
variant_cast order by k" 
         qt_sql_39 "select cast(v as array<text>)  from variant_cast where k = 
1 order by k" 
         qt_sql_39 "select cast(v as string)  from variant_cast where k = 2 
order by k" 
+
+        sql "DROP TABLE IF EXISTS records"
+        sql """
+            CREATE TABLE `records` (
+                  `id` VARCHAR(20) NOT NULL,
+                  `entity_id` VARCHAR(20) NOT NULL,
+                  `value` VARIANT NOT NULL,
+                  INDEX idx_value (`value`) USING INVERTED PROPERTIES("parser" 
= "unicode", "lower_case" = "true") COMMENT 'inverted index for value'
+                ) ENGINE=OLAP
+                UNIQUE KEY(`id`)
+                COMMENT 'OLAP'
+                DISTRIBUTED BY HASH(`id`) BUCKETS 10
+                PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1",
+                "min_load_replica_num" = "-1",
+                "is_being_synced" = "false",
+                "storage_medium" = "hdd",
+                "storage_format" = "V2",
+                "inverted_index_storage_format" = "V1",
+                "enable_unique_key_merge_on_write" = "true",
+                "light_schema_change" = "true",
+                "store_row_column" = "true",
+                "disable_auto_compaction" = "false",
+                "enable_single_replica_compaction" = "false",
+                "group_commit_interval_ms" = "10000",
+                "group_commit_data_bytes" = "134217728"
+                );
+        """
+        sql """
+            insert into records values ('85321037218054145', 'A100', 
'{"id":"85321037218054145","id0":"8301","id12":"32030","id16":"39960","id20":"17202","id24":"24592","id28":"42035","id32":"29819","id36":"4680","id4":"4848","id40":"47892","id44":"29400","id48":"7799","id52":"49678","id56":"40585","id60":"23572","id64":"28579","id68":"11477","id72":"35416","id76":"9577","id8":"25758","id80":"45204","id84":"16132","id88":"1007","id92":"32630","id96":"15443","num10":310671794,"num14":3176
 [...]
+        """
+        qt_sql_records1 """SELECT value FROM records WHERE  value['text3']  
MATCH_ALL '刘伊心 是 来 搞笑 的'  OR ( value['text83']  MATCH_ALL '攻略 吧 转帖楼主 救' ) OR (  
value['text15']  MATCH_ALL '个 天秤 女 攻略 吧 转帖楼主 ' )  LIMIT 0, 100"""
+        qt_sql_records2 """SELECT value FROM records WHERE entity_id = 'A100'  
and  value['id16'] = '39960' AND (  value['text59'] = '非 明显 是 一 付 很 嫌') AND (  
value['text99'] = '来 广州 但是嗯嗯 还 不能 在')  LIMIT 0, 100;"""
+        qt_sql_records3 """SELECT value FROM records WHERE   value['text99'] 
MATCH_ALL '来 广州 但是嗯嗯 还 不能 在'  OR (  value['text47'] MATCH_ALL '你 觉得 超 好看 的 动' ) 
OR (  value['text43'] MATCH_ALL ' 楼主 拒绝 了 一个 女生 我 傻逼 吗手' )  LIMIT 0, 100"""
+        qt_sql_records4 """SELECT value FROM records WHERE  value['id16'] = 
'39960' AND (  value['text59'] = '非 明显 是 一 付 很 嫌') AND (  value['text99'] = '来 
广州 但是嗯嗯 还 不能 在 ')  """
+        qt_sql_records5 """SELECT value FROM records WHERE  value['text3'] 
MATCH_ALL '伊心 是 来 搞笑 的'  LIMIT 0, 100"""
     } finally {
         // reset flags
         set_be_config.call("variant_ratio_of_defaults_as_sparse_column", 
"0.95")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to