BiteTheDDDDt commented on code in PR #52701:
URL: https://github.com/apache/doris/pull/52701#discussion_r2218121033


##########
be/src/runtime/descriptors.cpp:
##########
@@ -61,7 +64,31 @@ SlotDescriptor::SlotDescriptor(const TSlotDescriptor& tdesc)
           _is_key(tdesc.is_key),
           _column_paths(tdesc.column_paths),
           _is_auto_increment(tdesc.__isset.is_auto_increment ? 
tdesc.is_auto_increment : false),
-          _col_default_value(tdesc.__isset.col_default_value ? 
tdesc.col_default_value : "") {}
+          _col_default_value(tdesc.__isset.col_default_value ? 
tdesc.col_default_value : "") {
+    if (tdesc.__isset.virtual_column_expr) {
+        // Make sure virtual column is valid.
+        if (tdesc.virtual_column_expr.nodes.empty()) {
+            LOG_ERROR("Virtual column expr node is empty, col_name={}, 
col_unique_id={}",
+                      tdesc.colName, tdesc.col_unique_id);
+
+            throw doris::Exception(doris::ErrorCode::FATAL_ERROR,
+                                   "Virtual column expr node is empty, 
col_name: {}, "
+                                   "col_unique_id: {}",
+                                   tdesc.colName, tdesc.col_unique_id);
+        }
+        const auto& node = tdesc.virtual_column_expr.nodes[0];
+        if (node.node_type == TExprNodeType::SLOT_REF) {
+            LOG_ERROR(
+                    "Virtual column expr node is slot ref, col_name={}, 
col_unique_id={}, expr: {}",

Review Comment:
   ditto



##########
be/src/pipeline/exec/scan_operator.h:
##########
@@ -180,7 +182,12 @@ class ScanLocalState : public ScanLocalStateBase {
                 continue;
             }
             if (_push_down_topn(pred) == push_down) {
+                LOG_INFO("push down topn filter, source node id: {}, target 
node id: {}", id,

Review Comment:
   Use VLOG (or another lower-verbosity logging macro) here instead of LOG_INFO.



##########
be/src/runtime/descriptors.cpp:
##########
@@ -61,7 +64,31 @@ SlotDescriptor::SlotDescriptor(const TSlotDescriptor& tdesc)
           _is_key(tdesc.is_key),
           _column_paths(tdesc.column_paths),
           _is_auto_increment(tdesc.__isset.is_auto_increment ? 
tdesc.is_auto_increment : false),
-          _col_default_value(tdesc.__isset.col_default_value ? 
tdesc.col_default_value : "") {}
+          _col_default_value(tdesc.__isset.col_default_value ? 
tdesc.col_default_value : "") {
+    if (tdesc.__isset.virtual_column_expr) {
+        // Make sure virtual column is valid.
+        if (tdesc.virtual_column_expr.nodes.empty()) {
+            LOG_ERROR("Virtual column expr node is empty, col_name={}, 
col_unique_id={}",

Review Comment:
   This LOG_ERROR seems to duplicate the message of the exception thrown right after it.



##########
be/src/vec/exec/scan/olap_scanner.cpp:
##########
@@ -457,6 +471,16 @@ Status OlapScanner::_init_variant_columns() {
 }
 
 Status OlapScanner::_init_return_columns() {
+#ifndef NDEBUG
+    std::vector<std::string> debug_strings;
+    for (const auto* slot : _output_tuple_desc->slots()) {
+        debug_strings.push_back(slot->debug_string());

Review Comment:
   We should not print too many logs, even in debug mode.



##########
be/src/vec/exprs/virtual_slot_ref.cpp:
##########
@@ -0,0 +1,208 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/virtual_slot_ref.h"
+
+#include <gen_cpp/Exprs_types.h>
+#include <glog/logging.h>
+#include <thrift/protocol/TDebugProtocol.h>
+
+#include <ostream>
+
+#include "common/exception.h"
+#include "common/logging.h"
+#include "common/status.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nothing.h"
+#include "vec/core/block.h"
+#include "vec/core/column_with_type_and_name.h"
+#include "vec/exprs/vectorized_fn_call.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/exprs/vexpr_fwd.h"
+
+namespace doris::vectorized {
+
+VirtualSlotRef::VirtualSlotRef(const doris::TExprNode& node)
+        : VExpr(node),
+          _column_id(-1),
+          _slot_id(node.slot_ref.slot_id),
+          _column_name(nullptr),
+          _column_label(node.label) {}
+
+VirtualSlotRef::VirtualSlotRef(const SlotDescriptor* desc)
+        : VExpr(desc->type(), false), _column_id(-1), _slot_id(desc->id()), 
_column_name(nullptr) {}
+
+Status VirtualSlotRef::prepare(doris::RuntimeState* state, const 
doris::RowDescriptor& desc,
+                               VExprContext* context) {
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
+    DCHECK_EQ(_children.size(), 0);
+    if (_slot_id == -1) {
+        _prepare_finished = true;
+        return Status::OK();
+    }
+
+    const SlotDescriptor* slot_desc = 
state->desc_tbl().get_slot_descriptor(_slot_id);
+    if (slot_desc == nullptr) {
+        return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                "couldn't resolve slot descriptor {}, desc: {}", _slot_id,
+                state->desc_tbl().debug_string());
+    }
+
+    if (slot_desc->get_virtual_column_expr() == nullptr) {
+        return Status::InternalError(
+                "VirtualSlotRef {} has no virtual column expr, slot_id: {}, 
desc: {}, "
+                "slot_desc: {}, desc_tbl: {}",
+                *_column_name, _slot_id, desc.debug_string(), 
slot_desc->debug_string(),
+                state->desc_tbl().debug_string());
+    }
+
+    _column_name = &slot_desc->col_name();
+    _column_data_type = slot_desc->get_data_type_ptr();
+    DCHECK(_column_data_type != nullptr);
+    if (!context->force_materialize_slot() && !slot_desc->is_materialized()) {
+        // slot should be ignored manually
+        _column_id = -1;
+        _prepare_finished = true;
+        return Status::OK();
+    }
+
+    _column_id = desc.get_column_id(_slot_id, 
context->force_materialize_slot());
+    if (_column_id < 0) {
+        return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                "VirtualSlotRef {} has invalid slot id: "
+                "{}.\nslot_desc:\n{},\ndesc:\n{},\ndesc_tbl:\n{}",
+                *_column_name, _slot_id, slot_desc->debug_string(), 
desc.debug_string(),
+                state->desc_tbl().debug_string());
+    }
+    const TExpr& expr = *slot_desc->get_virtual_column_expr();
+    // LOG_INFO("Virtual column expr is {}", 
apache::thrift::ThriftDebugString(expr));
+    // Create a temp_ctx only for create_expr_tree.
+    VExprContextSPtr temp_ctx;
+    RETURN_IF_ERROR(VExpr::create_expr_tree(expr, temp_ctx));
+    _virtual_column_expr = temp_ctx->root();
+    // Virtual column expr should do prepare with original context.
+    RETURN_IF_ERROR(_virtual_column_expr->prepare(state, desc, context));
+    _prepare_finished = true;
+    return Status::OK();
+}
+
+Status VirtualSlotRef::open(RuntimeState* state, VExprContext* context,
+                            FunctionContext::FunctionStateScope scope) {
+    DCHECK(_prepare_finished);
+    RETURN_IF_ERROR(_virtual_column_expr->open(state, context, scope));
+    RETURN_IF_ERROR(VExpr::open(state, context, scope));
+    _open_finished = true;
+    return Status::OK();
+}
+
+Status VirtualSlotRef::execute(VExprContext* context, Block* block, int* 
result_column_id) {
+    if (_column_id >= 0 && _column_id >= block->columns()) {
+        return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                "input block not contain slot column {}, column_id={}, 
block={}", *_column_name,
+                _column_id, block->dump_structure());
+    }
+
+    ColumnWithTypeAndName col_type_name = block->get_by_position(_column_id);
+
+    if (!col_type_name.column) {
+        // Maybe we need to create a column in this situation.
+        return Status::InternalError(
+                "VirtualSlotRef column is null, column_id: {}, column_name: 
{}", _column_id,
+                *_column_name);
+    }
+
+    const vectorized::ColumnNothing* col_nothing =
+            check_and_get_column<ColumnNothing>(col_type_name.column.get());
+
+    if (this->_virtual_column_expr != nullptr) {
+        if (col_nothing != nullptr) {

Review Comment:
   _virtual_column_expr和col_nothing什么时候会是null?



##########
be/src/vec/exprs/virtual_slot_ref.cpp:
##########
@@ -0,0 +1,208 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/virtual_slot_ref.h"
+
+#include <gen_cpp/Exprs_types.h>
+#include <glog/logging.h>
+#include <thrift/protocol/TDebugProtocol.h>
+
+#include <ostream>
+
+#include "common/exception.h"
+#include "common/logging.h"
+#include "common/status.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nothing.h"
+#include "vec/core/block.h"
+#include "vec/core/column_with_type_and_name.h"
+#include "vec/exprs/vectorized_fn_call.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/exprs/vexpr_fwd.h"
+
+namespace doris::vectorized {
+
+VirtualSlotRef::VirtualSlotRef(const doris::TExprNode& node)
+        : VExpr(node),
+          _column_id(-1),
+          _slot_id(node.slot_ref.slot_id),
+          _column_name(nullptr),
+          _column_label(node.label) {}
+
+VirtualSlotRef::VirtualSlotRef(const SlotDescriptor* desc)
+        : VExpr(desc->type(), false), _column_id(-1), _slot_id(desc->id()), 
_column_name(nullptr) {}
+
+Status VirtualSlotRef::prepare(doris::RuntimeState* state, const 
doris::RowDescriptor& desc,
+                               VExprContext* context) {
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
+    DCHECK_EQ(_children.size(), 0);
+    if (_slot_id == -1) {
+        _prepare_finished = true;
+        return Status::OK();
+    }
+
+    const SlotDescriptor* slot_desc = 
state->desc_tbl().get_slot_descriptor(_slot_id);
+    if (slot_desc == nullptr) {
+        return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                "couldn't resolve slot descriptor {}, desc: {}", _slot_id,
+                state->desc_tbl().debug_string());
+    }
+
+    if (slot_desc->get_virtual_column_expr() == nullptr) {
+        return Status::InternalError(
+                "VirtualSlotRef {} has no virtual column expr, slot_id: {}, 
desc: {}, "
+                "slot_desc: {}, desc_tbl: {}",
+                *_column_name, _slot_id, desc.debug_string(), 
slot_desc->debug_string(),
+                state->desc_tbl().debug_string());
+    }
+
+    _column_name = &slot_desc->col_name();
+    _column_data_type = slot_desc->get_data_type_ptr();
+    DCHECK(_column_data_type != nullptr);
+    if (!context->force_materialize_slot() && !slot_desc->is_materialized()) {
+        // slot should be ignored manually
+        _column_id = -1;
+        _prepare_finished = true;
+        return Status::OK();
+    }
+
+    _column_id = desc.get_column_id(_slot_id, 
context->force_materialize_slot());
+    if (_column_id < 0) {
+        return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                "VirtualSlotRef {} has invalid slot id: "
+                "{}.\nslot_desc:\n{},\ndesc:\n{},\ndesc_tbl:\n{}",
+                *_column_name, _slot_id, slot_desc->debug_string(), 
desc.debug_string(),
+                state->desc_tbl().debug_string());
+    }
+    const TExpr& expr = *slot_desc->get_virtual_column_expr();
+    // LOG_INFO("Virtual column expr is {}", 
apache::thrift::ThriftDebugString(expr));
+    // Create a temp_ctx only for create_expr_tree.
+    VExprContextSPtr temp_ctx;
+    RETURN_IF_ERROR(VExpr::create_expr_tree(expr, temp_ctx));
+    _virtual_column_expr = temp_ctx->root();
+    // Virtual column expr should do prepare with original context.
+    RETURN_IF_ERROR(_virtual_column_expr->prepare(state, desc, context));
+    _prepare_finished = true;
+    return Status::OK();
+}
+
+Status VirtualSlotRef::open(RuntimeState* state, VExprContext* context,
+                            FunctionContext::FunctionStateScope scope) {
+    DCHECK(_prepare_finished);
+    RETURN_IF_ERROR(_virtual_column_expr->open(state, context, scope));
+    RETURN_IF_ERROR(VExpr::open(state, context, scope));
+    _open_finished = true;
+    return Status::OK();
+}
+
+Status VirtualSlotRef::execute(VExprContext* context, Block* block, int* 
result_column_id) {
+    if (_column_id >= 0 && _column_id >= block->columns()) {
+        return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                "input block not contain slot column {}, column_id={}, 
block={}", *_column_name,
+                _column_id, block->dump_structure());
+    }
+
+    ColumnWithTypeAndName col_type_name = block->get_by_position(_column_id);
+
+    if (!col_type_name.column) {
+        // Maybe we need to create a column in this situation.
+        return Status::InternalError(
+                "VirtualSlotRef column is null, column_id: {}, column_name: 
{}", _column_id,
+                *_column_name);
+    }
+
+    const vectorized::ColumnNothing* col_nothing =
+            check_and_get_column<ColumnNothing>(col_type_name.column.get());
+
+    if (this->_virtual_column_expr != nullptr) {
+        if (col_nothing != nullptr) {
+            // Virtual column is not materialized, so we need to materialize 
it.
+            // Note: After executing 'execute', we cannot use the column from 
line 120 in subsequent code,
+            // because the vector might be resized during execution, causing 
previous references to become invalid.
+            int tmp_column_id = -1;
+            RETURN_IF_ERROR(_virtual_column_expr->execute(context, block, 
&tmp_column_id));
+
+            // Maybe do clone.
+            block->replace_by_position(_column_id,

Review Comment:
   
   这里看着很奇怪,按理说 expr 计算不应该改变 block 原有的数据,而是只产生新的数据。这个 _column_id 的列能保证不被其他普通的 expr 引用吗?另外,如果被多个 vslotexpr 进行修改会怎么样?



##########
be/src/vec/exec/scan/scanner_scheduler.cpp:
##########
@@ -359,4 +364,42 @@ int ScannerScheduler::get_remote_scan_thread_queue_size() {
     return config::doris_remote_scanner_thread_pool_queue_size;
 }
 
+void ScannerScheduler::_make_sure_virtual_col_is_materialized(
+        const std::shared_ptr<Scanner>& scanner, vectorized::Block* 
free_block) {
+#ifndef NDEBUG
+    // Currently, virtual column can only be used on olap table.
+    std::shared_ptr<OlapScanner> olap_scanner = 
std::dynamic_pointer_cast<OlapScanner>(scanner);
+    if (olap_scanner == nullptr) {
+        return;
+    }
+
+    size_t idx = 0;
+    for (const auto& entry : *free_block) {
+        // Virtual column must be materialized on the end of SegmentIterator's 
next batch method.
+        const vectorized::ColumnNothing* column_nothing =
+                
vectorized::check_and_get_column<vectorized::ColumnNothing>(entry.column.get());
+        if (column_nothing == nullptr) {
+            idx++;
+            continue;
+        }
+
+        std::vector<std::string> vcid_to_idx;
+
+        for (const auto& pair : olap_scanner->_vir_cid_to_idx_in_block) {
+            vcid_to_idx.push_back(fmt::format("{}-{}", pair.first, 
pair.second));
+        }
+
+        std::string error_msg = fmt::format(
+                "Column in idx {} is nothing, block columns {}, normal_columns 
"
+                "{}, "
+                "vir_cid_to_idx_in_block_msg {}",
+                idx, free_block->columns(), 
olap_scanner->_return_columns.size(),
+                fmt::format("_vir_cid_to_idx_in_block:[{}]", 
fmt::join(vcid_to_idx, ",")));
+
+        LOG_ERROR(error_msg);

Review Comment:
   ditto



##########
be/src/vec/exprs/virtual_slot_ref.cpp:
##########
@@ -0,0 +1,208 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/virtual_slot_ref.h"
+
+#include <gen_cpp/Exprs_types.h>
+#include <glog/logging.h>
+#include <thrift/protocol/TDebugProtocol.h>
+
+#include <ostream>
+
+#include "common/exception.h"
+#include "common/logging.h"
+#include "common/status.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nothing.h"
+#include "vec/core/block.h"
+#include "vec/core/column_with_type_and_name.h"
+#include "vec/exprs/vectorized_fn_call.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/exprs/vexpr_fwd.h"
+
+namespace doris::vectorized {
+
+VirtualSlotRef::VirtualSlotRef(const doris::TExprNode& node)
+        : VExpr(node),
+          _column_id(-1),
+          _slot_id(node.slot_ref.slot_id),
+          _column_name(nullptr),
+          _column_label(node.label) {}
+
+VirtualSlotRef::VirtualSlotRef(const SlotDescriptor* desc)
+        : VExpr(desc->type(), false), _column_id(-1), _slot_id(desc->id()), 
_column_name(nullptr) {}
+
+Status VirtualSlotRef::prepare(doris::RuntimeState* state, const 
doris::RowDescriptor& desc,
+                               VExprContext* context) {
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
+    DCHECK_EQ(_children.size(), 0);
+    if (_slot_id == -1) {
+        _prepare_finished = true;
+        return Status::OK();
+    }
+
+    const SlotDescriptor* slot_desc = 
state->desc_tbl().get_slot_descriptor(_slot_id);
+    if (slot_desc == nullptr) {
+        return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                "couldn't resolve slot descriptor {}, desc: {}", _slot_id,
+                state->desc_tbl().debug_string());
+    }
+
+    if (slot_desc->get_virtual_column_expr() == nullptr) {
+        return Status::InternalError(
+                "VirtualSlotRef {} has no virtual column expr, slot_id: {}, 
desc: {}, "
+                "slot_desc: {}, desc_tbl: {}",
+                *_column_name, _slot_id, desc.debug_string(), 
slot_desc->debug_string(),
+                state->desc_tbl().debug_string());
+    }
+
+    _column_name = &slot_desc->col_name();
+    _column_data_type = slot_desc->get_data_type_ptr();
+    DCHECK(_column_data_type != nullptr);
+    if (!context->force_materialize_slot() && !slot_desc->is_materialized()) {
+        // slot should be ignored manually
+        _column_id = -1;
+        _prepare_finished = true;
+        return Status::OK();
+    }
+
+    _column_id = desc.get_column_id(_slot_id, 
context->force_materialize_slot());
+    if (_column_id < 0) {
+        return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                "VirtualSlotRef {} has invalid slot id: "
+                "{}.\nslot_desc:\n{},\ndesc:\n{},\ndesc_tbl:\n{}",
+                *_column_name, _slot_id, slot_desc->debug_string(), 
desc.debug_string(),
+                state->desc_tbl().debug_string());
+    }
+    const TExpr& expr = *slot_desc->get_virtual_column_expr();
+    // LOG_INFO("Virtual column expr is {}", 
apache::thrift::ThriftDebugString(expr));
+    // Create a temp_ctx only for create_expr_tree.
+    VExprContextSPtr temp_ctx;
+    RETURN_IF_ERROR(VExpr::create_expr_tree(expr, temp_ctx));
+    _virtual_column_expr = temp_ctx->root();
+    // Virtual column expr should do prepare with original context.
+    RETURN_IF_ERROR(_virtual_column_expr->prepare(state, desc, context));
+    _prepare_finished = true;
+    return Status::OK();
+}
+
+Status VirtualSlotRef::open(RuntimeState* state, VExprContext* context,
+                            FunctionContext::FunctionStateScope scope) {
+    DCHECK(_prepare_finished);
+    RETURN_IF_ERROR(_virtual_column_expr->open(state, context, scope));
+    RETURN_IF_ERROR(VExpr::open(state, context, scope));
+    _open_finished = true;
+    return Status::OK();
+}
+
+Status VirtualSlotRef::execute(VExprContext* context, Block* block, int* 
result_column_id) {
+    if (_column_id >= 0 && _column_id >= block->columns()) {

Review Comment:
   vslot没有children吗?那如果expr顺序被打乱了是否可能会有问题



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to