github-actions[bot] commented on code in PR #26432: URL: https://github.com/apache/doris/pull/26432#discussion_r1383323741
########## be/src/vec/sink/vrow_distribution.cpp: ########## @@ -0,0 +1,288 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include <gen_cpp/FrontendService.h> +#include <gen_cpp/FrontendService_types.h> +#include "runtime/client_cache.h" +#include "runtime/exec_env.h" +#include "runtime/runtime_state.h" +#include "util/thrift_rpc_helper.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_vector.h" +#include "vec/sink/vrow_distribution.h" +#include "vec/sink/writer/vtablet_writer.h" + +namespace doris::vectorized { + +std::pair<vectorized::VExprContextSPtr, vectorized::VExprSPtr> +VRowDistribution::_get_partition_function() { + return {_vpartition->get_part_func_ctx(), _vpartition->get_partition_function()}; +} + +void VRowDistribution::_save_missing_values(vectorized::ColumnPtr col, + vectorized::DataTypePtr value_type) { + _partitions_need_create.clear(); + std::set<std::string> deduper; + // de-duplication + for (auto row : _missing_map) { + deduper.emplace(value_type->to_string(*col, row)); + } + for (auto& value : deduper) { + TStringLiteral node; + node.value = value; + _partitions_need_create.emplace_back(std::vector {node}); // only 1 
partition column now + } +} + +Status VRowDistribution::_automatic_create_partition() { + SCOPED_TIMER(_add_partition_request_timer); + TCreatePartitionRequest request; + TCreatePartitionResult result; + request.__set_txn_id(_txn_id); + request.__set_db_id(_vpartition->db_id()); + request.__set_table_id(_vpartition->table_id()); + request.__set_partitionValues(_partitions_need_create); + + VLOG(1) << "automatic partition rpc begin request " << request; + TNetworkAddress master_addr = ExecEnv::GetInstance()->master_info()->network_address; + int time_out = _state->execution_timeout() * 1000; + RETURN_IF_ERROR(ThriftRpcHelper::rpc<FrontendServiceClient>( + master_addr.hostname, master_addr.port, + [&request, &result](FrontendServiceConnection& client) { + client->createPartition(result, request); + }, + time_out)); + + Status status(Status::create(result.status)); + VLOG(1) << "automatic partition rpc end response " << result; + if (result.status.status_code == TStatusCode::OK) { + // add new created partitions + RETURN_IF_ERROR(_vpartition->add_partitions(result.partitions)); + RETURN_IF_ERROR(_on_partitions_created(_caller, &result)); + } + + return status; +} + +void VRowDistribution::_get_tablet_ids(vectorized::Block* block, int32_t index_idx, Review Comment: warning: method '_get_tablet_ids' can be made static [readability-convert-member-functions-to-static] ```suggestion static void VRowDistribution::_get_tablet_ids(vectorized::Block* block, int32_t index_idx, ``` ########## be/src/vec/sink/vrow_distribution.cpp: ########## @@ -0,0 +1,288 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include <gen_cpp/FrontendService.h> +#include <gen_cpp/FrontendService_types.h> +#include "runtime/client_cache.h" +#include "runtime/exec_env.h" +#include "runtime/runtime_state.h" +#include "util/thrift_rpc_helper.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_vector.h" +#include "vec/sink/vrow_distribution.h" +#include "vec/sink/writer/vtablet_writer.h" + +namespace doris::vectorized { + +std::pair<vectorized::VExprContextSPtr, vectorized::VExprSPtr> +VRowDistribution::_get_partition_function() { + return {_vpartition->get_part_func_ctx(), _vpartition->get_partition_function()}; +} + +void VRowDistribution::_save_missing_values(vectorized::ColumnPtr col, + vectorized::DataTypePtr value_type) { + _partitions_need_create.clear(); + std::set<std::string> deduper; + // de-duplication + for (auto row : _missing_map) { + deduper.emplace(value_type->to_string(*col, row)); + } + for (auto& value : deduper) { + TStringLiteral node; + node.value = value; + _partitions_need_create.emplace_back(std::vector {node}); // only 1 partition column now + } +} + +Status VRowDistribution::_automatic_create_partition() { + SCOPED_TIMER(_add_partition_request_timer); + TCreatePartitionRequest request; + TCreatePartitionResult result; + request.__set_txn_id(_txn_id); + request.__set_db_id(_vpartition->db_id()); + request.__set_table_id(_vpartition->table_id()); + request.__set_partitionValues(_partitions_need_create); + + VLOG(1) << "automatic partition rpc begin request " << request; + 
TNetworkAddress master_addr = ExecEnv::GetInstance()->master_info()->network_address; + int time_out = _state->execution_timeout() * 1000; + RETURN_IF_ERROR(ThriftRpcHelper::rpc<FrontendServiceClient>( + master_addr.hostname, master_addr.port, + [&request, &result](FrontendServiceConnection& client) { + client->createPartition(result, request); + }, + time_out)); + + Status status(Status::create(result.status)); + VLOG(1) << "automatic partition rpc end response " << result; + if (result.status.status_code == TStatusCode::OK) { + // add new created partitions + RETURN_IF_ERROR(_vpartition->add_partitions(result.partitions)); + RETURN_IF_ERROR(_on_partitions_created(_caller, &result)); + } + + return status; +} + +void VRowDistribution::_get_tablet_ids(vectorized::Block* block, int32_t index_idx, + std::vector<int64_t>& tablet_ids) { + tablet_ids.reserve(block->rows()); + for (int row_idx = 0; row_idx < block->rows(); row_idx++) { + if (_skip[row_idx]) { + continue; + } + auto& partition = _partitions[row_idx]; + auto& tablet_index = _tablet_indexes[row_idx]; + auto& index = partition->indexes[index_idx]; + + auto tablet_id = index.tablets[tablet_index]; + tablet_ids[row_idx] = tablet_id; + } +} + +void VRowDistribution::_filter_block_by_skip(vectorized::Block* block, Review Comment: warning: method '_filter_block_by_skip' can be made static [readability-convert-member-functions-to-static] ```suggestion static void VRowDistribution::_filter_block_by_skip(vectorized::Block* block, ``` ########## be/src/vec/sink/vrow_distribution.h: ########## @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +// IWYU pragma: no_include <bits/chrono.h> +#include <string> +#include <unordered_map> +#include <vector> + +#include "common/status.h" +#include "exec/tablet_info.h" +#include "runtime/types.h" +#include "util/runtime_profile.h" +#include "util/stopwatch.hpp" +#include "vec/core/block.h" +#include "vec/data_types/data_type.h" +#include "vec/exprs/vexpr_fwd.h" +#include "vec/sink/vtablet_block_convertor.h" +#include "vec/sink/vtablet_finder.h" + +namespace doris::vectorized { + +class IndexChannel; +class VNodeChannel; + +// <row_idx, partition_id, tablet_id> +using RowPartTabletTuple = std::tuple<std::vector<int64_t>, std::vector<int64_t>, + std::vector<int64_t>>; + +typedef Status (*OnPartitionsCreated)(void*, TCreatePartitionResult*); + +class VRowDistributionContext { +public: + RuntimeState* state = nullptr; // not owned, set when open + std::vector<std::shared_ptr<IndexChannel>>* channels; + OlapTableBlockConvertor* block_convertor = nullptr; + OlapTabletFinder* tablet_finder = nullptr; + VOlapTablePartitionParam* vpartition = nullptr; + RuntimeProfile::Counter* add_partition_request_timer = nullptr; + int64_t txn_id = -1; + ObjectPool* pool; + OlapTableLocationParam* location; + const VExprContextSPtrs* vec_output_expr_ctxs; + OnPartitionsCreated on_partitions_created; + void* caller; + std::shared_ptr<OlapTableSchemaParam> schema; +}; + +class VRowDistribution { +public: + VRowDistribution() { + } + + void init(VRowDistributionContext *ctx) { + _state = ctx->state; + _channels = ctx->channels; + _block_convertor = 
ctx->block_convertor; + _tablet_finder = ctx->tablet_finder; + _vpartition = ctx->vpartition; + _add_partition_request_timer = ctx->add_partition_request_timer; + _txn_id = ctx->txn_id; + _pool = ctx->pool; + _location = ctx->location; + _vec_output_expr_ctxs = ctx->vec_output_expr_ctxs; + _on_partitions_created = ctx->on_partitions_created; + _caller = ctx->caller; + _schema = ctx->schema; + } + + // auto partition + // mv where clause + // v1 needs index->node->row_ids - tabletids + // v2 needs index,tablet->rowids + Status generate_rows_distribution(vectorized::Block& input_block, + std::shared_ptr<vectorized::Block>& block, + int64_t& filtered_rows, bool& has_filtered_rows, + std::vector<RowPartTabletTuple>& row_part_tablet_tuples); + +private: + std::pair<vectorized::VExprContextSPtr, vectorized::VExprSPtr> _get_partition_function(); + + void _save_missing_values(vectorized::ColumnPtr col, vectorized::DataTypePtr value_type); + + // create partitions when need for auto-partition table using #_partitions_need_create. 
+ Status _automatic_create_partition(); + + void _get_tablet_ids(vectorized::Block* block, int32_t index_idx, + std::vector<int64_t>& tablet_ids); + + void _filter_block_by_skip(vectorized::Block* block, + RowPartTabletTuple& row_part_tablet_tuple); + + Status _filter_block_by_skip_and_where_clause(vectorized::Block* block, + const vectorized::VExprContextSPtr& where_clause, RowPartTabletTuple& row_part_tablet_tuple); + + Status _filter_block(vectorized::Block* block, std::vector<RowPartTabletTuple>& row_part_tablet_tuples); + + Status _generate_rows_distribution_for_auto_parititon( + vectorized::Block* block, bool has_filtered_rows, + std::vector<RowPartTabletTuple>& row_part_tablet_tuples); + + Status _generate_rows_distribution_for_non_auto_parititon( + vectorized::Block* block, bool has_filtered_rows, + std::vector<RowPartTabletTuple>& row_part_tablet_tuples); + +private: Review Comment: warning: redundant access specifier has the same accessibility as the previous access specifier [readability-redundant-access-specifiers] ```suggestion ``` <details> <summary>Additional context</summary> **be/src/vec/sink/vrow_distribution.h:93:** previously declared here ```cpp private: ^ ``` </details> ########## be/src/vec/sink/vrow_distribution.cpp: ########## @@ -0,0 +1,288 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include <gen_cpp/FrontendService.h> +#include <gen_cpp/FrontendService_types.h> +#include "runtime/client_cache.h" +#include "runtime/exec_env.h" +#include "runtime/runtime_state.h" +#include "util/thrift_rpc_helper.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_vector.h" +#include "vec/sink/vrow_distribution.h" +#include "vec/sink/writer/vtablet_writer.h" + +namespace doris::vectorized { + +std::pair<vectorized::VExprContextSPtr, vectorized::VExprSPtr> +VRowDistribution::_get_partition_function() { + return {_vpartition->get_part_func_ctx(), _vpartition->get_partition_function()}; +} + +void VRowDistribution::_save_missing_values(vectorized::ColumnPtr col, + vectorized::DataTypePtr value_type) { + _partitions_need_create.clear(); + std::set<std::string> deduper; + // de-duplication + for (auto row : _missing_map) { + deduper.emplace(value_type->to_string(*col, row)); + } + for (auto& value : deduper) { + TStringLiteral node; + node.value = value; + _partitions_need_create.emplace_back(std::vector {node}); // only 1 partition column now + } +} + +Status VRowDistribution::_automatic_create_partition() { + SCOPED_TIMER(_add_partition_request_timer); + TCreatePartitionRequest request; + TCreatePartitionResult result; + request.__set_txn_id(_txn_id); + request.__set_db_id(_vpartition->db_id()); + request.__set_table_id(_vpartition->table_id()); + request.__set_partitionValues(_partitions_need_create); + + VLOG(1) << "automatic partition rpc begin request " << request; + TNetworkAddress master_addr = ExecEnv::GetInstance()->master_info()->network_address; + int time_out = _state->execution_timeout() * 1000; + RETURN_IF_ERROR(ThriftRpcHelper::rpc<FrontendServiceClient>( + master_addr.hostname, master_addr.port, + [&request, &result](FrontendServiceConnection& client) { + 
client->createPartition(result, request); + }, + time_out)); + + Status status(Status::create(result.status)); + VLOG(1) << "automatic partition rpc end response " << result; + if (result.status.status_code == TStatusCode::OK) { + // add new created partitions + RETURN_IF_ERROR(_vpartition->add_partitions(result.partitions)); + RETURN_IF_ERROR(_on_partitions_created(_caller, &result)); + } + + return status; +} + +void VRowDistribution::_get_tablet_ids(vectorized::Block* block, int32_t index_idx, + std::vector<int64_t>& tablet_ids) { + tablet_ids.reserve(block->rows()); + for (int row_idx = 0; row_idx < block->rows(); row_idx++) { + if (_skip[row_idx]) { + continue; + } + auto& partition = _partitions[row_idx]; + auto& tablet_index = _tablet_indexes[row_idx]; + auto& index = partition->indexes[index_idx]; + + auto tablet_id = index.tablets[tablet_index]; + tablet_ids[row_idx] = tablet_id; + } +} + +void VRowDistribution::_filter_block_by_skip(vectorized::Block* block, + RowPartTabletTuple& row_part_tablet_tuple) { + auto& row_ids = std::get<0>(row_part_tablet_tuple); + auto& partition_ids = std::get<1>(row_part_tablet_tuple); + auto& tablet_ids = std::get<2>(row_part_tablet_tuple); + + for (size_t i = 0; i < block->rows(); i++) { + if (!_skip[i]) { + row_ids.emplace_back(i); + partition_ids.emplace_back(_partitions[i]->id); + tablet_ids.emplace_back(_tablet_ids[i]); + } + } +} + +Status VRowDistribution::_filter_block_by_skip_and_where_clause(vectorized::Block* block, Review Comment: warning: method '_filter_block_by_skip_and_where_clause' can be made static [readability-convert-member-functions-to-static] ```suggestion static Status VRowDistribution::_filter_block_by_skip_and_where_clause(vectorized::Block* block, ``` ########## be/src/vec/sink/vrow_distribution.cpp: ########## @@ -0,0 +1,288 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include <gen_cpp/FrontendService.h> +#include <gen_cpp/FrontendService_types.h> +#include "runtime/client_cache.h" +#include "runtime/exec_env.h" +#include "runtime/runtime_state.h" +#include "util/thrift_rpc_helper.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_vector.h" +#include "vec/sink/vrow_distribution.h" +#include "vec/sink/writer/vtablet_writer.h" + +namespace doris::vectorized { + +std::pair<vectorized::VExprContextSPtr, vectorized::VExprSPtr> +VRowDistribution::_get_partition_function() { + return {_vpartition->get_part_func_ctx(), _vpartition->get_partition_function()}; +} + +void VRowDistribution::_save_missing_values(vectorized::ColumnPtr col, + vectorized::DataTypePtr value_type) { + _partitions_need_create.clear(); + std::set<std::string> deduper; + // de-duplication + for (auto row : _missing_map) { + deduper.emplace(value_type->to_string(*col, row)); + } + for (auto& value : deduper) { + TStringLiteral node; + node.value = value; + _partitions_need_create.emplace_back(std::vector {node}); // only 1 partition column now + } +} + +Status VRowDistribution::_automatic_create_partition() { + SCOPED_TIMER(_add_partition_request_timer); + TCreatePartitionRequest request; + 
TCreatePartitionResult result; + request.__set_txn_id(_txn_id); + request.__set_db_id(_vpartition->db_id()); + request.__set_table_id(_vpartition->table_id()); + request.__set_partitionValues(_partitions_need_create); + + VLOG(1) << "automatic partition rpc begin request " << request; + TNetworkAddress master_addr = ExecEnv::GetInstance()->master_info()->network_address; + int time_out = _state->execution_timeout() * 1000; + RETURN_IF_ERROR(ThriftRpcHelper::rpc<FrontendServiceClient>( + master_addr.hostname, master_addr.port, + [&request, &result](FrontendServiceConnection& client) { + client->createPartition(result, request); + }, + time_out)); + + Status status(Status::create(result.status)); + VLOG(1) << "automatic partition rpc end response " << result; + if (result.status.status_code == TStatusCode::OK) { + // add new created partitions + RETURN_IF_ERROR(_vpartition->add_partitions(result.partitions)); + RETURN_IF_ERROR(_on_partitions_created(_caller, &result)); + } + + return status; +} + +void VRowDistribution::_get_tablet_ids(vectorized::Block* block, int32_t index_idx, + std::vector<int64_t>& tablet_ids) { + tablet_ids.reserve(block->rows()); + for (int row_idx = 0; row_idx < block->rows(); row_idx++) { + if (_skip[row_idx]) { + continue; + } + auto& partition = _partitions[row_idx]; + auto& tablet_index = _tablet_indexes[row_idx]; + auto& index = partition->indexes[index_idx]; + + auto tablet_id = index.tablets[tablet_index]; + tablet_ids[row_idx] = tablet_id; + } +} + +void VRowDistribution::_filter_block_by_skip(vectorized::Block* block, + RowPartTabletTuple& row_part_tablet_tuple) { + auto& row_ids = std::get<0>(row_part_tablet_tuple); + auto& partition_ids = std::get<1>(row_part_tablet_tuple); + auto& tablet_ids = std::get<2>(row_part_tablet_tuple); + + for (size_t i = 0; i < block->rows(); i++) { + if (!_skip[i]) { + row_ids.emplace_back(i); + partition_ids.emplace_back(_partitions[i]->id); + tablet_ids.emplace_back(_tablet_ids[i]); + } + } +} + 
+Status VRowDistribution::_filter_block_by_skip_and_where_clause(vectorized::Block* block, + const vectorized::VExprContextSPtr& where_clause, RowPartTabletTuple& row_part_tablet_tuple) { + + // TODO + //SCOPED_RAW_TIMER(&_stat.where_clause_ns); + int result_index = -1; + size_t column_number = block->columns(); + RETURN_IF_ERROR(where_clause->execute(block, &result_index)); + + auto filter_column = block->get_by_position(result_index).column; + + auto& row_ids = std::get<0>(row_part_tablet_tuple); + auto& partition_ids = std::get<1>(row_part_tablet_tuple); + auto& tablet_ids = std::get<2>(row_part_tablet_tuple); + if (auto* nullable_column = + vectorized::check_and_get_column<vectorized::ColumnNullable>(*filter_column)) { + for (size_t i = 0; i < block->rows(); i++) { + if (nullable_column->get_bool_inline(i) && !_skip[i]) { + row_ids.emplace_back(i); + partition_ids.emplace_back(_partitions[i]->id); + tablet_ids.emplace_back(_tablet_ids[i]); + } + } + } else if (auto* const_column = vectorized::check_and_get_column<vectorized::ColumnConst>( + *filter_column)) { + bool ret = const_column->get_bool(0); + if (!ret) { + return Status::OK(); + } + // should we optimize? 
+ _filter_block_by_skip(block, row_part_tablet_tuple); + } else { + auto& filter = assert_cast<const vectorized::ColumnUInt8&>(*filter_column).get_data(); + for (size_t i = 0; i < block->rows(); i++) { + if (filter[i] != 0 && !_skip[i]) { + row_ids.emplace_back(i); + partition_ids.emplace_back(_partitions[i]->id); + tablet_ids.emplace_back(_tablet_ids[i]); + } + } + } + + for (size_t i = block->columns() - 1; i >= column_number; i--) { + block->erase(i); + } + return Status::OK(); +} + +Status VRowDistribution::_filter_block(vectorized::Block* block, + std::vector<RowPartTabletTuple>& row_part_tablet_tuples) { + for (int i = 0; i < _schema->indexes().size(); i++) { + _get_tablet_ids(block, i, _tablet_ids); + auto& where_clause = _schema->indexes()[i]->where_clause; + if (where_clause != nullptr) { + RETURN_IF_ERROR(_filter_block_by_skip_and_where_clause(block, where_clause, row_part_tablet_tuples[i])); + } else { + _filter_block_by_skip(block, row_part_tablet_tuples[i]); + } + } + return Status::OK(); +} + +Status VRowDistribution::_generate_rows_distribution_for_non_auto_parititon( Review Comment: warning: method '_generate_rows_distribution_for_non_auto_parititon' can be made static [readability-convert-member-functions-to-static] ```suggestion static Status VRowDistribution::_generate_rows_distribution_for_non_auto_parititon( ``` ########## be/src/vec/sink/vrow_distribution.cpp: ########## @@ -0,0 +1,288 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include <gen_cpp/FrontendService.h> +#include <gen_cpp/FrontendService_types.h> +#include "runtime/client_cache.h" +#include "runtime/exec_env.h" +#include "runtime/runtime_state.h" +#include "util/thrift_rpc_helper.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_vector.h" +#include "vec/sink/vrow_distribution.h" +#include "vec/sink/writer/vtablet_writer.h" + +namespace doris::vectorized { + +std::pair<vectorized::VExprContextSPtr, vectorized::VExprSPtr> +VRowDistribution::_get_partition_function() { + return {_vpartition->get_part_func_ctx(), _vpartition->get_partition_function()}; +} + +void VRowDistribution::_save_missing_values(vectorized::ColumnPtr col, + vectorized::DataTypePtr value_type) { + _partitions_need_create.clear(); + std::set<std::string> deduper; + // de-duplication + for (auto row : _missing_map) { + deduper.emplace(value_type->to_string(*col, row)); + } + for (auto& value : deduper) { + TStringLiteral node; + node.value = value; + _partitions_need_create.emplace_back(std::vector {node}); // only 1 partition column now + } +} + +Status VRowDistribution::_automatic_create_partition() { + SCOPED_TIMER(_add_partition_request_timer); + TCreatePartitionRequest request; + TCreatePartitionResult result; + request.__set_txn_id(_txn_id); + request.__set_db_id(_vpartition->db_id()); + request.__set_table_id(_vpartition->table_id()); + request.__set_partitionValues(_partitions_need_create); + + VLOG(1) << "automatic partition rpc begin request " << request; + 
TNetworkAddress master_addr = ExecEnv::GetInstance()->master_info()->network_address; + int time_out = _state->execution_timeout() * 1000; + RETURN_IF_ERROR(ThriftRpcHelper::rpc<FrontendServiceClient>( + master_addr.hostname, master_addr.port, + [&request, &result](FrontendServiceConnection& client) { + client->createPartition(result, request); + }, + time_out)); + + Status status(Status::create(result.status)); + VLOG(1) << "automatic partition rpc end response " << result; + if (result.status.status_code == TStatusCode::OK) { + // add new created partitions + RETURN_IF_ERROR(_vpartition->add_partitions(result.partitions)); + RETURN_IF_ERROR(_on_partitions_created(_caller, &result)); + } + + return status; +} + +void VRowDistribution::_get_tablet_ids(vectorized::Block* block, int32_t index_idx, + std::vector<int64_t>& tablet_ids) { + tablet_ids.reserve(block->rows()); + for (int row_idx = 0; row_idx < block->rows(); row_idx++) { + if (_skip[row_idx]) { + continue; + } + auto& partition = _partitions[row_idx]; + auto& tablet_index = _tablet_indexes[row_idx]; + auto& index = partition->indexes[index_idx]; + + auto tablet_id = index.tablets[tablet_index]; + tablet_ids[row_idx] = tablet_id; + } +} + +void VRowDistribution::_filter_block_by_skip(vectorized::Block* block, + RowPartTabletTuple& row_part_tablet_tuple) { + auto& row_ids = std::get<0>(row_part_tablet_tuple); + auto& partition_ids = std::get<1>(row_part_tablet_tuple); + auto& tablet_ids = std::get<2>(row_part_tablet_tuple); + + for (size_t i = 0; i < block->rows(); i++) { + if (!_skip[i]) { + row_ids.emplace_back(i); + partition_ids.emplace_back(_partitions[i]->id); + tablet_ids.emplace_back(_tablet_ids[i]); + } + } +} + +Status VRowDistribution::_filter_block_by_skip_and_where_clause(vectorized::Block* block, + const vectorized::VExprContextSPtr& where_clause, RowPartTabletTuple& row_part_tablet_tuple) { + + // TODO + //SCOPED_RAW_TIMER(&_stat.where_clause_ns); + int result_index = -1; + size_t 
column_number = block->columns(); + RETURN_IF_ERROR(where_clause->execute(block, &result_index)); + + auto filter_column = block->get_by_position(result_index).column; + + auto& row_ids = std::get<0>(row_part_tablet_tuple); + auto& partition_ids = std::get<1>(row_part_tablet_tuple); + auto& tablet_ids = std::get<2>(row_part_tablet_tuple); + if (auto* nullable_column = + vectorized::check_and_get_column<vectorized::ColumnNullable>(*filter_column)) { + for (size_t i = 0; i < block->rows(); i++) { + if (nullable_column->get_bool_inline(i) && !_skip[i]) { + row_ids.emplace_back(i); + partition_ids.emplace_back(_partitions[i]->id); + tablet_ids.emplace_back(_tablet_ids[i]); + } + } + } else if (auto* const_column = vectorized::check_and_get_column<vectorized::ColumnConst>( + *filter_column)) { + bool ret = const_column->get_bool(0); + if (!ret) { + return Status::OK(); + } + // should we optimize? + _filter_block_by_skip(block, row_part_tablet_tuple); + } else { + auto& filter = assert_cast<const vectorized::ColumnUInt8&>(*filter_column).get_data(); + for (size_t i = 0; i < block->rows(); i++) { + if (filter[i] != 0 && !_skip[i]) { + row_ids.emplace_back(i); + partition_ids.emplace_back(_partitions[i]->id); + tablet_ids.emplace_back(_tablet_ids[i]); + } + } + } + + for (size_t i = block->columns() - 1; i >= column_number; i--) { + block->erase(i); + } + return Status::OK(); +} + +Status VRowDistribution::_filter_block(vectorized::Block* block, Review Comment: warning: method '_filter_block' can be made static [readability-convert-member-functions-to-static] ```suggestion static Status VRowDistribution::_filter_block(vectorized::Block* block, ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org