yixiutt commented on code in PR #19237: URL: https://github.com/apache/doris/pull/19237#discussion_r1192117135
########## be/src/olap/olap_server.cpp: ########## @@ -532,6 +580,165 @@ void StorageEngine::_compaction_tasks_producer_callback() { } while (!_stop_background_threads_latch.wait_for(std::chrono::milliseconds(interval))); } +void StorageEngine::_tablet_replicas_info_update_callback() { +#ifdef GOOGLE_PROFILER + ProfilerRegisterThread(); +#endif + LOG(INFO) << "try to start tablet replicas info update process!"; + + int64_t interval = config::tablet_replicas_info_update_interval_seconds; + do { + if (config::enable_single_replica_compaction) { + auto all_tablets = _tablet_manager->get_all_tablet([](Tablet* t) { + return t->is_used() && t->tablet_state() == TABLET_RUNNING && + !t->tablet_meta()->tablet_schema()->disable_auto_compaction(); + }); + TMasterInfo* master_info = ExecEnv::GetInstance()->master_info(); + if (master_info == nullptr) { + LOG(WARNING)<< "Have not get FE Master heartbeat yet"; + std::this_thread::sleep_for(std::chrono::seconds(2)); + continue; + } + TNetworkAddress master_addr = master_info->network_address; + if (master_addr.hostname == "" || master_addr.port == 0) { + LOG(WARNING)<< "Have not get FE Master heartbeat yet"; + std::this_thread::sleep_for(std::chrono::seconds(2)); + continue; + } + + int start = 0; + int tablet_size = all_tablets.size(); + while (start < tablet_size) { + int batch_size = std::min(100, tablet_size - start); + int end = start + batch_size; + TGetTabletReplicaInfosRequest request; + TGetTabletReplicaInfosResult result; + for (int i = start; i < end; i++) { + request.tablet_ids.emplace_back(all_tablets[i]->tablet_id()); + } + Status rpc_st = ThriftRpcHelper::rpc<FrontendServiceClient>( + master_addr.hostname, master_addr.port, + [&request, &result](FrontendServiceConnection& client) { + client->getTabletReplicaInfos(result, request); + }); + + if (!rpc_st.ok()) { + LOG(WARNING)<< "Failed to get tablet replicas info, encounter rpc failure, tablet start: " + << start << " end: " << end; + start = end; + continue; + } + 
+ std::unordered_map<int64_t, TReplicaInfo> tablet_master; + for (const auto& it : result.tablet_replica_infos) { + auto tablet_id = it.first; + auto tablet = _tablet_manager->get_tablet(tablet_id); + if (tablet == nullptr) { + VLOG_CRITICAL << "tablet is nullptr"; + continue; + } + + int64_t my_replica_id = tablet->replica_id(); + VLOG_CRITICAL << tablet_id << " tablet has " << it.second.size() << " peer replicas"; Review Comment: vlog_notice ########## be/src/olap/olap_server.cpp: ########## @@ -532,6 +580,165 @@ void StorageEngine::_compaction_tasks_producer_callback() { } while (!_stop_background_threads_latch.wait_for(std::chrono::milliseconds(interval))); } +void StorageEngine::_tablet_replicas_info_update_callback() { +#ifdef GOOGLE_PROFILER + ProfilerRegisterThread(); +#endif + LOG(INFO) << "try to start tablet replicas info update process!"; + + int64_t interval = config::tablet_replicas_info_update_interval_seconds; + do { + if (config::enable_single_replica_compaction) { + auto all_tablets = _tablet_manager->get_all_tablet([](Tablet* t) { + return t->is_used() && t->tablet_state() == TABLET_RUNNING && + !t->tablet_meta()->tablet_schema()->disable_auto_compaction(); + }); + TMasterInfo* master_info = ExecEnv::GetInstance()->master_info(); + if (master_info == nullptr) { + LOG(WARNING)<< "Have not get FE Master heartbeat yet"; + std::this_thread::sleep_for(std::chrono::seconds(2)); + continue; + } + TNetworkAddress master_addr = master_info->network_address; + if (master_addr.hostname == "" || master_addr.port == 0) { + LOG(WARNING)<< "Have not get FE Master heartbeat yet"; + std::this_thread::sleep_for(std::chrono::seconds(2)); + continue; + } + + int start = 0; + int tablet_size = all_tablets.size(); + while (start < tablet_size) { + int batch_size = std::min(100, tablet_size - start); + int end = start + batch_size; + TGetTabletReplicaInfosRequest request; + TGetTabletReplicaInfosResult result; + for (int i = start; i < end; i++) { + 
request.tablet_ids.emplace_back(all_tablets[i]->tablet_id()); + } + Status rpc_st = ThriftRpcHelper::rpc<FrontendServiceClient>( + master_addr.hostname, master_addr.port, + [&request, &result](FrontendServiceConnection& client) { + client->getTabletReplicaInfos(result, request); + }); + + if (!rpc_st.ok()) { + LOG(WARNING)<< "Failed to get tablet replicas info, encounter rpc failure, tablet start: " + << start << " end: " << end; + start = end; + continue; + } + + std::unordered_map<int64_t, TReplicaInfo> tablet_master; + for (const auto& it : result.tablet_replica_infos) { + auto tablet_id = it.first; + auto tablet = _tablet_manager->get_tablet(tablet_id); + if (tablet == nullptr) { + VLOG_CRITICAL << "tablet is nullptr"; + continue; + } + + int64_t my_replica_id = tablet->replica_id(); + VLOG_CRITICAL << tablet_id << " tablet has " << it.second.size() << " peer replicas"; + uint64_t min_hash = HashUtil::hash64(&my_replica_id, sizeof(my_replica_id), DEFAULT_SEED); + bool is_master_replica = true; + TReplicaInfo master_replica; + for (const auto& replica : it.second) { + int64_t peer_replica_id = replica.replica_id; + uint64_t hash = HashUtil::hash64(&peer_replica_id, sizeof(peer_replica_id), DEFAULT_SEED); + if (hash < min_hash) { + is_master_replica = false; + master_replica = replica; + min_hash = hash; + } + } + VLOG_CRITICAL << tablet_id << " tablet master replica is: " << is_master_replica; + + if (!is_master_replica) { + tablet_master[tablet_id] = master_replica; + } + } + VLOG_CRITICAL << "get tablet replica infos from fe, size is " << end - start Review Comment: notice ########## be/src/olap/olap_server.cpp: ########## @@ -665,6 +872,19 @@ void StorageEngine::_pop_tablet_from_submitted_compaction(TabletSharedPtr tablet Status StorageEngine::_submit_compaction_task(TabletSharedPtr tablet, CompactionType compaction_type) { + if (config::enable_single_replica_compaction) { + if (!is_master_replica(tablet->tablet_id())) { Review Comment: ?? 
########## be/src/olap/single_replica_compaction.cpp: ########## @@ -0,0 +1,510 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/single_replica_compaction.h" + +#include "common/logging.h" +#include "gen_cpp/internal_service.pb.h" +#include "gen_cpp/Types_constants.h" +#include "gutil/strings/split.h" +#include "gutil/strings/stringpiece.h" +#include "http/http_client.h" +#include "io/fs/file_system.h" +#include "io/fs/local_file_system.h" +#include "io/fs/path.h" +#include "olap/rowset/rowset_factory.h" +#include "olap/rowset/rowset_meta.h" +#include "olap/snapshot_manager.h" +#include "olap/storage_engine.h" +#include "olap/tablet_meta.h" +#include "runtime/client_cache.h" +#include "runtime/memory/mem_tracker_limiter.h" +#include "service/brpc.h" +#include "task/engine_clone_task.h" +#include "util/brpc_client_cache.h" +#include "util/doris_metrics.h" +#include "util/thrift_rpc_helper.h" +#include "util/trace.h" + +namespace doris { +using namespace ErrorCode; + +SingleReplicaCompaction::SingleReplicaCompaction(const TabletSharedPtr& tablet) + : Compaction(tablet, "SingleReplicaCompaction:" + std::to_string(tablet->tablet_id())) {} + + +SingleReplicaCompaction::~SingleReplicaCompaction() {} + 
+Status SingleReplicaCompaction::prepare_compact() { + VLOG_CRITICAL << _tablet->tablet_id() << " prepare single replcia compaction and pick rowsets!"; + if (!_tablet->init_succeeded()) { + return Status::Error<CUMULATIVE_INVALID_PARAMETERS>(); + } + + std::unique_lock<std::mutex> lock_cumu(_tablet->get_cumulative_compaction_lock(), std::try_to_lock); + if (!lock_cumu.owns_lock()) { + LOG(INFO) << "The tablet is under cumulative compaction. tablet=" << _tablet->full_name(); + return Status::Error<TRY_LOCK_FAILED>(); + } + TRACE("got cumulative compaction lock"); + + std::unique_lock<std::mutex> lock_base(_tablet->get_base_compaction_lock(), std::try_to_lock); + if (!lock_base.owns_lock()) { + LOG(WARNING) << "another base compaction is running. tablet=" << _tablet->full_name(); + return Status::Error<TRY_LOCK_FAILED>(); + } + TRACE("got base compaction lock"); + + // 1. pick rowsets to compact + RETURN_IF_ERROR(pick_rowsets_to_compact()); + TRACE("rowsets picked"); + _tablet->set_clone_occurred(false); + if (_input_rowsets.size() == 1) { + return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>(); + } + + return Status::OK(); +} + +Status SingleReplicaCompaction::pick_rowsets_to_compact() { + auto candidate_rowsets = _tablet->pick_candidate_rowsets_to_single_replica_compaction(); + if (candidate_rowsets.empty()) { + return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>(); + } + _input_rowsets.clear(); + for(const auto& rowset : candidate_rowsets) { + _input_rowsets.emplace_back(rowset); + } + + return Status::OK(); +} + +Status SingleReplicaCompaction::execute_compact_impl() { + std::unique_lock<std::mutex> lock_cumu(_tablet->get_cumulative_compaction_lock(), std::try_to_lock); + if (!lock_cumu.owns_lock()) { + LOG(INFO) << "The tablet is under cumulative compaction. 
tablet=" << _tablet->full_name(); + return Status::Error<TRY_LOCK_FAILED>(); + } + TRACE("got cumulative compaction lock"); + + std::unique_lock<std::mutex> lock_base(_tablet->get_base_compaction_lock(), std::try_to_lock); + if (!lock_base.owns_lock()) { + LOG(WARNING) << "another base compaction is running. tablet=" << _tablet->full_name(); + return Status::Error<TRY_LOCK_FAILED>(); + } + TRACE("got base compaction lock"); + + // Clone task may happen after compaction task is submitted to thread pool, and rowsets picked + // for compaction may change. In this case, current compaction task should not be executed. + if (_tablet->get_clone_occurred()) { + _tablet->set_clone_occurred(false); + return Status::Error<BE_CLONE_OCCURRED>(); + } + + SCOPED_ATTACH_TASK(_mem_tracker); + + // 2. do single replica compaction + RETURN_IF_ERROR(_do_single_replica_compaction()); + TRACE("compaction finished"); + + // 3. set state to success + _state = CompactionState::SUCCESS; + + return Status::OK(); +} + +Status SingleReplicaCompaction::_do_single_replica_compaction() { + TRACE("start to do single replica compaction"); + if(_tablet->should_fetch_from_master(_output_version)) { Review Comment: not use master ########## be/src/olap/olap_server.cpp: ########## @@ -532,6 +580,165 @@ void StorageEngine::_compaction_tasks_producer_callback() { } while (!_stop_background_threads_latch.wait_for(std::chrono::milliseconds(interval))); } +void StorageEngine::_tablet_replicas_info_update_callback() { +#ifdef GOOGLE_PROFILER + ProfilerRegisterThread(); +#endif + LOG(INFO) << "try to start tablet replicas info update process!"; + + int64_t interval = config::tablet_replicas_info_update_interval_seconds; + do { + if (config::enable_single_replica_compaction) { + auto all_tablets = _tablet_manager->get_all_tablet([](Tablet* t) { + return t->is_used() && t->tablet_state() == TABLET_RUNNING && + !t->tablet_meta()->tablet_schema()->disable_auto_compaction(); + }); + TMasterInfo* master_info = 
ExecEnv::GetInstance()->master_info(); + if (master_info == nullptr) { + LOG(WARNING)<< "Have not get FE Master heartbeat yet"; + std::this_thread::sleep_for(std::chrono::seconds(2)); + continue; + } + TNetworkAddress master_addr = master_info->network_address; + if (master_addr.hostname == "" || master_addr.port == 0) { + LOG(WARNING)<< "Have not get FE Master heartbeat yet"; + std::this_thread::sleep_for(std::chrono::seconds(2)); + continue; + } + + int start = 0; + int tablet_size = all_tablets.size(); + while (start < tablet_size) { + int batch_size = std::min(100, tablet_size - start); + int end = start + batch_size; + TGetTabletReplicaInfosRequest request; + TGetTabletReplicaInfosResult result; + for (int i = start; i < end; i++) { + request.tablet_ids.emplace_back(all_tablets[i]->tablet_id()); + } + Status rpc_st = ThriftRpcHelper::rpc<FrontendServiceClient>( + master_addr.hostname, master_addr.port, + [&request, &result](FrontendServiceConnection& client) { + client->getTabletReplicaInfos(result, request); + }); + + if (!rpc_st.ok()) { + LOG(WARNING)<< "Failed to get tablet replicas info, encounter rpc failure, tablet start: " + << start << " end: " << end; + start = end; + continue; + } + + std::unordered_map<int64_t, TReplicaInfo> tablet_master; + for (const auto& it : result.tablet_replica_infos) { + auto tablet_id = it.first; + auto tablet = _tablet_manager->get_tablet(tablet_id); + if (tablet == nullptr) { + VLOG_CRITICAL << "tablet is nullptr"; + continue; + } + + int64_t my_replica_id = tablet->replica_id(); + VLOG_CRITICAL << tablet_id << " tablet has " << it.second.size() << " peer replicas"; + uint64_t min_hash = HashUtil::hash64(&my_replica_id, sizeof(my_replica_id), DEFAULT_SEED); + bool is_master_replica = true; + TReplicaInfo master_replica; + for (const auto& replica : it.second) { + int64_t peer_replica_id = replica.replica_id; + uint64_t hash = HashUtil::hash64(&peer_replica_id, sizeof(peer_replica_id), DEFAULT_SEED); + if (hash < min_hash) 
{ + is_master_replica = false; + master_replica = replica; + min_hash = hash; + } + } + VLOG_CRITICAL << tablet_id << " tablet master replica is: " << is_master_replica; Review Comment: notice -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org