yixiutt commented on code in PR #19237:
URL: https://github.com/apache/doris/pull/19237#discussion_r1197604846


##########
be/src/olap/olap_server.cpp:
##########
@@ -532,6 +581,154 @@ void StorageEngine::_compaction_tasks_producer_callback() 
{
     } while 
(!_stop_background_threads_latch.wait_for(std::chrono::milliseconds(interval)));
 }
 
+void StorageEngine::_update_peer_replica_infos_callback() {
+#ifdef GOOGLE_PROFILER
+    ProfilerRegisterThread();
+#endif
+    LOG(INFO) << "start to update peer replica infos!";
+
+    int64_t interval = config::update_peer_replica_infos_interval_seconds;
+    do {
+        if (config::enable_single_replica_compaction) {
+            auto all_tablets = _tablet_manager->get_all_tablet([](Tablet* t) {
+                return t->is_used() && t->tablet_state() == TABLET_RUNNING &&
+                       
!t->tablet_meta()->tablet_schema()->disable_auto_compaction();
+            });
+            TMasterInfo* master_info = ExecEnv::GetInstance()->master_info();
+            if (master_info == nullptr) {
+                LOG(WARNING) << "Have not get FE Master heartbeat yet";
+                std::this_thread::sleep_for(std::chrono::seconds(2));
+                continue;
+            }
+            TNetworkAddress master_addr = master_info->network_address;
+            if (master_addr.hostname == "" || master_addr.port == 0) {
+                LOG(WARNING) << "Have not get FE Master heartbeat yet";
+                std::this_thread::sleep_for(std::chrono::seconds(2));
+                continue;
+            }
+
+            int start = 0;
+            int tablet_size = all_tablets.size();
+            while (start < tablet_size) {
+                int batch_size = std::min(100, tablet_size - start);
+                int end = start + batch_size;
+                TGetTabletReplicaInfosRequest request;
+                TGetTabletReplicaInfosResult result;
+                for (int i = start; i < end; i++) {
+                    
request.tablet_ids.emplace_back(all_tablets[i]->tablet_id());
+                }
+                Status rpc_st = ThriftRpcHelper::rpc<FrontendServiceClient>(
+                        master_addr.hostname, master_addr.port,
+                        [&request, &result](FrontendServiceConnection& client) 
{
+                            client->getTabletReplicaInfos(result, request);
+                        });
+
+                if (!rpc_st.ok()) {
+                    LOG(WARNING) << "Failed to get tablet replica infos, 
encounter rpc failure, "
+                                    "tablet start: "
+                                 << start << " end: " << end;
+                    start = end;
+                    continue;
+                }
+
+                std::unique_lock<std::mutex> lock(_peer_replica_infos_mutex);
+                for (const auto& it : result.tablet_replica_infos) {
+                    auto tablet_id = it.first;
+                    auto tablet = _tablet_manager->get_tablet(tablet_id);
+                    if (tablet == nullptr) {
+                        VLOG_CRITICAL << "tablet ptr is nullptr";
+                        continue;
+                    }
+
+                    VLOG_NOTICE << tablet_id << " tablet has " << 
it.second.size() << " replicas";
+                    uint64_t min_modulo = MOD_PRIME;
+                    TReplicaInfo peer_replica;
+                    for (const auto& replica : it.second) {
+                        int64_t peer_replica_id = replica.replica_id;
+                        uint64_t modulo = HashUtil::hash64(&peer_replica_id,
+                                                           
sizeof(peer_replica_id), DEFAULT_SEED) %
+                                          MOD_PRIME;
+                        if (modulo < min_modulo) {
+                            peer_replica = replica;
+                            min_modulo = modulo;
+                        }
+                    }
+                    VLOG_NOTICE << "tablet " << tablet_id << ", peer replica 
host is "
+                                << peer_replica.host;
+                    _peer_replica_infos[tablet_id] = peer_replica;
+                }
+                _token = result.token;
+                VLOG_NOTICE << "get tablet replica infos from fe, size is " << 
end - start
+                            << " token = " << result.token;
+                start = end;
+            }
+            interval = config::update_peer_replica_infos_interval_seconds;

Review Comment:
   这行多余的 (This line is redundant.)



##########
be/src/olap/single_replica_compaction.cpp:
##########
@@ -0,0 +1,613 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/single_replica_compaction.h"
+
+#include "common/logging.h"
+#include "gen_cpp/Types_constants.h"
+#include "gen_cpp/internal_service.pb.h"
+#include "gutil/strings/split.h"
+#include "gutil/strings/stringpiece.h"
+#include "http/http_client.h"
+#include "io/fs/file_system.h"
+#include "io/fs/local_file_system.h"
+#include "io/fs/path.h"
+#include "olap/rowset/rowset_factory.h"
+#include "olap/rowset/rowset_meta.h"
+#include "olap/snapshot_manager.h"
+#include "olap/storage_engine.h"
+#include "olap/tablet_meta.h"
+#include "runtime/client_cache.h"
+#include "runtime/memory/mem_tracker_limiter.h"
+#include "service/brpc.h"
+#include "task/engine_clone_task.h"
+#include "util/brpc_client_cache.h"
+#include "util/doris_metrics.h"
+#include "util/thrift_rpc_helper.h"
+#include "util/trace.h"
+
+namespace doris {
+using namespace ErrorCode;
+
+// Constructs a single replica compaction task for `tablet`.
+// The Compaction base is initialized with the tablet and the string
+// "SingleReplicaCompaction:<tablet_id>"; `compaction_type` is stored in
+// _compaction_type.
+SingleReplicaCompaction::SingleReplicaCompaction(const TabletSharedPtr& tablet,
+                                                 const CompactionType& 
compaction_type)
+        : Compaction(tablet, "SingleReplicaCompaction:" + 
std::to_string(tablet->tablet_id())),
+          _compaction_type(compaction_type) {}
+
+// Nothing to release by hand here, so the compiler-generated destructor is used.
+SingleReplicaCompaction::~SingleReplicaCompaction() = default;
+
+// Checks that the tablet can start a single replica compaction and picks the
+// input rowsets.
+//
+// Returns:
+//   - CUMULATIVE_INVALID_PARAMETERS if the tablet has not been initialized;
+//   - TRY_LOCK_FAILED if the cumulative or the base compaction lock cannot be
+//     taken without blocking;
+//   - whatever pick_rowsets_to_compact() returns on failure;
+//   - CUMULATIVE_NO_SUITABLE_VERSION if exactly one input rowset was picked;
+//   - OK otherwise.
+Status SingleReplicaCompaction::prepare_compact() {
+    VLOG_CRITICAL << _tablet->tablet_id() << " prepare single replcia 
compaction and pick rowsets!";
+    if (!_tablet->init_succeeded()) {
+        return Status::Error<CUMULATIVE_INVALID_PARAMETERS>();
+    }
+
+    // Both compaction locks are only tried, never waited on. As local
+    // unique_locks they are released when this function returns.
+    std::unique_lock<std::mutex> 
lock_cumu(_tablet->get_cumulative_compaction_lock(),
+                                           std::try_to_lock);
+    if (!lock_cumu.owns_lock()) {
+        LOG(INFO) << "The tablet is under cumulative compaction. tablet=" << 
_tablet->full_name();
+        return Status::Error<TRY_LOCK_FAILED>();
+    }
+    TRACE("got cumulative compaction lock");
+    std::unique_lock<std::mutex> 
lock_base(_tablet->get_base_compaction_lock(), std::try_to_lock);
+    if (!lock_base.owns_lock()) {
+        LOG(WARNING) << "another base compaction is running. tablet=" << 
_tablet->full_name();
+        return Status::Error<TRY_LOCK_FAILED>();
+    }
+    TRACE("got base compaction lock");
+
+    // 1. pick rowsets to compact
+    RETURN_IF_ERROR(pick_rowsets_to_compact());
+    TRACE("rowsets picked");
+    // Clear the clone flag now that rowsets are picked; execute_compact_impl()
+    // refuses to run if the flag is set when it checks it.
+    _tablet->set_clone_occurred(false);
+    // A single picked rowset is reported as "no suitable version".
+    if (_input_rowsets.size() == 1) {
+        return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>();
+    }
+
+    return Status::OK();
+}
+
+// Replaces _input_rowsets with the candidates returned by
+// Tablet::pick_candidate_rowsets_to_single_replica_compaction().
+//
+// Returns CUMULATIVE_NO_SUITABLE_VERSION when the tablet offers no candidate
+// (in that case _input_rowsets is left untouched), OK otherwise.
+Status SingleReplicaCompaction::pick_rowsets_to_compact() {
+    auto candidate_rowsets = 
_tablet->pick_candidate_rowsets_to_single_replica_compaction();
+    if (candidate_rowsets.empty()) {
+        return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>();
+    }
+    // Drop whatever an earlier pick left behind before copying the new candidates in.
+    _input_rowsets.clear();
+    for (const auto& rowset : candidate_rowsets) {
+        _input_rowsets.emplace_back(rowset);
+    }
+
+    return Status::OK();
+}
+
+// Executes the compaction prepared by prepare_compact().
+//
+// Returns:
+//   - TRY_LOCK_FAILED if the cumulative or the base compaction lock cannot be
+//     taken without blocking;
+//   - BE_CLONE_OCCURRED if the tablet's clone flag is set (the flag is cleared
+//     before returning);
+//   - whatever _do_single_replica_compaction() returns on failure;
+//   - OK after _state has been set to CompactionState::SUCCESS.
+Status SingleReplicaCompaction::execute_compact_impl() {
+    // Same non-blocking lock acquisition as prepare_compact(); both locks are
+    // released when this function returns.
+    std::unique_lock<std::mutex> 
lock_cumu(_tablet->get_cumulative_compaction_lock(),
+                                           std::try_to_lock);
+    if (!lock_cumu.owns_lock()) {
+        LOG(INFO) << "The tablet is under cumulative compaction. tablet=" << 
_tablet->full_name();
+        return Status::Error<TRY_LOCK_FAILED>();
+    }
+    TRACE("got cumulative compaction lock");
+
+    std::unique_lock<std::mutex> 
lock_base(_tablet->get_base_compaction_lock(), std::try_to_lock);
+    if (!lock_base.owns_lock()) {
+        LOG(WARNING) << "another base compaction is running. tablet=" << 
_tablet->full_name();
+        return Status::Error<TRY_LOCK_FAILED>();
+    }
+    TRACE("got base compaction lock");
+
+    // A clone task may happen after the compaction task is submitted to the
+    // thread pool, and the rowsets picked for compaction may change. In this
+    // case the current compaction task should not be executed.
+    if (_tablet->get_clone_occurred()) {
+        _tablet->set_clone_occurred(false);
+        return Status::Error<BE_CLONE_OCCURRED>();
+    }
+
+    SCOPED_ATTACH_TASK(_mem_tracker);
+
+    // 2. do single replica compaction
+    RETURN_IF_ERROR(_do_single_replica_compaction());
+    TRACE("single replica compaction finished");
+
+    // 3. set state to success
+    _state = CompactionState::SUCCESS;
+
+    return Status::OK();
+}
+
+// Drives one single replica compaction for this tablet:
+//   1. look up the peer replica and token registered for the tablet,
+//   2. ask the peer for its versions,
+//   3. check whether any of them is worth fetching,
+//   4-7. fetch the result and install it (_do_single_replica_compaction_impl).
+//
+// Returns Aborted when the tablet has no peer replica or no matching version,
+// the error from _get_verisons_from_peer() if that call fails, and otherwise
+// the status of _do_single_replica_compaction_impl(). That status used to be
+// dropped, so a failed fetch was reported to the caller as success.
+Status SingleReplicaCompaction::_do_single_replica_compaction() {
+    TRACE("start to do single replica compaction");
+    TReplicaInfo addr;
+    std::string token;
+    // 1. get peer replica info
+    if (!StorageEngine::instance()->get_peer_replica_info(_tablet->tablet_id(), addr, token)) {
+        LOG(WARNING) << _tablet->tablet_id() << " tablet don't have peer replica";
+        return Status::Aborted("tablet don't have peer replica");
+    }
+
+    // 2. get versions from peer
+    std::vector<Version> peer_versions;
+    RETURN_IF_ERROR(_get_verisons_from_peer(addr, peer_versions));
+
+    // 3. find_version_to_fetch
+    if (!_find_version_to_fetch(peer_versions)) {
+        LOG(WARNING) << _tablet->tablet_id() << " tablet don't need to fetch, no matched version";
+        return Status::Aborted("no matched version to fetch");
+    }
+
+    // The per-disk compaction counter is raised only around the fetch/install
+    // phase. The decrement is not behind an early return, so it always runs.
+    _tablet->data_dir()->disks_compaction_num_increment(1);
+    Status st = _do_single_replica_compaction_impl(addr, token);
+    _tablet->data_dir()->disks_compaction_num_increment(-1);
+
+    // Hand the real outcome back so the caller does not mark a failed
+    // compaction as successful.
+    return st;
+}
+
+// Fetches the peer's compaction result and installs it on the local tablet
+// (steps 4-7 of a single replica compaction).
+//
+// `addr` and `token` are forwarded unchanged to _fetch_compaction_result().
+// Returns the first error from _fetch_compaction_result() or modify_rowsets();
+// OK once the success time has been recorded and the summary has been logged.
+Status SingleReplicaCompaction::_do_single_replica_compaction_impl(TReplicaInfo& addr,
+                                                                   std::string& token) {
+    // 4. fetch compaction result
+    RETURN_IF_ERROR(_fetch_compaction_result(addr, token));
+    // 5. adjust input rowset
+    _adjust_input_rowset();
+    // 6. modify rowsets in memory
+    RETURN_IF_ERROR(modify_rowsets());
+    TRACE("modify rowsets finished");
+
+    // 7. update last success compaction time
+    if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION) {
+        _tablet->set_last_cumu_compaction_success_time(UnixMillis());
+    } else if (compaction_type() == ReaderType::READER_BASE_COMPACTION) {
+        _tablet->set_last_base_compaction_success_time(UnixMillis());
+    }
+
+    // -1 is logged when the tablet currently has no rowset at all.
+    int64_t current_max_version = -1;
+    {
+        std::shared_lock rdlock(_tablet->get_header_lock());
+        RowsetSharedPtr max_rowset = _tablet->rowset_with_max_version();
+        if (max_rowset != nullptr) {
+            // Reuse the pointer that was just null-checked instead of looking
+            // the rowset up a second time.
+            current_max_version = max_rowset->end_version();
+        }
+    }
+
+    LOG(INFO) << "succeed to do single replica compaction"
+              << ". tablet=" << _tablet->full_name() << ", output_version=" << _output_version
+              << ", current_max_version=" << current_max_version
+              << ", input_rowset_size=" << _input_rowsets_size
+              << ", input_row_num=" << _input_row_num
+              << ", input_segments_num=" << _input_num_segments
+              << ", _input_index_size=" << _input_index_size
+              << ", output_rowset_data_size=" << _output_rowset->data_disk_size()
+              << ", output_row_num=" << _output_rowset->num_rows()
+              << ", output_segments_num=" << _output_rowset->num_segments();
+    return Status::OK();
+}
+
+Status SingleReplicaCompaction::_get_verisons_from_peer(TReplicaInfo& addr,
+                                                        std::vector<Version>& 
peer_versions) {

Review Comment:
   const &



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to