zhannngchen commented on code in PR #38331: URL: https://github.com/apache/doris/pull/38331#discussion_r1691351024
########## be/src/olap/rowset/rowset_meta_manager.cpp: ########## @@ -533,4 +533,74 @@ Status RowsetMetaManager::load_json_rowset_meta(OlapMeta* meta, return status; } +Status RowsetMetaManager::save_partial_update_info( + OlapMeta* meta, int64_t tablet_id, const RowsetId& rowset_id, + const PartialUpdateInfoPB& partial_update_info_pb) { + std::string key = + fmt::format("{}{}_{}", PARTIAL_UPDATE_INFO_PREFIX, tablet_id, rowset_id.to_string()); + std::string value; + if (!partial_update_info_pb.SerializeToString(&value)) { + return Status::Error<SERIALIZE_PROTOBUF_ERROR>( + "serialize partial update info failed. key:{}", key); + } + LOG_INFO("save partial update info, key={}, value_size={}", key, value.size()); Review Comment: change to VLOG ########## be/src/olap/rowset/rowset_meta_manager.cpp: ########## @@ -533,4 +533,74 @@ Status RowsetMetaManager::load_json_rowset_meta(OlapMeta* meta, return status; } +Status RowsetMetaManager::save_partial_update_info( + OlapMeta* meta, int64_t tablet_id, const RowsetId& rowset_id, + const PartialUpdateInfoPB& partial_update_info_pb) { + std::string key = + fmt::format("{}{}_{}", PARTIAL_UPDATE_INFO_PREFIX, tablet_id, rowset_id.to_string()); Review Comment: tablet_id + txn_id might be better? the cost of key compare is lower ########## be/src/olap/storage_engine.cpp: ########## @@ -1022,6 +1027,39 @@ void StorageEngine::_clean_unused_pending_publish_info() { } } +void StorageEngine::_clean_unused_partial_update_info() { + std::vector<std::pair<int64_t, RowsetId>> remove_infos; + auto unused_partial_update_info_collector = [this, &remove_infos]( + int64_t tablet_id, RowsetId rowset_id, + std::string_view value) -> bool { + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id); + if (tablet == nullptr) { + remove_infos.emplace_back(tablet_id, rowset_id); + return true; + } + RowsetSharedPtr rowset = tablet->get_rowset(rowset_id); + if (rowset == nullptr) { Review Comment: should check txn id, rather than rowset Whether the rowset is in the tablet or not, there is no way to confirm that this partial update info is useless, only use the txn id can confirm that. And you can't find a rowset in the tablet until it's published. ########## be/src/olap/txn_manager.cpp: ########## @@ -514,6 +553,21 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id, return status; } + if (tablet_txn_info->unique_key_merge_on_write && tablet_txn_info->partial_update_info && + tablet_txn_info->partial_update_info->is_partial_update) { + status = + RowsetMetaManager::remove_partial_update_info(meta, tablet_id, rowset->rowset_id()); Review Comment: should not remove here,it better to clear PartialUpdateInfo at the same time with the clear of rowset(RowsetMetaManager::remove) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org