qidaye commented on code in PR #33415: URL: https://github.com/apache/doris/pull/33415#discussion_r1560997266
########## be/src/service/backend_service.cpp: ########## @@ -307,41 +307,81 @@ void _ingest_binlog(StorageEngine& engine, IngestBinlogArg* arg) { std::vector<uint64_t> segment_index_file_sizes; std::vector<std::string> segment_index_file_names; auto tablet_schema = rowset_meta->tablet_schema(); - for (const auto& index : tablet_schema->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; + if (tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { + for (const auto& index : tablet_schema->indexes()) { + if (index.index_type() != IndexType::INVERTED) { + continue; + } + auto index_id = index.index_id(); + for (int64_t segment_index = 0; segment_index < num_segments; ++segment_index) { + auto get_segment_index_file_size_url = fmt::format( + "{}?method={}&tablet_id={}&rowset_id={}&segment_index={}&segment_index_id={" + "}", + binlog_api_url, "get_segment_index_file", request.remote_tablet_id, + remote_rowset_id, segment_index, index_id); + uint64_t segment_index_file_size; + auto get_segment_index_file_size_cb = + [&get_segment_index_file_size_url, + &segment_index_file_size](HttpClient* client) { + RETURN_IF_ERROR(client->init(get_segment_index_file_size_url)); + client->set_timeout_ms(kMaxTimeoutMs); + RETURN_IF_ERROR(client->head()); + return client->get_content_length(&segment_index_file_size); + }; + auto index_file = InvertedIndexDescriptor::inverted_index_file_path( + local_tablet->tablet_path(), rowset_meta->rowset_id(), segment_index, + index_id, index.get_index_suffix()); + segment_index_file_names.push_back(index_file); + + status = HttpClient::execute_with_retry(max_retry, 1, + get_segment_index_file_size_cb); + if (!status.ok()) { + LOG(WARNING) << "failed to get segment file size from " + << get_segment_index_file_size_url + << ", status=" << status.to_string(); + status.to_thrift(&tstatus); + return; + } + + segment_index_file_sizes.push_back(segment_index_file_size); + 
segment_index_file_urls.push_back(std::move(get_segment_index_file_size_url)); + } } - auto index_id = index.index_id(); + } else { for (int64_t segment_index = 0; segment_index < num_segments; ++segment_index) { - auto get_segment_index_file_size_url = fmt::format( - "{}?method={}&tablet_id={}&rowset_id={}&segment_index={}&segment_index_id={" - "}", - binlog_api_url, "get_segment_index_file", request.remote_tablet_id, - remote_rowset_id, segment_index, index_id); - uint64_t segment_index_file_size; - auto get_segment_index_file_size_cb = [&get_segment_index_file_size_url, - &segment_index_file_size](HttpClient* client) { - RETURN_IF_ERROR(client->init(get_segment_index_file_size_url)); - client->set_timeout_ms(kMaxTimeoutMs); - RETURN_IF_ERROR(client->head()); - return client->get_content_length(&segment_index_file_size); - }; - auto index_file = InvertedIndexDescriptor::inverted_index_file_path( - local_tablet->tablet_path(), rowset_meta->rowset_id(), segment_index, index_id, - index.get_index_suffix()); - segment_index_file_names.push_back(index_file); - - status = HttpClient::execute_with_retry(max_retry, 1, get_segment_index_file_size_cb); - if (!status.ok()) { - LOG(WARNING) << "failed to get segment file size from " - << get_segment_index_file_size_url - << ", status=" << status.to_string(); - status.to_thrift(&tstatus); - return; - } + if (tablet_schema->has_inverted_index()) { + auto get_segment_index_file_size_url = fmt::format( + "{}?method={}&tablet_id={}&rowset_id={}&segment_index={}&segment_index_id={" + "}", + binlog_api_url, "get_segment_index_file", request.remote_tablet_id, + remote_rowset_id, segment_index, -1); + uint64_t segment_index_file_size; + auto get_segment_index_file_size_cb = + [&get_segment_index_file_size_url, + &segment_index_file_size](HttpClient* client) { + RETURN_IF_ERROR(client->init(get_segment_index_file_size_url)); + client->set_timeout_ms(kMaxTimeoutMs); + RETURN_IF_ERROR(client->head()); + return 
client->get_content_length(&segment_index_file_size); + }; + auto local_segment_path = BetaRowset::segment_file_path( + local_tablet->tablet_path(), rowset_meta->rowset_id(), segment_index); + auto index_file = InvertedIndexDescriptor::get_index_file_name(local_segment_path); + segment_index_file_names.push_back(index_file); + + status = HttpClient::execute_with_retry(max_retry, 1, + get_segment_index_file_size_cb); + if (!status.ok()) { + LOG(WARNING) << "failed to get segment file size from " + << get_segment_index_file_size_url + << ", status=" << status.to_string(); + status.to_thrift(&tstatus); + return; + } - segment_index_file_sizes.push_back(segment_index_file_size); - segment_index_file_urls.push_back(std::move(get_segment_index_file_size_url)); + segment_index_file_sizes.push_back(segment_index_file_size); + segment_index_file_urls.push_back(std::move(get_segment_index_file_size_url)); Review Comment: V1 stores a separate index file per column, whereas V2 stores a single compound file covering all columns, so the two code paths cannot be merged. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org