w41ter commented on code in PR #49233:
URL: https://github.com/apache/doris/pull/49233#discussion_r2020338971


##########
be/test/runtime/snapshot_loader_test.cpp:
##########
@@ -339,5 +380,115 @@ TEST_F(SnapshotLoaderTest, DirMoveTaskIsIdempotent) {
     ASSERT_EQ(version.first, last_version.first);
     ASSERT_EQ(version.second, last_version.second);
 }
+TEST_F(SnapshotLoaderTest, TestLinkSameRowsetFiles) {
+    // 1. Create a tablet
+    int64_t tablet_id = 222;
+    int32_t schema_hash = 333;
+    int64_t partition_id = 444;
+    TCreateTabletReq req = create_tablet(partition_id, tablet_id, schema_hash);
+    RuntimeProfile profile("CreateTablet");
+    Status status = engine_ref->create_tablet(req, &profile);
+    EXPECT_TRUE(status.ok());
+    TabletSharedPtr tablet = 
engine_ref->tablet_manager()->get_tablet(tablet_id);
+    EXPECT_TRUE(tablet != nullptr);
+
+    // 2. Add a rowset to the tablet
+    add_rowset(tablet_id, schema_hash, partition_id, 100, 100);
+    auto version = tablet->max_version();
+    std::cout << "Original version: " << version.first << ", " << 
version.second << std::endl;
 
+    // 3. Make a snapshot of the tablet
+    string snapshot_path;
+    bool allow_incremental_clone = false;
+    TSnapshotRequest snapshot_request;
+    snapshot_request.tablet_id = tablet_id;
+    snapshot_request.schema_hash = schema_hash;
+    snapshot_request.version = version.second;
+    status = engine_ref->snapshot_mgr()->make_snapshot(snapshot_request, 
&snapshot_path,
+                                                       
&allow_incremental_clone);
+    ASSERT_TRUE(status.ok());
+    std::cout << "snapshot_path: " << snapshot_path << std::endl;
+    snapshot_path = fmt::format("{}/{}/{}", snapshot_path, tablet_id, 
schema_hash);
+
+    // 4. Create a destination path for "remote" snapshot
+    std::string remote_snapshot_dir = storage_root_path + "/remote_snapshot";
+    
ASSERT_TRUE(io::global_local_filesystem()->create_directory(remote_snapshot_dir).ok());
+    std::string remote_tablet_path =
+            fmt::format("{}/{}/{}", remote_snapshot_dir, tablet_id, 
schema_hash);
+    
ASSERT_TRUE(io::global_local_filesystem()->create_directory(remote_tablet_path).ok());
+
+    // 5. Copy snapshot files to remote path and calls convert_rowset_ids
+    std::vector<io::FileInfo> snapshot_files;
+    bool is_exists = false;
+    ASSERT_TRUE(io::global_local_filesystem()
+                        ->list(snapshot_path, true, &snapshot_files, 
&is_exists)
+                        .ok());
+    for (const auto& file : snapshot_files) {
+        std::string src_file = snapshot_path + "/" + file.file_name;
+        std::string dst_file = remote_tablet_path + "/" + file.file_name;
+        ASSERT_TRUE(io::global_local_filesystem()->copy_path(src_file, 
dst_file).ok());
+    }
+
+    int64_t dest_tablet_id = 333;
+    int32_t dest_schema_hash = 444;
+    std::string dest_path = fmt::format("{}/dest_snapshot/{}/{}", 
storage_root_path, dest_tablet_id,
+                                        dest_schema_hash);
+    
ASSERT_TRUE(io::global_local_filesystem()->create_directory(dest_path).ok());
+
+    std::string src_hdr = remote_tablet_path + "/" + std::to_string(tablet_id) 
+ ".hdr";
+    std::string dst_hdr = remote_tablet_path + "/" + 
std::to_string(dest_tablet_id) + ".hdr";
+    ASSERT_TRUE(io::global_local_filesystem()->rename(src_hdr, dst_hdr).ok());
+    auto guards = engine_ref->snapshot_mgr()->convert_rowset_ids(
+            remote_tablet_path, dest_tablet_id, 0, 0, partition_id, 
dest_schema_hash);
+
+    // 7. Setup a remote tablet snapshot for download
+    TRemoteTabletSnapshot remote_snapshot;
+    remote_snapshot.remote_tablet_id = dest_tablet_id;
+    remote_snapshot.local_tablet_id = tablet_id;
+    remote_snapshot.local_snapshot_path = snapshot_path;
+    remote_snapshot.remote_snapshot_path = remote_tablet_path;
+    remote_snapshot.remote_be_addr.hostname = "127.0.0.1";
+    remote_snapshot.remote_be_addr.port = 1234;
+    remote_snapshot.remote_token = "fake_token";
+
+    // 8. Download the snapshot
+    std::vector<TRemoteTabletSnapshot> remote_snapshots = {remote_snapshot};
+    std::vector<int64_t> downloaded_tablet_ids;
+    SnapshotLoader loader(*engine_ref, ExecEnv::GetInstance(), 3L, tablet_id);
+    status = loader.remote_http_download(remote_snapshots, 
&downloaded_tablet_ids);
+    ASSERT_TRUE(status.ok());
+
+    // 9. Verify file linking - check if files were properly linked rather than downloaded
+
+    // Find rowset data files in the download directory
+    std::vector<io::FileInfo> downloaded_files;
+    bool exists = false;
+    ASSERT_TRUE(io::global_local_filesystem()
+                        ->list(snapshot_path, true, &downloaded_files, &exists)
+                        .ok());
+    ASSERT_TRUE(exists);
+
+    // Look for linked files (files containing the new rowset ID that should be linked)
+    bool found_linked_file = false;
+    for (const auto& file : downloaded_files) {
+        if (file.file_name.find(".dat") != std::string::npos) {
+            found_linked_file = true;
+
+            // This is our linked file - verify it exists and has proper size
+            struct stat file_stat;
+            std::string file_path = snapshot_path + "/" + file.file_name;
+            int ret = stat(file_path.c_str(), &file_stat);
+            ASSERT_EQ(ret, 0) << "Failed to stat file: " << file_path;
+
+            // Verify file size matches expected data file size
+            ASSERT_GT(file_stat.st_size, 0) << "Linked file is empty: " << 
file_path;
+
+            std::cout << "Found linked file: " << file_path << ", size: " << 
file_stat.st_size
+                      << " bytes" << std::endl;
+            break;
+        }
+    }

Review Comment:
   The goal here is to verify that the file download step is skipped when the
remote file has a resource_rowset_id that points to a local rowset.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to