This is an automated email from the ASF dual-hosted git repository.

liaoxin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9d34c0416dd [opt](packed-file) Avoid unnecessary HEAD Object requests when opening an S3 object (#59414)
9d34c0416dd is described below

commit 9d34c0416ddf25891ceaffe4334aa713e0ba8ccc
Author: Xin Liao <[email protected]>
AuthorDate: Sun Jan 4 19:05:03 2026 +0800

    [opt](packed-file) Avoid unnecessary HEAD Object requests when opening an S3 object (#59414)
---
 be/src/cloud/cloud_rowset_writer.cpp |  2 +-
 be/src/io/fs/packed_file_manager.cpp |  9 +++++++++
 be/src/io/fs/packed_file_manager.h   |  1 +
 be/src/io/fs/packed_file_system.cpp  | 10 ++++------
 be/src/olap/rowset/rowset_meta.cpp   |  2 ++
 be/src/olap/rowset/rowset_meta.h     |  3 ++-
 gensrc/proto/olap_file.proto         |  1 +
 7 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/be/src/cloud/cloud_rowset_writer.cpp b/be/src/cloud/cloud_rowset_writer.cpp
index 7f3af0347b1..66067bb9428 100644
--- a/be/src/cloud/cloud_rowset_writer.cpp
+++ b/be/src/cloud/cloud_rowset_writer.cpp
@@ -195,7 +195,7 @@ Status CloudRowsetWriter::_collect_packed_slice_location(io::FileWriter* file_wr
     }
 
     rowset_meta->add_packed_slice_location(file_path, index.packed_file_path, index.offset,
-                                           index.size);
+                                           index.size, index.packed_file_size);
     LOG(INFO) << "collect packed file index: " << file_path << " -> " << index.packed_file_path
               << ", offset: " << index.offset << ", size: " << index.size;
     return Status::OK();
diff --git a/be/src/io/fs/packed_file_manager.cpp b/be/src/io/fs/packed_file_manager.cpp
index 377f4cce67e..a41ba59b142 100644
--- a/be/src/io/fs/packed_file_manager.cpp
+++ b/be/src/io/fs/packed_file_manager.cpp
@@ -609,6 +609,15 @@ void PackedFileManager::process_uploading_packed_files() {
             first_slice = false;
             slices_stream << small_file_path << "(txn=" << index.txn_id
                           << ", offset=" << index.offset << ", size=" << index.size << ")";
+
+            // Update packed_file_size in global index
+            {
+                std::lock_guard<std::mutex> global_lock(_global_index_mutex);
+                auto it = _global_slice_locations.find(small_file_path);
+                if (it != _global_slice_locations.end()) {
+                    it->second.packed_file_size = packed_file->total_size;
+                }
+            }
         }
         LOG(INFO) << "Packed file " << packed_file->packed_file_path
                   << " uploaded; slices=" << packed_file->slice_locations.size()
diff --git a/be/src/io/fs/packed_file_manager.h b/be/src/io/fs/packed_file_manager.h
index 5957ce15e8a..beef51d0603 100644
--- a/be/src/io/fs/packed_file_manager.h
+++ b/be/src/io/fs/packed_file_manager.h
@@ -49,6 +49,7 @@ struct PackedSliceLocation {
     std::string rowset_id;
     std::string resource_id;
     int64_t txn_id = 0;
+    int64_t packed_file_size = -1; // Total size of the packed file, -1 means not set
 };
 
 struct PackedAppendContext {
diff --git a/be/src/io/fs/packed_file_system.cpp b/be/src/io/fs/packed_file_system.cpp
index be392ce0f37..dd5b136ba3b 100644
--- a/be/src/io/fs/packed_file_system.cpp
+++ b/be/src/io/fs/packed_file_system.cpp
@@ -71,13 +71,11 @@ Status PackedFileSystem::open_file_impl(const Path& file, FileReaderSPtr* reader
         FileReaderSPtr inner_reader;
         // Create a new FileReaderOptions with the correct file size
         FileReaderOptions local_opts = opts ? *opts : FileReaderOptions();
-        // DCHECK(opts->file_size == -1 || opts->file_size == index.size)
-        //         << "file size is not correct, expected: " << index.size
-        //         << ", actual: " << opts->file_size;
-        // local_opts.file_size = index.size + index.offset;
-        local_opts.file_size = -1;
+        // Set file_size to packed file size to avoid head object request
+        local_opts.file_size = index.packed_file_size;
         LOG(INFO) << "open packed file: " << index.packed_file_path << ", file: " << file.native()
-                  << ", offset: " << index.offset << ", size: " << index.size;
+                  << ", offset: " << index.offset << ", size: " << index.size
+                  << ", packed_file_size: " << index.packed_file_size;
         RETURN_IF_ERROR(
                 _inner_fs->open_file(Path(index.packed_file_path), &inner_reader, &local_opts));
 
diff --git a/be/src/olap/rowset/rowset_meta.cpp b/be/src/olap/rowset/rowset_meta.cpp
index e17b9c31174..63408a8452e 100644
--- a/be/src/olap/rowset/rowset_meta.cpp
+++ b/be/src/olap/rowset/rowset_meta.cpp
@@ -143,6 +143,8 @@ io::FileSystemSPtr RowsetMeta::fs() {
             index.packed_file_path = index_pb.packed_file_path();
             index.offset = index_pb.offset();
             index.size = index_pb.size();
+            index.packed_file_size =
+                    index_pb.has_packed_file_size() ? index_pb.packed_file_size() : -1;
             index.tablet_id = tablet_id();
             index.rowset_id = _rowset_id.to_string();
             index.resource_id = wrapped->id();
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 5a567f04c81..48f55075ef1 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -426,12 +426,13 @@ public:
 
     void add_packed_slice_location(const std::string& segment_path,
                                    const std::string& packed_file_path, int64_t offset,
-                                   int64_t size) {
+                                   int64_t size, int64_t packed_file_size) {
         auto* index_map = _rowset_meta_pb.mutable_packed_slice_locations();
         auto& index_pb = (*index_map)[segment_path];
         index_pb.set_packed_file_path(packed_file_path);
         index_pb.set_offset(offset);
         index_pb.set_size(size);
+        index_pb.set_packed_file_size(packed_file_size);
     }
 
 private:
diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto
index 35a8f3b54a4..cac152a6a55 100644
--- a/gensrc/proto/olap_file.proto
+++ b/gensrc/proto/olap_file.proto
@@ -69,6 +69,7 @@ message PackedSliceLocationPB {
     optional string packed_file_path = 1;
     optional int64 offset = 2;
     optional int64 size = 3;
+    optional int64 packed_file_size = 4;  // Total size of the packed file
 }
 
 // ATTN: When adding or deleting fields, please update `message RowsetMetaCloudPB`


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to