This is an automated email from the ASF dual-hosted git repository.
liaoxin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9d34c0416dd [opt](packed-file) Avoid unnecessary HEAD Object requests when opening an S3 object (#59414)
9d34c0416dd is described below
commit 9d34c0416ddf25891ceaffe4334aa713e0ba8ccc
Author: Xin Liao <[email protected]>
AuthorDate: Sun Jan 4 19:05:03 2026 +0800
[opt](packed-file) Avoid unnecessary HEAD Object requests when opening an S3 object (#59414)
---
be/src/cloud/cloud_rowset_writer.cpp | 2 +-
be/src/io/fs/packed_file_manager.cpp | 9 +++++++++
be/src/io/fs/packed_file_manager.h | 1 +
be/src/io/fs/packed_file_system.cpp | 10 ++++------
be/src/olap/rowset/rowset_meta.cpp | 2 ++
be/src/olap/rowset/rowset_meta.h | 3 ++-
gensrc/proto/olap_file.proto | 1 +
7 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/be/src/cloud/cloud_rowset_writer.cpp b/be/src/cloud/cloud_rowset_writer.cpp
index 7f3af0347b1..66067bb9428 100644
--- a/be/src/cloud/cloud_rowset_writer.cpp
+++ b/be/src/cloud/cloud_rowset_writer.cpp
@@ -195,7 +195,7 @@ Status CloudRowsetWriter::_collect_packed_slice_location(io::FileWriter* file_wr
}
rowset_meta->add_packed_slice_location(file_path, index.packed_file_path, index.offset,
- index.size);
+ index.size, index.packed_file_size);
LOG(INFO) << "collect packed file index: " << file_path << " -> " << index.packed_file_path
<< ", offset: " << index.offset << ", size: " << index.size;
return Status::OK();
diff --git a/be/src/io/fs/packed_file_manager.cpp b/be/src/io/fs/packed_file_manager.cpp
index 377f4cce67e..a41ba59b142 100644
--- a/be/src/io/fs/packed_file_manager.cpp
+++ b/be/src/io/fs/packed_file_manager.cpp
@@ -609,6 +609,15 @@ void PackedFileManager::process_uploading_packed_files() {
first_slice = false;
slices_stream << small_file_path << "(txn=" << index.txn_id
<< ", offset=" << index.offset << ", size=" << index.size << ")";
+
+ // Update packed_file_size in global index
+ {
+ std::lock_guard<std::mutex> global_lock(_global_index_mutex);
+ auto it = _global_slice_locations.find(small_file_path);
+ if (it != _global_slice_locations.end()) {
+ it->second.packed_file_size = packed_file->total_size;
+ }
+ }
}
LOG(INFO) << "Packed file " << packed_file->packed_file_path
<< " uploaded; slices=" << packed_file->slice_locations.size()
diff --git a/be/src/io/fs/packed_file_manager.h b/be/src/io/fs/packed_file_manager.h
index 5957ce15e8a..beef51d0603 100644
--- a/be/src/io/fs/packed_file_manager.h
+++ b/be/src/io/fs/packed_file_manager.h
@@ -49,6 +49,7 @@ struct PackedSliceLocation {
std::string rowset_id;
std::string resource_id;
int64_t txn_id = 0;
+ int64_t packed_file_size = -1; // Total size of the packed file, -1 means not set
};
struct PackedAppendContext {
diff --git a/be/src/io/fs/packed_file_system.cpp b/be/src/io/fs/packed_file_system.cpp
index be392ce0f37..dd5b136ba3b 100644
--- a/be/src/io/fs/packed_file_system.cpp
+++ b/be/src/io/fs/packed_file_system.cpp
@@ -71,13 +71,11 @@ Status PackedFileSystem::open_file_impl(const Path& file, FileReaderSPtr* reader
FileReaderSPtr inner_reader;
// Create a new FileReaderOptions with the correct file size
FileReaderOptions local_opts = opts ? *opts : FileReaderOptions();
- // DCHECK(opts->file_size == -1 || opts->file_size == index.size)
- // << "file size is not correct, expected: " << index.size
- // << ", actual: " << opts->file_size;
- // local_opts.file_size = index.size + index.offset;
- local_opts.file_size = -1;
+ // Set file_size to packed file size to avoid head object request
+ local_opts.file_size = index.packed_file_size;
LOG(INFO) << "open packed file: " << index.packed_file_path << ", file: " << file.native()
- << ", offset: " << index.offset << ", size: " << index.size;
+ << ", offset: " << index.offset << ", size: " << index.size
+ << ", packed_file_size: " << index.packed_file_size;
RETURN_IF_ERROR(
_inner_fs->open_file(Path(index.packed_file_path), &inner_reader, &local_opts));
diff --git a/be/src/olap/rowset/rowset_meta.cpp b/be/src/olap/rowset/rowset_meta.cpp
index e17b9c31174..63408a8452e 100644
--- a/be/src/olap/rowset/rowset_meta.cpp
+++ b/be/src/olap/rowset/rowset_meta.cpp
@@ -143,6 +143,8 @@ io::FileSystemSPtr RowsetMeta::fs() {
index.packed_file_path = index_pb.packed_file_path();
index.offset = index_pb.offset();
index.size = index_pb.size();
+ index.packed_file_size =
+ index_pb.has_packed_file_size() ? index_pb.packed_file_size() : -1;
index.tablet_id = tablet_id();
index.rowset_id = _rowset_id.to_string();
index.resource_id = wrapped->id();
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 5a567f04c81..48f55075ef1 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -426,12 +426,13 @@ public:
void add_packed_slice_location(const std::string& segment_path,
const std::string& packed_file_path,
int64_t offset,
- int64_t size) {
+ int64_t size, int64_t packed_file_size) {
auto* index_map = _rowset_meta_pb.mutable_packed_slice_locations();
auto& index_pb = (*index_map)[segment_path];
index_pb.set_packed_file_path(packed_file_path);
index_pb.set_offset(offset);
index_pb.set_size(size);
+ index_pb.set_packed_file_size(packed_file_size);
}
private:
diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto
index 35a8f3b54a4..cac152a6a55 100644
--- a/gensrc/proto/olap_file.proto
+++ b/gensrc/proto/olap_file.proto
@@ -69,6 +69,7 @@ message PackedSliceLocationPB {
optional string packed_file_path = 1;
optional int64 offset = 2;
optional int64 size = 3;
+ optional int64 packed_file_size = 4; // Total size of the packed file
}
// ATTN: When adding or deleting fields, please update `message RowsetMetaCloudPB`
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]