This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 2b7bc87022e branch-4.0: [fix](cloud) Fix packed file write path bypassing encryption #60629 (#60869)
2b7bc87022e is described below
commit 2b7bc87022ed97668c50f888356fa8a739b4c517
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Mar 2 10:04:51 2026 +0800
branch-4.0: [fix](cloud) Fix packed file write path bypassing encryption #60629 (#60869)
Cherry-picked from #60629
Co-authored-by: Xin Liao <[email protected]>
---
be/src/cloud/cloud_rowset_writer.cpp | 24 ++++++++++++++++++------
be/src/io/fs/file_writer.h | 4 ++++
be/src/io/fs/packed_file_writer.h | 3 +++
be/src/olap/rowset/rowset_meta.cpp | 8 +++++---
be/src/olap/rowset/rowset_writer_context.h | 12 ++++++------
5 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/be/src/cloud/cloud_rowset_writer.cpp b/be/src/cloud/cloud_rowset_writer.cpp
index 0c1b79392e9..c5b58049ae4 100644
--- a/be/src/cloud/cloud_rowset_writer.cpp
+++ b/be/src/cloud/cloud_rowset_writer.cpp
@@ -17,8 +17,10 @@
#include "cloud/cloud_rowset_writer.h"
+#include "common/logging.h"
#include "common/status.h"
#include "io/cache/block_file_cache_factory.h"
+#include "io/fs/packed_file_manager.h"
#include "io/fs/packed_file_writer.h"
#include "olap/rowset/rowset_factory.h"
@@ -80,6 +82,9 @@ Status CloudRowsetWriter::init(const RowsetWriterContext&
rowset_writer_context)
}
Status CloudRowsetWriter::_build_rowset_meta(RowsetMeta* rowset_meta, bool
check_segment_num) {
+ VLOG_NOTICE << "start to build rowset meta. tablet_id=" << rowset_meta->tablet_id()
+ << ", rowset_id=" << rowset_meta->rowset_id()
+ << ", check_segment_num=" << check_segment_num;
// Call base class implementation
RETURN_IF_ERROR(BaseBetaRowsetWriter::_build_rowset_meta(rowset_meta,
check_segment_num));
@@ -157,6 +162,8 @@ Status CloudRowsetWriter::build(RowsetSharedPtr& rowset) {
}
Status CloudRowsetWriter::_collect_all_packed_slice_locations(RowsetMeta*
rowset_meta) {
+ VLOG_NOTICE << "start to collect packed slice locations for rowset meta. tablet_id="
+ << rowset_meta->tablet_id() << ", rowset_id=" << rowset_meta->rowset_id();
if (!_context.packed_file_active) {
return Status::OK();
}
@@ -189,17 +196,22 @@ Status
CloudRowsetWriter::_collect_all_packed_slice_locations(RowsetMeta* rowset
Status CloudRowsetWriter::_collect_packed_slice_location(io::FileWriter* file_writer,
const std::string& file_path,
RowsetMeta* rowset_meta) {
- // At this point, we only call this when RowsetWriterContext::merge_file_active is true,
- // and all writers should be MergeFileWriter. So we can safely cast without extra checks.
- auto* packed_writer = static_cast<io::PackedFileWriter*>(file_writer);
-
- if (packed_writer->state() != io::FileWriter::State::CLOSED) {
+ VLOG_NOTICE << "collect packed slice location for file: " << file_path;
+ // Check if file writer is closed
+ if (file_writer->state() != io::FileWriter::State::CLOSED) {
// Writer is still open; index will be collected after it is closed.
return Status::OK();
}
+ // Check if file is actually in packed file (not direct write for large files)
+ if (!file_writer->is_in_packed_file()) {
+ return Status::OK();
+ }
+
+ // Get packed slice location directly from PackedFileManager
io::PackedSliceLocation index;
- RETURN_IF_ERROR(packed_writer->get_packed_slice_location(&index));
+ RETURN_IF_ERROR(
+ io::PackedFileManager::instance()->get_packed_slice_location(file_path, &index));
if (index.packed_file_path.empty()) {
return Status::OK(); // File not in packed file, skip
}
diff --git a/be/src/io/fs/file_writer.h b/be/src/io/fs/file_writer.h
index de298a7a0bf..0cda2b519c4 100644
--- a/be/src/io/fs/file_writer.h
+++ b/be/src/io/fs/file_writer.h
@@ -82,6 +82,10 @@ public:
virtual State state() const = 0;
+ // Returns true if this file's data was written to a packed file.
+ // Used to determine whether to collect packed slice location from PackedFileManager.
+ virtual bool is_in_packed_file() const { return false; }
+
FileCacheAllocatorBuilder* cache_builder() const {
return _cache_builder == nullptr ? nullptr : _cache_builder.get();
}
diff --git a/be/src/io/fs/packed_file_writer.h b/be/src/io/fs/packed_file_writer.h
index 9499b0912a4..eaae0a6ed74 100644
--- a/be/src/io/fs/packed_file_writer.h
+++ b/be/src/io/fs/packed_file_writer.h
@@ -57,6 +57,9 @@ public:
// Returns empty index if file is not in merge file
Status get_packed_slice_location(PackedSliceLocation* location) const;
+ // Returns true if this file's data was written to a packed file (not direct write)
+ bool is_in_packed_file() const override { return !_is_direct_write; }
+
private:
// Async close: submit data without waiting
Status _close_async();
diff --git a/be/src/olap/rowset/rowset_meta.cpp b/be/src/olap/rowset/rowset_meta.cpp
index 632304ae5f3..fd3647ca8e7 100644
--- a/be/src/olap/rowset/rowset_meta.cpp
+++ b/be/src/olap/rowset/rowset_meta.cpp
@@ -134,9 +134,8 @@ io::FileSystemSPtr RowsetMeta::fs() {
return nullptr;
}
- auto wrapped = io::make_file_system(fs, algorithm.value());
-
- // Apply packed file system if enabled and index_map is not empty
+ // Apply packed file system first if enabled and index_map is not empty
+ io::FileSystemSPtr wrapped = fs;
if (_rowset_meta_pb.packed_slice_locations_size() > 0) {
std::unordered_map<std::string, io::PackedSliceLocation> index_map;
for (const auto& [path, index_pb] :
_rowset_meta_pb.packed_slice_locations()) {
@@ -159,6 +158,9 @@ io::FileSystemSPtr RowsetMeta::fs() {
wrapped = std::make_shared<io::PackedFileSystem>(wrapped,
index_map, append_info);
}
}
+
+ // Then apply encryption on top
+ wrapped = io::make_file_system(wrapped, algorithm.value());
return wrapped;
#else
return fs;
diff --git a/be/src/olap/rowset/rowset_writer_context.h b/be/src/olap/rowset/rowset_writer_context.h
index 7fba8760602..c70ddd6ee7f 100644
--- a/be/src/olap/rowset/rowset_writer_context.h
+++ b/be/src/olap/rowset/rowset_writer_context.h
@@ -195,12 +195,7 @@ struct RowsetWriterContext {
#endif
}
- // Apply encryption if needed
- if (algorithm.has_value()) {
- fs = io::make_file_system(fs, algorithm.value());
- }
-
- // Apply packed file system for write path if enabled
+ // Apply packed file system first for write path if enabled
// Create empty index_map for write path
// Index information will be populated after write completes
bool has_v1_inverted_index = tablet_schema != nullptr &&
@@ -230,6 +225,11 @@ struct RowsetWriterContext {
fs = std::make_shared<io::PackedFileSystem>(fs, append_info);
}
+ // Then apply encryption on top
+ if (algorithm.has_value()) {
+ fs = io::make_file_system(fs, algorithm.value());
+ }
+
// Cache the result to ensure consistency across multiple calls
_cached_fs = fs;
return fs;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]