This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 2b7bc87022e branch-4.0: [fix](cloud) Fix packed file write path bypassing encryption #60629 (#60869)
2b7bc87022e is described below

commit 2b7bc87022ed97668c50f888356fa8a739b4c517
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Mar 2 10:04:51 2026 +0800

    branch-4.0: [fix](cloud) Fix packed file write path bypassing encryption #60629 (#60869)
    
    Cherry-picked from #60629
    
    Co-authored-by: Xin Liao <[email protected]>
---
 be/src/cloud/cloud_rowset_writer.cpp       | 24 ++++++++++++++++++------
 be/src/io/fs/file_writer.h                 |  4 ++++
 be/src/io/fs/packed_file_writer.h          |  3 +++
 be/src/olap/rowset/rowset_meta.cpp         |  8 +++++---
 be/src/olap/rowset/rowset_writer_context.h | 12 ++++++------
 5 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/be/src/cloud/cloud_rowset_writer.cpp 
b/be/src/cloud/cloud_rowset_writer.cpp
index 0c1b79392e9..c5b58049ae4 100644
--- a/be/src/cloud/cloud_rowset_writer.cpp
+++ b/be/src/cloud/cloud_rowset_writer.cpp
@@ -17,8 +17,10 @@
 
 #include "cloud/cloud_rowset_writer.h"
 
+#include "common/logging.h"
 #include "common/status.h"
 #include "io/cache/block_file_cache_factory.h"
+#include "io/fs/packed_file_manager.h"
 #include "io/fs/packed_file_writer.h"
 #include "olap/rowset/rowset_factory.h"
 
@@ -80,6 +82,9 @@ Status CloudRowsetWriter::init(const RowsetWriterContext& 
rowset_writer_context)
 }
 
 Status CloudRowsetWriter::_build_rowset_meta(RowsetMeta* rowset_meta, bool 
check_segment_num) {
+    VLOG_NOTICE << "start to build rowset meta. tablet_id=" << 
rowset_meta->tablet_id()
+                << ", rowset_id=" << rowset_meta->rowset_id()
+                << ", check_segment_num=" << check_segment_num;
     // Call base class implementation
     RETURN_IF_ERROR(BaseBetaRowsetWriter::_build_rowset_meta(rowset_meta, 
check_segment_num));
 
@@ -157,6 +162,8 @@ Status CloudRowsetWriter::build(RowsetSharedPtr& rowset) {
 }
 
 Status CloudRowsetWriter::_collect_all_packed_slice_locations(RowsetMeta* 
rowset_meta) {
+    VLOG_NOTICE << "start to collect packed slice locations for rowset meta. 
tablet_id="
+                << rowset_meta->tablet_id() << ", rowset_id=" << 
rowset_meta->rowset_id();
     if (!_context.packed_file_active) {
         return Status::OK();
     }
@@ -189,17 +196,22 @@ Status 
CloudRowsetWriter::_collect_all_packed_slice_locations(RowsetMeta* rowset
 Status CloudRowsetWriter::_collect_packed_slice_location(io::FileWriter* 
file_writer,
                                                          const std::string& 
file_path,
                                                          RowsetMeta* 
rowset_meta) {
-    // At this point, we only call this when 
RowsetWriterContext::merge_file_active is true,
-    // and all writers should be MergeFileWriter. So we can safely cast 
without extra checks.
-    auto* packed_writer = static_cast<io::PackedFileWriter*>(file_writer);
-
-    if (packed_writer->state() != io::FileWriter::State::CLOSED) {
+    VLOG_NOTICE << "collect packed slice location for file: " << file_path;
+    // Check if file writer is closed
+    if (file_writer->state() != io::FileWriter::State::CLOSED) {
         // Writer is still open; index will be collected after it is closed.
         return Status::OK();
     }
 
+    // Check if file is actually in packed file (not direct write for large 
files)
+    if (!file_writer->is_in_packed_file()) {
+        return Status::OK();
+    }
+
+    // Get packed slice location directly from PackedFileManager
     io::PackedSliceLocation index;
-    RETURN_IF_ERROR(packed_writer->get_packed_slice_location(&index));
+    RETURN_IF_ERROR(
+            
io::PackedFileManager::instance()->get_packed_slice_location(file_path, 
&index));
     if (index.packed_file_path.empty()) {
         return Status::OK(); // File not in packed file, skip
     }
diff --git a/be/src/io/fs/file_writer.h b/be/src/io/fs/file_writer.h
index de298a7a0bf..0cda2b519c4 100644
--- a/be/src/io/fs/file_writer.h
+++ b/be/src/io/fs/file_writer.h
@@ -82,6 +82,10 @@ public:
 
     virtual State state() const = 0;
 
+    // Returns true if this file's data was written to a packed file.
+    // Used to determine whether to collect packed slice location from 
PackedFileManager.
+    virtual bool is_in_packed_file() const { return false; }
+
     FileCacheAllocatorBuilder* cache_builder() const {
         return _cache_builder == nullptr ? nullptr : _cache_builder.get();
     }
diff --git a/be/src/io/fs/packed_file_writer.h 
b/be/src/io/fs/packed_file_writer.h
index 9499b0912a4..eaae0a6ed74 100644
--- a/be/src/io/fs/packed_file_writer.h
+++ b/be/src/io/fs/packed_file_writer.h
@@ -57,6 +57,9 @@ public:
     // Returns empty index if file is not in merge file
     Status get_packed_slice_location(PackedSliceLocation* location) const;
 
+    // Returns true if this file's data was written to a packed file (not 
direct write)
+    bool is_in_packed_file() const override { return !_is_direct_write; }
+
 private:
     // Async close: submit data without waiting
     Status _close_async();
diff --git a/be/src/olap/rowset/rowset_meta.cpp 
b/be/src/olap/rowset/rowset_meta.cpp
index 632304ae5f3..fd3647ca8e7 100644
--- a/be/src/olap/rowset/rowset_meta.cpp
+++ b/be/src/olap/rowset/rowset_meta.cpp
@@ -134,9 +134,8 @@ io::FileSystemSPtr RowsetMeta::fs() {
         return nullptr;
     }
 
-    auto wrapped = io::make_file_system(fs, algorithm.value());
-
-    // Apply packed file system if enabled and index_map is not empty
+    // Apply packed file system first if enabled and index_map is not empty
+    io::FileSystemSPtr wrapped = fs;
     if (_rowset_meta_pb.packed_slice_locations_size() > 0) {
         std::unordered_map<std::string, io::PackedSliceLocation> index_map;
         for (const auto& [path, index_pb] : 
_rowset_meta_pb.packed_slice_locations()) {
@@ -159,6 +158,9 @@ io::FileSystemSPtr RowsetMeta::fs() {
             wrapped = std::make_shared<io::PackedFileSystem>(wrapped, 
index_map, append_info);
         }
     }
+
+    // Then apply encryption on top
+    wrapped = io::make_file_system(wrapped, algorithm.value());
     return wrapped;
 #else
     return fs;
diff --git a/be/src/olap/rowset/rowset_writer_context.h 
b/be/src/olap/rowset/rowset_writer_context.h
index 7fba8760602..c70ddd6ee7f 100644
--- a/be/src/olap/rowset/rowset_writer_context.h
+++ b/be/src/olap/rowset/rowset_writer_context.h
@@ -195,12 +195,7 @@ struct RowsetWriterContext {
 #endif
         }
 
-        // Apply encryption if needed
-        if (algorithm.has_value()) {
-            fs = io::make_file_system(fs, algorithm.value());
-        }
-
-        // Apply packed file system for write path if enabled
+        // Apply packed file system first for write path if enabled
         // Create empty index_map for write path
         // Index information will be populated after write completes
         bool has_v1_inverted_index = tablet_schema != nullptr &&
@@ -230,6 +225,11 @@ struct RowsetWriterContext {
             fs = std::make_shared<io::PackedFileSystem>(fs, append_info);
         }
 
+        // Then apply encryption on top
+        if (algorithm.has_value()) {
+            fs = io::make_file_system(fs, algorithm.value());
+        }
+
         // Cache the result to ensure consistency across multiple calls
         _cached_fs = fs;
         return fs;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to