This is an automated email from the ASF dual-hosted git repository.
liaoxin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 59e5f6bbf7f [Opt](cloud) Support packed file for delete bitmap storage
(#60411)
59e5f6bbf7f is described below
commit 59e5f6bbf7fff4e22a8f528bf6765569a75b718c
Author: Xin Liao <[email protected]>
AuthorDate: Tue Mar 3 21:14:04 2026 +0800
[Opt](cloud) Support packed file for delete bitmap storage (#60411)
Problem Summary:
This PR adds packed file support for delete bitmap storage in cloud
mode.
- Add packed file support for delete bitmap writer/reader
- Add write_file_cache flag to PackedAppendContext to control file cache
behavior
- Delete bitmap files bypass the file cache, matching the original behavior
- Add regression tests for packed delete bitmap scenarios
---
be/src/cloud/cloud_meta_mgr.cpp | 56 +++--
be/src/cloud/delete_bitmap_file_reader.cpp | 40 +++-
be/src/cloud/delete_bitmap_file_reader.h | 13 ++
be/src/cloud/delete_bitmap_file_writer.cpp | 64 +++++-
be/src/cloud/delete_bitmap_file_writer.h | 17 +-
be/src/io/fs/packed_file_manager.cpp | 4 +-
be/src/io/fs/packed_file_manager.h | 1 +
cloud/src/recycler/recycler.cpp | 254 ++++++++++++++++++++-
cloud/src/recycler/recycler.h | 11 +-
gensrc/proto/cloud.proto | 1 +
.../test_packed_delete_bitmap.out | 63 +++++
.../cloud_delete_bitmap/test_cu_compaction.groovy | 6 +-
.../cloud_delete_bitmap/test_load.groovy | 6 +-
.../test_packed_delete_bitmap.groovy | 178 +++++++++++++++
14 files changed, 686 insertions(+), 28 deletions(-)
diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index 0e834ff6554..8eac0c5300e 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -1213,10 +1213,12 @@ Status
CloudMetaMgr::_read_tablet_delete_bitmap_v2(CloudTablet* tablet, int64_t
}
return Status::OK();
};
- auto get_delete_bitmap_from_file = [&](const std::string& rowset_id) {
+ auto get_delete_bitmap_from_file = [&](const std::string& rowset_id,
+ const DeleteBitmapStoragePB&
storage) {
if (config::enable_mow_verbose_log) {
LOG(INFO) << "get delete bitmap for tablet_id=" <<
tablet->tablet_id()
- << ", rowset_id=" << rowset_id << " from file";
+ << ", rowset_id=" << rowset_id << " from file"
+ << ", is_packed=" << storage.has_packed_slice_location();
}
if (rowset_to_resource.find(rowset_id) == rowset_to_resource.end()) {
return Status::InternalError("vault id not found for tablet_id={},
rowset_id={}",
@@ -1229,11 +1231,23 @@ Status
CloudMetaMgr::_read_tablet_delete_bitmap_v2(CloudTablet* tablet, int64_t
return Status::InternalError("vault id not found, maybe not sync,
vault id {}",
resource_id);
}
- DeleteBitmapFileReader reader(tablet->tablet_id(), rowset_id,
storage_resource);
- RETURN_IF_ERROR(reader.init());
+
+ // Use packed file reader if packed_slice_location is present
+ std::unique_ptr<DeleteBitmapFileReader> reader;
+ if (storage.has_packed_slice_location() &&
+ !storage.packed_slice_location().packed_file_path().empty()) {
+ reader =
std::make_unique<DeleteBitmapFileReader>(tablet->tablet_id(), rowset_id,
+ storage_resource,
+
storage.packed_slice_location());
+ } else {
+ reader =
std::make_unique<DeleteBitmapFileReader>(tablet->tablet_id(), rowset_id,
+
storage_resource);
+ }
+
+ RETURN_IF_ERROR(reader->init());
DeleteBitmapPB dbm;
- RETURN_IF_ERROR(reader.read(dbm));
- RETURN_IF_ERROR(reader.close());
+ RETURN_IF_ERROR(reader->read(dbm));
+ RETURN_IF_ERROR(reader->close());
return merge_delete_bitmap(rowset_id, dbm);
};
CloudStorageEngine& engine =
ExecEnv::GetInstance()->storage_engine().to_cloud();
@@ -1247,8 +1261,9 @@ Status
CloudMetaMgr::_read_tablet_delete_bitmap_v2(CloudTablet* tablet, int64_t
DeleteBitmapPB dbm = delete_bitmap_storages[i].delete_bitmap();
RETURN_IF_ERROR(merge_delete_bitmap(rowset_id, dbm));
} else {
- auto submit_st = token->submit_func([&]() {
- auto status = get_delete_bitmap_from_file(rowset_id);
+ const auto& storage = delete_bitmap_storages[i];
+ auto submit_st = token->submit_func([&, rowset_id, storage]() {
+ auto status = get_delete_bitmap_from_file(rowset_id, storage);
if (!status.ok()) {
LOG(WARNING) << "failed to get delete bitmap for
tablet_id="
<< tablet->tablet_id() << ", rowset_id=" <<
rowset_id
@@ -1638,7 +1653,7 @@ static void add_delete_bitmap(DeleteBitmapPB&
delete_bitmap_pb, const DeleteBitm
static Status store_delete_bitmap(std::string& rowset_id, DeleteBitmapPB&
delete_bitmap_pb,
int64_t tablet_id,
std::optional<StorageResource>
storage_resource,
- UpdateDeleteBitmapRequest& req) {
+ UpdateDeleteBitmapRequest& req, int64_t
txn_id) {
if (config::enable_mow_verbose_log) {
std::stringstream ss;
for (int i = 0; i < delete_bitmap_pb.rowset_ids_size(); i++) {
@@ -1658,12 +1673,26 @@ static Status store_delete_bitmap(std::string&
rowset_id, DeleteBitmapPB& delete
DeleteBitmapStoragePB delete_bitmap_storage;
if (config::delete_bitmap_store_v2_max_bytes_in_fdb >= 0 &&
delete_bitmap_pb.ByteSizeLong() >
config::delete_bitmap_store_v2_max_bytes_in_fdb) {
- DeleteBitmapFileWriter file_writer(tablet_id, rowset_id,
storage_resource);
+ // Enable packed file only for load (txn_id > 0)
+ bool enable_packed = config::enable_packed_file && txn_id > 0;
+ DeleteBitmapFileWriter file_writer(tablet_id, rowset_id,
storage_resource, enable_packed,
+ txn_id);
RETURN_IF_ERROR(file_writer.init());
RETURN_IF_ERROR(file_writer.write(delete_bitmap_pb));
RETURN_IF_ERROR(file_writer.close());
delete_bitmap_pb.Clear();
delete_bitmap_storage.set_store_in_fdb(false);
+
+ // Store packed slice location if file was written to packed file
+ if (file_writer.is_packed()) {
+ io::PackedSliceLocation loc;
+ RETURN_IF_ERROR(file_writer.get_packed_slice_location(&loc));
+ auto* packed_loc =
delete_bitmap_storage.mutable_packed_slice_location();
+ packed_loc->set_packed_file_path(loc.packed_file_path);
+ packed_loc->set_offset(loc.offset);
+ packed_loc->set_size(loc.size);
+ packed_loc->set_packed_file_size(loc.packed_file_size);
+ }
} else {
delete_bitmap_storage.set_store_in_fdb(true);
*(delete_bitmap_storage.mutable_delete_bitmap()) =
std::move(delete_bitmap_pb);
@@ -1746,7 +1775,7 @@ Status CloudMetaMgr::update_delete_bitmap(const
CloudTablet& tablet, int64_t loc
if (!pre_rowset_id.empty() &&
delete_bitmap_pb.rowset_ids_size() > 0) {
RETURN_IF_ERROR(store_delete_bitmap(pre_rowset_id,
delete_bitmap_pb,
tablet.tablet_id(), storage_resource,
- req));
+ req, txn_id));
}
pre_rowset_id = cur_rowset_id;
DCHECK_EQ(delete_bitmap_pb.rowset_ids_size(), 0);
@@ -1759,7 +1788,8 @@ Status CloudMetaMgr::update_delete_bitmap(const
CloudTablet& tablet, int64_t loc
if (delete_bitmap_pb.rowset_ids_size() > 0) {
DCHECK(!cur_rowset_id.empty());
RETURN_IF_ERROR(store_delete_bitmap(cur_rowset_id,
delete_bitmap_pb,
- tablet.tablet_id(),
storage_resource, req));
+ tablet.tablet_id(),
storage_resource, req,
+ txn_id));
}
} else {
DeleteBitmapPB delete_bitmap_pb;
@@ -1767,7 +1797,7 @@ Status CloudMetaMgr::update_delete_bitmap(const
CloudTablet& tablet, int64_t loc
add_delete_bitmap(delete_bitmap_pb, key, bitmap);
}
RETURN_IF_ERROR(store_delete_bitmap(rowset_id, delete_bitmap_pb,
tablet.tablet_id(),
- storage_resource, req));
+ storage_resource, req,
txn_id));
}
DCHECK_EQ(req.delta_rowset_ids_size(),
req.delete_bitmap_storages_size());
}
diff --git a/be/src/cloud/delete_bitmap_file_reader.cpp
b/be/src/cloud/delete_bitmap_file_reader.cpp
index 20b5f19f31c..1d27e6176ca 100644
--- a/be/src/cloud/delete_bitmap_file_reader.cpp
+++ b/be/src/cloud/delete_bitmap_file_reader.cpp
@@ -20,6 +20,7 @@
#include "cloud/delete_bitmap_file_writer.h"
#include "common/status.h"
#include "io/fs/file_reader.h"
+#include "io/fs/packed_file_reader.h"
#include "util/coding.h"
namespace doris {
@@ -29,6 +30,20 @@ DeleteBitmapFileReader::DeleteBitmapFileReader(int64_t
tablet_id, const std::str
std::optional<StorageResource>&
storage_resource)
: _tablet_id(tablet_id), _rowset_id(rowset_id),
_storage_resource(storage_resource) {}
+DeleteBitmapFileReader::DeleteBitmapFileReader(int64_t tablet_id, const
std::string& rowset_id,
+ std::optional<StorageResource>&
storage_resource,
+ const PackedSliceLocationPB&
packed_location)
+ : _tablet_id(tablet_id),
+ _rowset_id(rowset_id),
+ _storage_resource(storage_resource),
+ _is_packed(true),
+ _packed_offset(packed_location.offset()),
+ _packed_size(packed_location.size()),
+ _packed_file_path(packed_location.packed_file_path()),
+ _packed_file_size(packed_location.has_packed_file_size()
+ ? packed_location.packed_file_size()
+ : -1) {}
+
DeleteBitmapFileReader::~DeleteBitmapFileReader() = default;
Status DeleteBitmapFileReader::init() {
@@ -45,9 +60,28 @@ Status DeleteBitmapFileReader::init() {
if (!_storage_resource) {
return Status::InternalError("invalid storage resource for
tablet_id={}", _tablet_id);
}
- _path = _storage_resource->remote_delete_bitmap_path(_tablet_id,
_rowset_id);
- io::FileReaderOptions opts;
- return _storage_resource->fs->open_file(_path, &_file_reader, &opts);
+
+ if (_is_packed) {
+ // Read from packed file
+ io::FileReaderSPtr inner_reader;
+ io::FileReaderOptions opts;
+ if (_packed_file_size > 0) {
+ opts.file_size = _packed_file_size;
+ }
+ opts.cache_type = io::FileCachePolicy::NO_CACHE;
+
RETURN_IF_ERROR(_storage_resource->fs->open_file(io::Path(_packed_file_path),
&inner_reader,
+ &opts));
+
+ _path = _storage_resource->remote_delete_bitmap_path(_tablet_id,
_rowset_id);
+ _file_reader = std::make_shared<io::PackedFileReader>(
+ std::move(inner_reader), io::Path(_path), _packed_offset,
_packed_size);
+ } else {
+ // Read from standalone file
+ _path = _storage_resource->remote_delete_bitmap_path(_tablet_id,
_rowset_id);
+ io::FileReaderOptions opts;
+ RETURN_IF_ERROR(_storage_resource->fs->open_file(_path, &_file_reader,
&opts));
+ }
+ return Status::OK();
}
Status DeleteBitmapFileReader::close() {
diff --git a/be/src/cloud/delete_bitmap_file_reader.h
b/be/src/cloud/delete_bitmap_file_reader.h
index a9b26f4b2d0..2cb90f996e6 100644
--- a/be/src/cloud/delete_bitmap_file_reader.h
+++ b/be/src/cloud/delete_bitmap_file_reader.h
@@ -19,6 +19,7 @@
#include "cloud/cloud_storage_engine.h"
#include "common/status.h"
+#include "gen_cpp/olap_file.pb.h"
#include "io/fs/file_reader_writer_fwd.h"
namespace doris {
@@ -27,8 +28,13 @@ class DeleteBitmapPB;
class DeleteBitmapFileReader {
public:
+ // Constructor for standalone files
explicit DeleteBitmapFileReader(int64_t tablet_id, const std::string&
rowset_id,
std::optional<StorageResource>&
storage_resource);
+ // Constructor for packed file reading
+ explicit DeleteBitmapFileReader(int64_t tablet_id, const std::string&
rowset_id,
+ std::optional<StorageResource>&
storage_resource,
+ const PackedSliceLocationPB&
packed_location);
~DeleteBitmapFileReader();
Status init();
@@ -41,6 +47,13 @@ private:
std::optional<StorageResource> _storage_resource;
std::string _path;
io::FileReaderSPtr _file_reader;
+
+ // Packed file support
+ bool _is_packed = false;
+ int64_t _packed_offset = 0;
+ int64_t _packed_size = 0;
+ std::string _packed_file_path;
+ int64_t _packed_file_size = -1;
};
} // namespace doris
\ No newline at end of file
diff --git a/be/src/cloud/delete_bitmap_file_writer.cpp
b/be/src/cloud/delete_bitmap_file_writer.cpp
index e1e5df23404..59f0c6c0274 100644
--- a/be/src/cloud/delete_bitmap_file_writer.cpp
+++ b/be/src/cloud/delete_bitmap_file_writer.cpp
@@ -19,7 +19,9 @@
#include <crc32c/crc32c.h>
+#include "cloud/config.h"
#include "io/fs/file_writer.h"
+#include "io/fs/packed_file_writer.h"
namespace doris {
#include "common/compile_check_begin.h"
@@ -28,6 +30,15 @@ DeleteBitmapFileWriter::DeleteBitmapFileWriter(int64_t
tablet_id, const std::str
std::optional<StorageResource>&
storage_resource)
: _tablet_id(tablet_id), _rowset_id(rowset_id),
_storage_resource(storage_resource) {}
+DeleteBitmapFileWriter::DeleteBitmapFileWriter(int64_t tablet_id, const
std::string& rowset_id,
+ std::optional<StorageResource>&
storage_resource,
+ bool enable_packed_file,
int64_t txn_id)
+ : _tablet_id(tablet_id),
+ _rowset_id(rowset_id),
+ _storage_resource(storage_resource),
+ _enable_packed_file(enable_packed_file),
+ _txn_id(txn_id) {}
+
DeleteBitmapFileWriter::~DeleteBitmapFileWriter() {}
Status DeleteBitmapFileWriter::init() {
@@ -48,8 +59,30 @@ Status DeleteBitmapFileWriter::init() {
}
_path = _storage_resource->remote_delete_bitmap_path(_tablet_id,
_rowset_id);
io::FileWriterOptions opts;
- // opts.write_file_cache = true;
- return _storage_resource->fs->create_file(_path, &_file_writer, &opts);
+
+ if (_enable_packed_file) {
+ // Create underlying file writer
+ io::FileWriterPtr inner_writer;
+ // Disable write_file_cache for the inner writer when using PackedFileWriter.
+ // Delete bitmap files bypass the file cache entirely: append_info.write_file_cache
+ // is also set to false below, so PackedFileManager does not cache the small file.
+ opts.write_file_cache = false;
+ RETURN_IF_ERROR(_storage_resource->fs->create_file(_path,
&inner_writer, &opts));
+
+ // Wrap with PackedFileWriter
+ io::PackedAppendContext append_info;
+ append_info.resource_id = _storage_resource->fs->id();
+ append_info.tablet_id = _tablet_id;
+ append_info.rowset_id = _rowset_id;
+ append_info.txn_id = _txn_id;
+ append_info.write_file_cache = false;
+
+ _file_writer =
std::make_unique<io::PackedFileWriter>(std::move(inner_writer),
+ io::Path(_path),
append_info);
+ } else {
+ RETURN_IF_ERROR(_storage_resource->fs->create_file(_path,
&_file_writer, &opts));
+ }
+ return Status::OK();
}
Status DeleteBitmapFileWriter::close() {
@@ -60,8 +93,33 @@ Status DeleteBitmapFileWriter::close() {
auto st = _file_writer->close();
if (!st.ok()) {
LOG(WARNING) << "failed to close delete bitmap file=" << _path << ",
st=" << st.to_string();
+ return st;
}
- return st;
+
+ // Check if file was written to packed file
+ if (_enable_packed_file) {
+ auto* packed_writer =
static_cast<io::PackedFileWriter*>(_file_writer.get());
+ io::PackedSliceLocation loc;
+ st = packed_writer->get_packed_slice_location(&loc);
+ if (!st.ok()) {
+ LOG(WARNING) << "failed to get packed slice location for delete
bitmap file=" << _path
+ << ", st=" << st.to_string();
+ return st;
+ }
+ if (!loc.packed_file_path.empty()) {
+ _is_packed = true;
+ _packed_location = loc;
+ }
+ }
+ return Status::OK();
+}
+
+Status
DeleteBitmapFileWriter::get_packed_slice_location(io::PackedSliceLocation*
location) const {
+ if (!_is_packed) {
+ return Status::InternalError("delete bitmap file is not packed");
+ }
+ *location = _packed_location;
+ return Status::OK();
}
Status DeleteBitmapFileWriter::write(const DeleteBitmapPB& delete_bitmap) {
diff --git a/be/src/cloud/delete_bitmap_file_writer.h
b/be/src/cloud/delete_bitmap_file_writer.h
index 040d9f10a98..98303836c0b 100644
--- a/be/src/cloud/delete_bitmap_file_writer.h
+++ b/be/src/cloud/delete_bitmap_file_writer.h
@@ -20,6 +20,7 @@
#include "cloud/cloud_storage_engine.h"
#include "common/status.h"
#include "io/fs/file_reader_writer_fwd.h"
+#include "io/fs/packed_file_manager.h"
namespace doris {
@@ -29,12 +30,20 @@ class DeleteBitmapFileWriter {
public:
explicit DeleteBitmapFileWriter(int64_t tablet_id, const std::string&
rowset_id,
std::optional<StorageResource>&
storage_resource);
+ // Constructor with packed file support
+ explicit DeleteBitmapFileWriter(int64_t tablet_id, const std::string&
rowset_id,
+ std::optional<StorageResource>&
storage_resource,
+ bool enable_packed_file, int64_t txn_id);
~DeleteBitmapFileWriter();
Status init();
Status write(const DeleteBitmapPB& delete_bitmap);
Status close();
+ // Get packed slice location after close
+ Status get_packed_slice_location(io::PackedSliceLocation* location) const;
+ bool is_packed() const { return _is_packed; }
+
public:
static constexpr const char* DELETE_BITMAP_MAGIC = "DBM1";
static const uint32_t MAGIC_SIZE = 4;
@@ -47,6 +56,12 @@ private:
std::optional<StorageResource> _storage_resource;
std::string _path;
io::FileWriterPtr _file_writer;
+
+ // Packed file support
+ bool _enable_packed_file = false;
+ int64_t _txn_id = 0;
+ bool _is_packed = false;
+ io::PackedSliceLocation _packed_location;
};
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/io/fs/packed_file_manager.cpp
b/be/src/io/fs/packed_file_manager.cpp
index eaf8afe96c0..533a8bfbf80 100644
--- a/be/src/io/fs/packed_file_manager.cpp
+++ b/be/src/io/fs/packed_file_manager.cpp
@@ -371,7 +371,9 @@ Status PackedFileManager::append_small_file(const
std::string& path, const Slice
// Async write data to file cache using small file path as cache key.
// This ensures cache key matches the cleanup key in Rowset::clear_cache(),
// allowing proper cache cleanup when stale rowsets are removed.
- write_small_file_to_cache_async(path, data, info.tablet_id,
info.expiration_time);
+ if (info.write_file_cache) {
+ write_small_file_to_cache_async(path, data, info.tablet_id,
info.expiration_time);
+ }
// Update index
PackedSliceLocation location;
diff --git a/be/src/io/fs/packed_file_manager.h
b/be/src/io/fs/packed_file_manager.h
index 8a9758bf314..7756a3fd85a 100644
--- a/be/src/io/fs/packed_file_manager.h
+++ b/be/src/io/fs/packed_file_manager.h
@@ -59,6 +59,7 @@ struct PackedAppendContext {
std::string rowset_id;
int64_t txn_id = 0;
uint64_t expiration_time = 0; // TTL expiration time in seconds since
epoch, 0 means no TTL
+ bool write_file_cache = true; // Whether to write data to file cache
};
// Global object that manages packing small files into larger files for S3
optimization
diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp
index 632acdb25f0..6cd8971e908 100644
--- a/cloud/src/recycler/recycler.cpp
+++ b/cloud/src/recycler/recycler.cpp
@@ -2981,8 +2981,20 @@ int InstanceRecycler::delete_rowset_data(const
RowsetMetaCloudPB& rs_meta_pb) {
}
}
- // Process delete bitmap
- file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
+ // Process delete bitmap - check if it's stored in packed file
+ bool delete_bitmap_is_packed = false;
+ if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id,
+
&delete_bitmap_is_packed) != 0) {
+ LOG_WARNING("failed to decrement delete bitmap packed file ref count")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id);
+ return -1;
+ }
+ // Only delete standalone delete bitmap file if not stored in packed file
+ if (!delete_bitmap_is_packed) {
+ file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
+ }
// TODO(AlexYue): seems could do do batch
return accessor->delete_files(file_paths);
}
@@ -3001,6 +3013,7 @@ int
InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCl
.tag("rowset_id", rs_meta_pb.rowset_id_v2());
return 0;
}
+
struct PackedSmallFileInfo {
std::string small_file_path;
};
@@ -3135,6 +3148,7 @@ int
InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCl
break;
}
+ // Calculate remaining files
int64_t left_file_count = 0;
int64_t left_file_bytes = 0;
for (const auto& small_file_entry : packed_info.slices()) {
@@ -3224,6 +3238,225 @@ int
InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCl
return ret;
}
+int InstanceRecycler::decrement_delete_bitmap_packed_file_ref_counts(int64_t
tablet_id,
+ const
std::string& rowset_id,
+ bool*
out_is_packed) {
+ if (out_is_packed) {
+ *out_is_packed = false;
+ }
+
+ // Get delete bitmap storage info from FDB
+ std::string dbm_key = versioned::meta_delete_bitmap_key({instance_id_,
tablet_id, rowset_id});
+ std::unique_ptr<Transaction> txn;
+ TxnErrorCode err = txn_kv_->create_txn(&txn);
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG_WARNING("failed to create txn when getting delete bitmap storage")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("err", err);
+ return -1;
+ }
+
+ std::string dbm_val;
+ err = txn->get(dbm_key, &dbm_val);
+ if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+ // No delete bitmap for this rowset, nothing to do
+ LOG_INFO("delete bitmap not found, skip packed file ref count
decrement")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id);
+ return 0;
+ }
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG_WARNING("failed to get delete bitmap storage")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("err", err);
+ return -1;
+ }
+
+ DeleteBitmapStoragePB storage;
+ if (!storage.ParseFromString(dbm_val)) {
+ LOG_WARNING("failed to parse delete bitmap storage")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id);
+ return -1;
+ }
+
+ // Check if delete bitmap is stored in packed file
+ if (!storage.has_packed_slice_location() ||
+ storage.packed_slice_location().packed_file_path().empty()) {
+ // Not stored in packed file, nothing to do
+ return 0;
+ }
+
+ if (out_is_packed) {
+ *out_is_packed = true;
+ }
+
+ const auto& packed_loc = storage.packed_slice_location();
+ const std::string& packed_file_path = packed_loc.packed_file_path();
+
+ LOG_INFO("decrementing delete bitmap packed file ref count")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path);
+
+ const int max_retry_times = std::max(1,
config::decrement_packed_file_ref_counts_retry_times);
+ for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
+ std::unique_ptr<Transaction> update_txn;
+ err = txn_kv_->create_txn(&update_txn);
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG_WARNING("failed to create txn for delete bitmap packed file
update")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("err", err);
+ return -1;
+ }
+
+ std::string packed_key = packed_file_key({instance_id_,
packed_file_path});
+ std::string packed_val;
+ err = update_txn->get(packed_key, &packed_val);
+ if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+ LOG_WARNING("packed file info not found for delete bitmap")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path);
+ return 0;
+ }
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG_WARNING("failed to get packed file info for delete bitmap")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path)
+ .tag("err", err);
+ return -1;
+ }
+
+ cloud::PackedFileInfoPB packed_info;
+ if (!packed_info.ParseFromString(packed_val)) {
+ LOG_WARNING("failed to parse packed file info for delete bitmap")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path);
+ return -1;
+ }
+
+ // Find and mark the small file entry as deleted
+ // Use tablet_id and rowset_id to match entry instead of path,
+ // because path format may vary with path_version (with or without
shard prefix)
+ auto* entries = packed_info.mutable_slices();
+ bool found = false;
+ bool already_deleted = false;
+ for (auto& entry : *entries) {
+ if (entry.tablet_id() == tablet_id && entry.rowset_id() ==
rowset_id) {
+ if (!entry.deleted()) {
+ entry.set_deleted(true);
+ if (!entry.corrected()) {
+ entry.set_corrected(true);
+ }
+ } else {
+ already_deleted = true;
+ }
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ LOG_WARNING("delete bitmap entry not found in packed file")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path);
+ return 0;
+ }
+
+ if (already_deleted) {
+ LOG_INFO("delete bitmap entry already deleted in packed file")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path);
+ return 0;
+ }
+
+ // Calculate remaining files
+ int64_t left_file_count = 0;
+ int64_t left_file_bytes = 0;
+ for (const auto& entry : packed_info.slices()) {
+ if (!entry.deleted()) {
+ ++left_file_count;
+ left_file_bytes += entry.size();
+ }
+ }
+ packed_info.set_remaining_slice_bytes(left_file_bytes);
+ packed_info.set_ref_cnt(left_file_count);
+
+ if (left_file_count == 0) {
+ packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
+ }
+
+ std::string updated_val;
+ if (!packed_info.SerializeToString(&updated_val)) {
+ LOG_WARNING("failed to serialize packed file info for delete
bitmap")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path);
+ return -1;
+ }
+
+ update_txn->put(packed_key, updated_val);
+ err = update_txn->commit();
+ if (err == TxnErrorCode::TXN_OK) {
+ LOG_INFO("delete bitmap packed file ref count decremented")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path)
+ .tag("left_file_count", left_file_count);
+ if (left_file_count == 0) {
+ if (delete_packed_file_and_kv(packed_file_path, packed_key,
packed_info) != 0) {
+ return -1;
+ }
+ }
+ return 0;
+ }
+ if (err == TxnErrorCode::TXN_CONFLICT) {
+ if (attempt >= max_retry_times) {
+ LOG_WARNING("delete bitmap packed file update conflict after
max retry")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path)
+ .tag("attempt", attempt);
+ return -1;
+ }
+ sleep_for_packed_file_retry();
+ continue;
+ }
+
+ LOG_WARNING("failed to commit delete bitmap packed file update")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id)
+ .tag("packed_file_path", packed_file_path)
+ .tag("err", err);
+ return -1;
+ }
+
+ return -1;
+}
+
int InstanceRecycler::delete_packed_file_and_kv(const std::string&
packed_file_path,
const std::string& packed_key,
const cloud::PackedFileInfoPB&
packed_info) {
@@ -3394,8 +3627,21 @@ int InstanceRecycler::delete_rowset_data(
continue;
}
- // Process delete bitmap
- file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
+ // Process delete bitmap - check if it's stored in packed file
+ bool delete_bitmap_is_packed = false;
+ if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id,
rowset_id,
+
&delete_bitmap_is_packed) != 0) {
+ LOG_WARNING("failed to decrement delete bitmap packed file ref
count")
+ .tag("instance_id", instance_id_)
+ .tag("tablet_id", tablet_id)
+ .tag("rowset_id", rowset_id);
+ ret = -1;
+ continue;
+ }
+ // Only delete standalone delete bitmap file if not stored in packed
file
+ if (!delete_bitmap_is_packed) {
+ file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
+ }
// Process inverted indexes
std::vector<std::pair<int64_t, std::string>> index_ids;
diff --git a/cloud/src/recycler/recycler.h b/cloud/src/recycler/recycler.h
index 61d62f61c5b..0fb882686fd 100644
--- a/cloud/src/recycler/recycler.h
+++ b/cloud/src/recycler/recycler.h
@@ -450,9 +450,18 @@ private:
int delete_rowset_data(const std::map<std::string,
doris::RowsetMetaCloudPB>& rowsets,
RowsetRecyclingState type, RecyclerMetricsContext&
metrics_context);
- // return 0 for success otherwise error
+ // Decrement packed file ref counts for rowset segments.
+ // Returns 0 for success, -1 for error.
int decrement_packed_file_ref_counts(const doris::RowsetMetaCloudPB&
rs_meta_pb);
+ // Decrement packed file ref count for delete bitmap if it's stored in
packed file.
+ // Returns 0 for success, -1 for error.
+ // If delete bitmap is not stored in packed file, this function does
nothing and returns 0.
+ // out_is_packed: if not null, will be set to true if delete bitmap is
stored in packed file.
+ int decrement_delete_bitmap_packed_file_ref_counts(int64_t tablet_id,
+ const std::string&
rowset_id,
+ bool* out_is_packed);
+
int delete_packed_file_and_kv(const std::string& packed_file_path,
const std::string& packed_key,
const cloud::PackedFileInfoPB& packed_info);
diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto
index fd799965e37..945382b16f7 100644
--- a/gensrc/proto/cloud.proto
+++ b/gensrc/proto/cloud.proto
@@ -1856,6 +1856,7 @@ enum MetaServiceCode {
message DeleteBitmapStoragePB {
optional bool store_in_fdb = 1;
optional DeleteBitmapPB delete_bitmap = 2;
+ optional PackedSliceLocationPB packed_slice_location = 3;
}
message UpdateDeleteBitmapRequest {
diff --git
a/regression-test/data/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.out
b/regression-test/data/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.out
new file mode 100644
index 00000000000..07c520f3469
--- /dev/null
+++
b/regression-test/data/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.out
@@ -0,0 +1,63 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !before_restart --
+1 10
+2 2
+3 30
+4 4
+
+-- !after_restart --
+1 10
+2 2
+3 30
+4 4
+
+-- !after_insert --
+1 10
+2 20
+3 30
+4 40
+
+-- !multi_rowset --
+0 value_0_v2
+1 value_1_v2
+2 value_2_v2
+3 value_3_v2
+4 value_4_v2
+5 value_5_v2
+6 value_6_v2
+7 value_7_v2
+8 value_8_v2
+9 value_9_v2
+
+-- !multi_rowset_after_restart --
+0 value_0_v2
+1 value_1_v2
+2 value_2_v2
+3 value_3_v2
+4 value_4_v2
+5 value_5_v2
+6 value_6_v2
+7 value_7_v2
+8 value_8_v2
+9 value_9_v2
+
+-- !large_bitmap --
+100
+
+-- !large_bitmap_sample --
+0 value_0_updated
+1 value_1_updated
+2 value_2_updated
+3 value_3_updated
+4 value_4_updated
+
+-- !large_bitmap_after_restart --
+100
+
+-- !large_bitmap_sample_after_restart --
+0 value_0_updated
+1 value_1_updated
+2 value_2_updated
+3 value_3_updated
+4 value_4_updated
+
diff --git
a/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_cu_compaction.groovy
b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_cu_compaction.groovy
index 9ac4da3b1dc..c7aeff6419d 100644
---
a/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_cu_compaction.groovy
+++
b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_cu_compaction.groovy
@@ -19,6 +19,9 @@ import org.apache.doris.regression.suite.ClusterOptions
suite("test_cu_compaction", "docker") {
def options = new ClusterOptions()
+ Random random = new Random()
+ def enablePackedFile = random.nextBoolean()
+ logger.info("enable_packed_file: ${enablePackedFile}")
options.beConfigs += [
'delete_bitmap_store_version=2',
'delete_bitmap_max_bytes_store_in_fdb=-1',
@@ -30,7 +33,8 @@ suite("test_cu_compaction", "docker") {
'path_gc_check_interval_second=1',
'trash_file_expire_time_sec=0',
'tablet_rowset_stale_sweep_time_sec=1',
- 'min_garbage_sweep_interval=1'
+ 'min_garbage_sweep_interval=1',
+ "enable_packed_file=${enablePackedFile}"
]
options.setFeNum(1)
options.setBeNum(1)
diff --git
a/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_load.groovy
b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_load.groovy
index 438f7f99a6c..d88598e5cd7 100644
---
a/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_load.groovy
+++
b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_load.groovy
@@ -19,13 +19,17 @@ import org.apache.doris.regression.suite.ClusterOptions
suite("test_load", "docker") {
def options = new ClusterOptions()
+ Random random = new Random()
+ def enablePackedFile = random.nextBoolean()
+ logger.info("enable_packed_file: ${enablePackedFile}")
options.beConfigs += [
'delete_bitmap_store_write_version=2',
'delete_bitmap_store_read_version=2',
'delete_bitmap_store_v2_max_bytes_in_fdb=-1',
'enable_sync_tablet_delete_bitmap_by_cache=false',
'enable_delete_bitmap_store_v2_check_correctness=true',
- 'enable_java_support=false'
+ 'enable_java_support=false',
+ "enable_packed_file=${enablePackedFile}"
]
options.setFeNum(1)
options.setBeNum(1)
diff --git
a/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.groovy
b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.groovy
new file mode 100644
index 00000000000..0c9e93f760e
--- /dev/null
+++
b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.groovy
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.apache.doris.regression.suite.ClusterOptions
+
+suite("test_packed_delete_bitmap", "docker") {
+ if (!isCloudMode()) {
+ return
+ }
+
+ // Test 1: BE restart with packed delete bitmap
+ def options1 = new ClusterOptions()
+ options1.beConfigs += [
+ 'delete_bitmap_store_write_version=2',
+ 'delete_bitmap_store_read_version=2',
+ 'delete_bitmap_store_v2_max_bytes_in_fdb=0',
+ 'enable_sync_tablet_delete_bitmap_by_cache=false',
+ 'enable_delete_bitmap_store_v2_check_correctness=true',
+ 'enable_java_support=false',
+ 'enable_packed_file=true'
+ ]
+ options1.setFeNum(1)
+ options1.setBeNum(1)
+ options1.cloudMode = true
+
+ docker(options1) {
+ def tableName = "test_be_restart"
+ sql """ DROP TABLE IF EXISTS ${tableName}; """
+ sql """
+ CREATE TABLE ${tableName} (
+ `k` int(11) NOT NULL,
+ `v` int(11) NOT NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES (
+ "disable_auto_compaction"="true",
+ "replication_num" = "1"
+ );
+ """
+
+ // Insert data to create delete bitmap in packed file
+ sql """ INSERT INTO ${tableName} VALUES(1, 1), (2, 2); """
+ sql """ INSERT INTO ${tableName} VALUES(3, 3), (4, 4); """
+ sql """ INSERT INTO ${tableName} VALUES(1, 10), (3, 30); """
+
+ order_qt_before_restart "SELECT * FROM ${tableName};"
+
+ // Restart BE
+ logger.info("Restarting backends...")
+ cluster.restartBackends()
+
+ // Query after restart - should read delete bitmap from packed file
+ order_qt_after_restart "SELECT * FROM ${tableName};"
+
+ // Insert more data after restart
+ sql """ INSERT INTO ${tableName} VALUES(2, 20), (4, 40); """
+ order_qt_after_insert "SELECT * FROM ${tableName};"
+ }
+
+ // Test 2: Multiple rowsets write delete bitmap to same packed file
+ def options2 = new ClusterOptions()
+ options2.beConfigs += [
+ 'delete_bitmap_store_write_version=2',
+ 'delete_bitmap_store_read_version=2',
+ 'delete_bitmap_store_v2_max_bytes_in_fdb=0',
+ 'enable_sync_tablet_delete_bitmap_by_cache=false',
+ 'enable_delete_bitmap_store_v2_check_correctness=true',
+ 'enable_java_support=false',
+ 'enable_packed_file=true',
+ 'packed_file_size_threshold_bytes=10485760' // 10MB - large enough to hold multiple delete bitmaps
+ ]
+ options2.setFeNum(1)
+ options2.setBeNum(1)
+ options2.cloudMode = true
+
+ docker(options2) {
+ def tableName = "test_multi_rowset"
+ sql """ DROP TABLE IF EXISTS ${tableName}; """
+ sql """
+ CREATE TABLE ${tableName} (
+ `k` int(11) NOT NULL,
+ `v` varchar(100) NOT NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES (
+ "disable_auto_compaction"="true",
+ "replication_num" = "1"
+ );
+ """
+
+ // Insert multiple rowsets - their delete bitmaps should go to same packed file
+ for (int i = 0; i < 10; i++) {
+ sql """ INSERT INTO ${tableName} VALUES(${i}, 'value_${i}_v1'); """
+ }
+
+ // Update some rows to create more delete bitmaps
+ for (int i = 0; i < 10; i++) {
+ sql """ INSERT INTO ${tableName} VALUES(${i}, 'value_${i}_v2'); """
+ }
+
+ order_qt_multi_rowset "SELECT * FROM ${tableName} ORDER BY k;"
+
+ // Restart and verify
+ cluster.restartBackends()
+ order_qt_multi_rowset_after_restart "SELECT * FROM ${tableName} ORDER BY k;"
+ }
+
+ // Test 3: Large delete bitmap exceeds small file threshold - fallback to direct write
+ def options3 = new ClusterOptions()
+ options3.beConfigs += [
+ 'delete_bitmap_store_write_version=2',
+ 'delete_bitmap_store_read_version=2',
+ 'delete_bitmap_store_v2_max_bytes_in_fdb=0',
+ 'enable_sync_tablet_delete_bitmap_by_cache=false',
+ 'enable_delete_bitmap_store_v2_check_correctness=true',
+ 'enable_java_support=false',
+ 'enable_packed_file=true',
+ 'small_file_threshold_bytes=100' // Very small threshold to trigger direct write
+ ]
+ options3.setFeNum(1)
+ options3.setBeNum(1)
+ options3.cloudMode = true
+
+ docker(options3) {
+ def tableName = "test_large_bitmap"
+ sql """ DROP TABLE IF EXISTS ${tableName}; """
+ sql """
+ CREATE TABLE ${tableName} (
+ `k` int(11) NOT NULL,
+ `v` varchar(1000) NOT NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES (
+ "disable_auto_compaction"="true",
+ "replication_num" = "1"
+ );
+ """
+
+ // Insert enough data to create a large delete bitmap
+ def values = []
+ for (int i = 0; i < 100; i++) {
+ values.add("(${i}, 'value_${i}_initial')")
+ }
+ sql """ INSERT INTO ${tableName} VALUES ${values.join(',')}; """
+
+ // Update all rows to create delete bitmap entries
+ values = []
+ for (int i = 0; i < 100; i++) {
+ values.add("(${i}, 'value_${i}_updated')")
+ }
+ sql """ INSERT INTO ${tableName} VALUES ${values.join(',')}; """
+
+ order_qt_large_bitmap "SELECT COUNT(*) FROM ${tableName};"
+ order_qt_large_bitmap_sample "SELECT * FROM ${tableName} WHERE k < 5 ORDER BY k;"
+
+ // Restart and verify
+ cluster.restartBackends()
+ order_qt_large_bitmap_after_restart "SELECT COUNT(*) FROM ${tableName};"
+ order_qt_large_bitmap_sample_after_restart "SELECT * FROM ${tableName} WHERE k < 5 ORDER BY k;"
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]