This is an automated email from the ASF dual-hosted git repository. yangzhg pushed a commit to branch support_batch_delete_in_fe in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/support_batch_delete_in_fe by this push: new f2b2a0a Add delete bitmap index and base operation (#4242) f2b2a0a is described below commit f2b2a0a70bc6d1fed3f5d4f853ede10b35a53f92 Author: ZhangYu0123 <67053339+zhangyu0...@users.noreply.github.com> AuthorDate: Tue Aug 4 14:29:33 2020 +0800 Add delete bitmap index and base operation (#4242) * add delete bitmap index * delete index for batch delete * delete index for batch delete * add delete bitmap index * add delete bitmap index --- be/src/olap/CMakeLists.txt | 1 + be/src/olap/delete_bitmap_index.cpp | 72 +++++++++++++ be/src/olap/delete_bitmap_index.h | 118 +++++++++++++++++++++ be/src/olap/row_block.cpp | 2 + be/src/olap/row_block.h | 8 ++ be/src/olap/row_block2.cpp | 12 ++- be/src/olap/row_block2.h | 8 ++ be/src/olap/row_cursor.cpp | 3 +- be/src/olap/row_cursor.h | 7 ++ be/src/olap/rowset/segment_v2/segment.cpp | 32 ++++++ be/src/olap/rowset/segment_v2/segment.h | 15 +++ be/src/olap/rowset/segment_v2/segment_iterator.cpp | 22 ++++ be/src/olap/rowset/segment_v2/segment_writer.cpp | 14 +++ be/src/olap/rowset/segment_v2/segment_writer.h | 3 + be/test/olap/CMakeLists.txt | 1 + be/test/olap/delete_bitmap_index_test.cpp | 79 ++++++++++++++ docs/zh-CN/internal/doris_storage_optimization.md | 13 +-- gensrc/proto/segment_v2.proto | 10 ++ 18 files changed, 409 insertions(+), 11 deletions(-) diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index 3705f59..99da5ef 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -71,6 +71,7 @@ add_library(Olap STATIC data_dir.cpp row.cpp short_key_index.cpp + delete_bitmap_index.cpp snapshot_manager.cpp stream_index_common.cpp stream_index_reader.cpp diff --git a/be/src/olap/delete_bitmap_index.cpp b/be/src/olap/delete_bitmap_index.cpp new file mode 100644 index 0000000..92f9e02 --- /dev/null +++ b/be/src/olap/delete_bitmap_index.cpp @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/delete_bitmap_index.h" + +#include <string> +#include "gutil/strings/substitute.h" + +using strings::Substitute; + +namespace doris { + +Status DeleteBitmapIndexBuilder::add_delete_item(const uint32_t& _row_count) { + _delete_bitmap.add(_row_count); + _num_items++; + return Status::OK(); +} + +Status DeleteBitmapIndexBuilder::finalize(std::vector<Slice>* body, + segment_v2::PageFooterPB* page_footer) { + // get the size after serialize + _delete_bitmap.runOptimize(); + uint32_t size = _delete_bitmap.getSizeInBytes(false); + // fill in bitmap index page + page_footer->set_type(segment_v2::DELETE_INDEX_PAGE); + page_footer->set_uncompressed_size(size); + + segment_v2::DeleteIndexFooterPB* footer = page_footer->mutable_delete_index_page_footer(); + footer->set_num_items(_num_items); + footer->set_content_bytes(size); + + // write bitmap to slice as return + _buf.resize(size); + _delete_bitmap.write(reinterpret_cast<char*>(_buf.data()), false); + body->emplace_back(_buf); + return Status::OK(); +} + +Status DeleteBitmapIndexDecoder::parse(const Slice& body, const segment_v2::DeleteIndexFooterPB& footer) { + _footer = footer; + // check if body size match footer's information + if (body.size != (_footer.content_bytes())) { + return Status::Corruption(Substitute("Index size not match, need=$0, real=$1", + _footer.content_bytes(), body.size)); + } + // set index buffer + Slice index_data(body.data, _footer.content_bytes()); + // load delete bitmap + _delete_bitmap = Roaring::read(index_data.data, false); + + _parsed = true; + return Status::OK(); +} + +const Roaring& DeleteBitmapIndexIterator:: get_delete_bitmap() const{ + return _decoder->get_delete_bitmap(); +} +} diff --git a/be/src/olap/delete_bitmap_index.h b/be/src/olap/delete_bitmap_index.h new file mode 100644 index 0000000..5d1c553 --- /dev/null +++ b/be/src/olap/delete_bitmap_index.h @@ -0,0 +1,118 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> +#include <iterator> +#include <string> +#include <vector> +#include <vector> +#include <roaring/roaring.hh> + +#include "common/status.h" +#include "gen_cpp/segment_v2.pb.h" +#include "util/faststring.h" +#include "util/slice.h" + +#include "util/debug_util.h" + +namespace doris { + +class DeleteBitmapIndexIterator; +class DeleteBitmapIndexDecoder; + +/// This class is a builder which can build delete bitmap index. SegmentWriter can use it to generate +/// delete bitmap index page and save it in segment. +class DeleteBitmapIndexBuilder { +public: + /// Construction function of DeleteBitmapIndexBuilder + DeleteBitmapIndexBuilder() : _num_items(0) { + } + + /// Add delete item to delete bitmap index + Status add_delete_item(const uint32_t& _row_count); + + /// How many bytes are required to serialize this bitmap + uint64_t size() { + return _delete_bitmap.getSizeInBytes(false); + } + + /// When the segment flush, use finalize function to flush index data to slice to generate index page + /// and fill the page footer record meta. + Status finalize(std::vector<Slice>* body, segment_v2::PageFooterPB* footer); + +private: + /// the number of delete items in delete bitmap index + uint32_t _num_items; + + /// roaring bitmap to record rowids of delete items + Roaring _delete_bitmap; + + faststring _buf; +}; + +/// An Iterator to iterate one delete bitmap index. +/// Client can use this class to access the bitmap. +class DeleteBitmapIndexIterator { +public: + /// Construction function of DeleteBitmapIndexBuilder + DeleteBitmapIndexIterator(const DeleteBitmapIndexDecoder* decoder) + : _decoder(decoder) {} + + /// get const delete bitmap to access delete bitmap record + const Roaring& get_delete_bitmap() const; + +private: + const DeleteBitmapIndexDecoder* _decoder; +}; + +/// Used to decode bitmap ordinal to footer and encoded index data. +/// Usage: +/// DeleteBitmapIndexDecoder decoder; +/// decoder.parse(body, footer); +class DeleteBitmapIndexDecoder { +public: + DeleteBitmapIndexDecoder(bool parsed = false) : _parsed(parsed), _delete_bitmap() {} + + /// client should assure that body is available when this class is used + Status parse(const Slice& body, const segment_v2::DeleteIndexFooterPB& footer); + + /// The number of delete items in delete bitmap index + uint32_t num_items() const { + DCHECK(_parsed); + return _footer.num_items(); + } + + /// Get the iterator of DeleteBitmapIndex + DeleteBitmapIndexIterator get_iterator() const { + DCHECK(_parsed); + return {this}; + } + + /// get const delete bitmap to access delete bitmap record + const Roaring& get_delete_bitmap() const { return _delete_bitmap; } + +private: + bool _parsed; + + // All following fields are only valid after parse has been executed successfully + segment_v2::DeleteIndexFooterPB _footer; + Roaring _delete_bitmap; +}; + +} diff --git a/be/src/olap/row_block.cpp b/be/src/olap/row_block.cpp index 09cf48b..e20d1a4 100644 --- a/be/src/olap/row_block.cpp +++ b/be/src/olap/row_block.cpp @@ -52,6 +52,7 @@ OLAPStatus RowBlock::init(const RowBlockInfo& block_info) { _info = block_info; _null_supported = block_info.null_supported; _capacity = _info.row_num; + _delete_bitmap = std::unique_ptr<Roaring>(new Roaring()); _compute_layout(); _mem_buf = new char[_mem_buf_bytes]; return OLAP_SUCCESS; @@ -75,6 +76,7 @@ void RowBlock::clear() { _pos = 0; _limit = 0; _mem_pool->clear(); + _delete_bitmap.reset(); } void RowBlock::_compute_layout() { diff --git a/be/src/olap/row_block.h b/be/src/olap/row_block.h index c9d277f..a439338 100644 --- a/be/src/olap/row_block.h +++ b/be/src/olap/row_block.h @@ -68,6 +68,8 @@ public: inline void get_row(uint32_t row_index, RowCursor* cursor) const { cursor->attach(_mem_buf + row_index * _mem_row_bytes); + // set current row whether it is deleted + cursor->set_is_delete(_delete_bitmap->contains(row_index)); } template<typename RowType> @@ -93,6 +95,10 @@ public: return _mem_pool.get(); } + Roaring* get_delete_bitmap() const { + return _delete_bitmap.get(); + } + // 重用rowblock之前需调用clear,恢复到init之后的原始状态 void clear(); @@ -139,6 +145,8 @@ private: std::unique_ptr<MemTracker> _tracker; std::unique_ptr<MemPool> _mem_pool; + // delete bitmap which records deleted rows + std::unique_ptr<Roaring> _delete_bitmap; // 由于内部持有内存资源,所以这里禁止拷贝和赋值 DISALLOW_COPY_AND_ASSIGN(RowBlock); }; diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index 1ab8782..66d480e 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -32,7 +32,8 @@ RowBlockV2::RowBlockV2(const Schema& schema, uint16_t capacity) _column_datas(_schema.num_columns(), nullptr), _column_null_bitmaps(_schema.num_columns(), nullptr), _pool(new MemPool(&_tracker)), - _selection_vector(nullptr) { + _selection_vector(nullptr), + _delete_bitmap(new Roaring()) { auto bitmap_size = BitmapSize(capacity); for (auto cid : _schema.column_ids()) { size_t data_size = _schema.column(cid)->type_info()->size() * _capacity; @@ -68,7 +69,8 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) { helper->set_null(cid); } else { helper->set_not_null(cid); - helper->set_field_content_shallow(cid, + helper->set_field_content_shallow( + cid, reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx))); } } @@ -77,11 +79,13 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) { uint16_t row_idx = _selection_vector[i]; dst->get_row(i, helper); helper->set_not_null(cid); - helper->set_field_content_shallow(cid, - reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx))); + helper->set_field_content_shallow( + cid, reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx))); } } } + + dst->get_delete_bitmap()->swap(*(_delete_bitmap.get())); // swap MemPool to copy string content dst->mem_pool()->exchange_data(_pool.get()); dst->set_pos(0); diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h index 6716592..c22eca8 100644 --- a/be/src/olap/row_block2.h +++ b/be/src/olap/row_block2.h @@ -59,6 +59,7 @@ public: // all previously returned ColumnBlocks are invalidated after clear(), accessing them // will result in undefined behavior. void clear() { + _delete_bitmap.reset(); _num_rows = 0; _pool->clear(); _selected_size = _capacity; @@ -110,6 +111,11 @@ public: _delete_state = delete_state; } + // get delete bitmap + Roaring* get_delete_bitmap() { + return _delete_bitmap.get(); + } + private: Schema _schema; size_t _capacity; @@ -133,6 +139,8 @@ private: // block delete state DelCondSatisfied _delete_state; + // delete bit map + std::unique_ptr<Roaring> _delete_bitmap; }; // Stands for a row in RowBlockV2. It is consisted of a RowBlockV2 reference diff --git a/be/src/olap/row_cursor.cpp b/be/src/olap/row_cursor.cpp index 9fad7ba..22533de 100644 --- a/be/src/olap/row_cursor.cpp +++ b/be/src/olap/row_cursor.cpp @@ -30,7 +30,8 @@ using std::vector; namespace doris { RowCursor::RowCursor() : _fixed_len(0), - _variable_len(0) {} + _variable_len(0), + _is_delete(false) {} RowCursor::~RowCursor() { delete [] _owned_fixed_buf; diff --git a/be/src/olap/row_cursor.h b/be/src/olap/row_cursor.h index eb4e537..fb77fe7 100644 --- a/be/src/olap/row_cursor.h +++ b/be/src/olap/row_cursor.h @@ -74,6 +74,10 @@ public: column_schema(index)->to_index(&dst_cell, cell(index)); } + void set_is_delete(bool is_delete) { _is_delete = is_delete; } + + bool is_delete() { return _is_delete; } + // deep copy field content (ignore null-byte) void set_field_content(size_t index, const char* buf, MemPool* mem_pool) { char* dest = cell_ptr(index); @@ -166,6 +170,9 @@ private: char* _variable_buf = nullptr; size_t _variable_len; + // current row is deleted + bool _is_delete; + DISALLOW_COPY_AND_ASSIGN(RowCursor); }; } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 002f056..b132065 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -80,6 +80,8 @@ Status Segment::new_iterator(const Schema& schema, } RETURN_IF_ERROR(_load_index()); + RETURN_IF_ERROR(_load_delete_index()); + iter->reset(new SegmentIterator(this->shared_from_this(), schema)); iter->get()->init(read_options); return Status::OK(); @@ -157,6 +159,36 @@ Status Segment::_load_index() { }); } +Status Segment::_load_delete_index() { + return _delete_index_once.call([this] { + if (!_footer.has_delete_index_page()) { + _delete_index_decoder.reset(new DeleteBitmapIndexDecoder(true)); + Status::OK(); + } + + // read and parse delete key index page + std::unique_ptr<fs::ReadableBlock> rblock; + fs::BlockManager* block_mgr = fs::fs_util::block_manager(); + RETURN_IF_ERROR(block_mgr->open_block(_fname, &rblock)); + + PageReadOptions opts; + opts.rblock = rblock.get(); + opts.page_pointer = PagePointer(_footer.delete_index_page()); + opts.codec = nullptr; // delete key index page uses NO_COMPRESSION for now + OlapReaderStatistics tmp_stats; + opts.stats = &tmp_stats; + + Slice body; + PageFooterPB footer; + RETURN_IF_ERROR(PageIO::read_and_decompress_page(opts, &_sk_index_handle, &body, &footer)); + DCHECK_EQ(footer.type(), SHORT_KEY_PAGE); + DCHECK(footer.has_delete_index_page_footer()); + + _delete_index_decoder.reset(new DeleteBitmapIndexDecoder()); + return _delete_index_decoder->parse(body, footer.delete_index_page_footer()); + }); +} + Status Segment::_create_column_readers() { for (uint32_t ordinal = 0; ordinal < _footer.columns().size(); ++ordinal) { auto& column_pb = _footer.columns(ordinal); diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 78539fa..d279489 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -28,6 +28,8 @@ #include "olap/iterators.h" #include "olap/rowset/segment_v2/page_handle.h" #include "olap/short_key_index.h" +#include "olap/delete_bitmap_index.h" + #include "olap/tablet_schema.h" #include "util/faststring.h" #include "util/once.h" @@ -95,6 +97,11 @@ public: return _sk_index_decoder->upper_bound(key); } + DeleteBitmapIndexIterator delete_index_iterator() const { + DCHECK(_delete_index_once.has_called() && _delete_index_once.stored_result().ok()); + return _delete_index_decoder->get_iterator(); + } + // This will return the last row block in this segment. // NOTE: Before call this function , client should assure that // this segment is not empty. @@ -120,6 +127,8 @@ private: // May be called multiple times, subsequent calls will no op. Status _load_index(); + Status _load_delete_index(); + private: friend class SegmentIterator; std::string _fname; @@ -140,10 +149,16 @@ private: // used to guarantee that short key index will be loaded at most once in a thread-safe way DorisCallOnce<Status> _load_index_once; + + // used to guarantee that delete index will be loaded at most once in a thread-safe way + DorisCallOnce<Status> _delete_index_once; // used to hold short key index page in memory PageHandle _sk_index_handle; // short key index decoder std::unique_ptr<ShortKeyIndexDecoder> _sk_index_decoder; + // delete index decoder + std::unique_ptr<DeleteBitmapIndexDecoder> _delete_index_decoder; + }; } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index ec8bb47..b039e81 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -561,6 +561,28 @@ Status SegmentIterator::next_batch(RowBlockV2* block) { i += range_size; } } + + // phase 4: read delete index, fill in the row whether is delete. + { + Roaring* current_bitmap = block->get_delete_bitmap(); + + // fetch delete index + const Roaring& delete_bitmap = _segment->delete_index_iterator().get_delete_bitmap(); + + const uint16_t* sv = block->selection_vector(); + const uint16_t sv_size = block->selected_size(); + uint16_t i = 0; + // check the delete rows and fill in the current_bitmap + // which use i in selection_vector as rowid + while (i < sv_size) { + + if(delete_bitmap.contains(_block_rowids[sv[i]])) { + current_bitmap->add(i); + } + i++; + } + } + return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index a977407..a9f7aba 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -77,6 +77,7 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec __attribute__((unused)) _column_writers.push_back(std::move(writer)); } _index_builder.reset(new ShortKeyIndexBuilder(_segment_id, _opts.num_rows_per_block)); + _delete_bitmap_builder.reset(new DeleteBitmapIndexBuilder()); return Status::OK(); } @@ -111,6 +112,7 @@ uint64_t SegmentWriter::estimate_segment_size() { size += column_writer->estimate_buffer_size(); } size += _index_builder->size(); + size += _delete_bitmap_builder->size(); return size; } @@ -125,6 +127,7 @@ Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size RETURN_IF_ERROR(_write_bitmap_index()); RETURN_IF_ERROR(_write_bloom_filter_index()); RETURN_IF_ERROR(_write_short_key_index()); + RETURN_IF_ERROR(_write_delete_index()); *index_size = _wblock->bytes_appended() - index_offset; RETURN_IF_ERROR(_write_footer()); RETURN_IF_ERROR(_wblock->finalize()); @@ -180,6 +183,17 @@ Status SegmentWriter::_write_short_key_index() { return Status::OK(); } +Status SegmentWriter::_write_delete_index() { + std::vector<Slice> body; + PageFooterPB footer; + RETURN_IF_ERROR(_delete_bitmap_builder->finalize(&body, &footer)); + PagePointer pp; + // delete index page is not compressed right now + RETURN_IF_ERROR(PageIO::write_page(_wblock, body, footer, &pp)); + pp.to_proto(_footer.mutable_delete_index_page()); + return Status::OK(); +} + Status SegmentWriter::_write_footer() { _footer.set_num_rows(_row_count); diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index 4703f68..8eee044 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -25,6 +25,7 @@ #include "common/status.h" // Status #include "gen_cpp/segment_v2.pb.h" #include "gutil/macros.h" +#include "olap/delete_bitmap_index.h" namespace doris { @@ -75,6 +76,7 @@ private: Status _write_bitmap_index(); Status _write_bloom_filter_index(); Status _write_short_key_index(); + Status _write_delete_index(); Status _write_footer(); Status _write_raw_data(const std::vector<Slice>& slices); @@ -88,6 +90,7 @@ private: SegmentFooterPB _footer; std::unique_ptr<ShortKeyIndexBuilder> _index_builder; + std::unique_ptr<DeleteBitmapIndexBuilder> _delete_bitmap_builder; std::vector<std::unique_ptr<ColumnWriter>> _column_writers; uint32_t _row_count = 0; }; diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt index 76e0569..2e500af 100644 --- a/be/test/olap/CMakeLists.txt +++ b/be/test/olap/CMakeLists.txt @@ -37,6 +37,7 @@ ADD_BE_TEST(in_list_predicate_test) ADD_BE_TEST(null_predicate_test) ADD_BE_TEST(file_helper_test) ADD_BE_TEST(file_utils_test) +ADD_BE_TEST(delete_bitmap_index_test) ADD_BE_TEST(delete_handler_test) ADD_BE_TEST(column_reader_test) ADD_BE_TEST(schema_change_test) diff --git a/be/test/olap/delete_bitmap_index_test.cpp b/be/test/olap/delete_bitmap_index_test.cpp new file mode 100644 index 0000000..ffc9386 --- /dev/null +++ b/be/test/olap/delete_bitmap_index_test.cpp @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/delete_bitmap_index.h" + +#include <gtest/gtest.h> + +#include "olap/tablet_schema_helper.h" +#include "olap/row_cursor.h" +#include "util/debug_util.h" + +namespace doris { + +class DeleteBitmapIndexTest : public testing::Test { +public: + DeleteBitmapIndexTest() { } + virtual ~DeleteBitmapIndexTest() { + } +}; + +TEST_F(DeleteBitmapIndexTest, buider) { + DeleteBitmapIndexBuilder builder; + + int num_items = 0; + for (int i = 1000; i < 10000; i += 2) { + builder.add_delete_item(i); + num_items++; + } + std::vector<Slice> slices; + segment_v2::PageFooterPB footer; + auto st = builder.finalize(&slices, &footer); + ASSERT_TRUE(st.ok()); + ASSERT_EQ(segment_v2::DELETE_INDEX_PAGE, footer.type()); + ASSERT_EQ(num_items, footer.delete_index_page_footer().num_items()); + + std::string buf; + for (auto& slice : slices) { + buf.append(slice.data, slice.size); + } + + DeleteBitmapIndexDecoder decoder; + st = decoder.parse(buf, footer.delete_index_page_footer()); + ASSERT_TRUE(st.ok()); + + auto& bitmap = decoder.get_iterator().get_delete_bitmap(); + { + ASSERT_TRUE(bitmap.contains(1002)); + } + { + ASSERT_TRUE(!bitmap.contains(1003)); + } + { + ASSERT_TRUE(!bitmap.contains(5003)); + } + { + ASSERT_TRUE(bitmap.contains(5002)); + } +} +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/docs/zh-CN/internal/doris_storage_optimization.md b/docs/zh-CN/internal/doris_storage_optimization.md index 3d65388..a2a49f5 100644 --- a/docs/zh-CN/internal/doris_storage_optimization.md +++ b/docs/zh-CN/internal/doris_storage_optimization.md @@ -163,17 +163,18 @@ message ColumnMetaPB { repeated MetadataPairPB column_meta_datas; } -message FileFooterPB { +message SegmentFooterPB { optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用 repeated ColumnPB schema = 5; // 列Schema - optional uint64 num_values = 4; // 文件中保存的行数 - optional uint64 index_footprint = 7; // 索引大小 - optional uint64 data_footprint = 8; // 数据大小 + optional uint64 num_values = 4; // 文件中保存的行数 + optional uint64 index_footprint = 7; // 索引大小 + optional uint64 data_footprint = 8; // 数据大小 optional uint64 raw_data_footprint = 8; // 原始数据大小 - optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式 - repeated ColumnMetaPB column_metas = 10; // 列元数据 + optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式 + repeated ColumnMetaPB column_metas = 10; // 列元数据 optional PagePointerPB key_index_page; // short key索引page + optional PagePointerPB delete_index_page; // 删除索引page } ``` diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto index 93c3f19..9ed7c2d 100644 --- a/gensrc/proto/segment_v2.proto +++ b/gensrc/proto/segment_v2.proto @@ -59,6 +59,7 @@ enum PageTypePB { INDEX_PAGE = 2; DICTIONARY_PAGE = 3; SHORT_KEY_PAGE = 4; + DELETE_INDEX_PAGE = 5; } message DataPageFooterPB { @@ -106,6 +107,13 @@ message ShortKeyFooterPB { optional uint32 num_segment_rows = 6; } +message DeleteIndexFooterPB { + // How many index item in this index. + optional uint32 num_items = 1; + // The total bytes occupied by the delete index + optional uint32 content_bytes = 2; +} + message PageFooterPB { // required: indicates which of the *_footer fields is set optional PageTypePB type = 1; @@ -120,6 +128,8 @@ message PageFooterPB { optional DictPageFooterPB dict_page_footer = 9; // present only when type == SHORT_KEY_PAGE optional ShortKeyFooterPB short_key_page_footer = 10; + // present only when type == DELETE_INDEX_PAGE + optional DeleteIndexFooterPB delete_index_page_footer = 11; } message ZoneMapPB { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org