This is an automated email from the ASF dual-hosted git repository.

yangzhg pushed a commit to branch support_batch_delete_in_fe
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/support_batch_delete_in_fe by 
this push:
     new f2b2a0a  Add delete bitmap index and base operation (#4242)
f2b2a0a is described below

commit f2b2a0a70bc6d1fed3f5d4f853ede10b35a53f92
Author: ZhangYu0123 <67053339+zhangyu0...@users.noreply.github.com>
AuthorDate: Tue Aug 4 14:29:33 2020 +0800

    Add delete bitmap index and base operation (#4242)
    
    * add delete bitmap index
    
    * delete index for batch delete
    
    * delete index for batch delete
    
    * add delete bitmap index
    
    * add delete bitmap index
---
 be/src/olap/CMakeLists.txt                         |   1 +
 be/src/olap/delete_bitmap_index.cpp                |  72 +++++++++++++
 be/src/olap/delete_bitmap_index.h                  | 118 +++++++++++++++++++++
 be/src/olap/row_block.cpp                          |   2 +
 be/src/olap/row_block.h                            |   8 ++
 be/src/olap/row_block2.cpp                         |  12 ++-
 be/src/olap/row_block2.h                           |   8 ++
 be/src/olap/row_cursor.cpp                         |   3 +-
 be/src/olap/row_cursor.h                           |   7 ++
 be/src/olap/rowset/segment_v2/segment.cpp          |  32 ++++++
 be/src/olap/rowset/segment_v2/segment.h            |  15 +++
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  22 ++++
 be/src/olap/rowset/segment_v2/segment_writer.cpp   |  14 +++
 be/src/olap/rowset/segment_v2/segment_writer.h     |   3 +
 be/test/olap/CMakeLists.txt                        |   1 +
 be/test/olap/delete_bitmap_index_test.cpp          |  79 ++++++++++++++
 docs/zh-CN/internal/doris_storage_optimization.md  |  13 +--
 gensrc/proto/segment_v2.proto                      |  10 ++
 18 files changed, 409 insertions(+), 11 deletions(-)

diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt
index 3705f59..99da5ef 100644
--- a/be/src/olap/CMakeLists.txt
+++ b/be/src/olap/CMakeLists.txt
@@ -71,6 +71,7 @@ add_library(Olap STATIC
     data_dir.cpp
     row.cpp
     short_key_index.cpp
+    delete_bitmap_index.cpp
     snapshot_manager.cpp
     stream_index_common.cpp
     stream_index_reader.cpp
diff --git a/be/src/olap/delete_bitmap_index.cpp 
b/be/src/olap/delete_bitmap_index.cpp
new file mode 100644
index 0000000..92f9e02
--- /dev/null
+++ b/be/src/olap/delete_bitmap_index.cpp
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/delete_bitmap_index.h"
+
+#include <string>
+#include "gutil/strings/substitute.h"
+
+using strings::Substitute;
+
+namespace doris {
+
+Status DeleteBitmapIndexBuilder::add_delete_item(const uint32_t& _row_count) {
+    _delete_bitmap.add(_row_count);
+    _num_items++;
+    return Status::OK();
+}
+
+Status DeleteBitmapIndexBuilder::finalize(std::vector<Slice>* body,
+                                          segment_v2::PageFooterPB* 
page_footer) {
+    // get the size after serialize
+    _delete_bitmap.runOptimize();
+    uint32_t size = _delete_bitmap.getSizeInBytes(false);
+    // fill in bitmap index page
+    page_footer->set_type(segment_v2::DELETE_INDEX_PAGE);
+    page_footer->set_uncompressed_size(size);
+
+    segment_v2::DeleteIndexFooterPB* footer = 
page_footer->mutable_delete_index_page_footer();
+    footer->set_num_items(_num_items);
+    footer->set_content_bytes(size);
+
+    // write bitmap to slice as return
+    _buf.resize(size);
+    _delete_bitmap.write(reinterpret_cast<char*>(_buf.data()), false);
+    body->emplace_back(_buf);
+    return Status::OK();
+}
+
+Status DeleteBitmapIndexDecoder::parse(const Slice& body, const 
segment_v2::DeleteIndexFooterPB& footer) {
+    _footer = footer;
+    // check if body size match footer's information
+    if (body.size != (_footer.content_bytes())) {
+        return Status::Corruption(Substitute("Index size not match, need=$0, 
real=$1",
+                                             _footer.content_bytes(), 
body.size));
+    }
+    // set index buffer
+    Slice index_data(body.data, _footer.content_bytes());
+    // load delete bitmap
+    _delete_bitmap = Roaring::read(index_data.data, false);
+
+    _parsed = true;
+    return Status::OK();
+}
+
+const Roaring& DeleteBitmapIndexIterator:: get_delete_bitmap() const{
+    return _decoder->get_delete_bitmap();
+}
+}
diff --git a/be/src/olap/delete_bitmap_index.h 
b/be/src/olap/delete_bitmap_index.h
new file mode 100644
index 0000000..5d1c553
--- /dev/null
+++ b/be/src/olap/delete_bitmap_index.h
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <vector>
+#include <vector>
+#include <roaring/roaring.hh>
+
+#include "common/status.h"
+#include "gen_cpp/segment_v2.pb.h"
+#include "util/faststring.h"
+#include "util/slice.h"
+
+#include "util/debug_util.h"
+
+namespace doris {
+
+class DeleteBitmapIndexIterator;
+class DeleteBitmapIndexDecoder;
+
+/// This class is a builder which can build delete bitmap index. SegmentWriter 
can use it to generate
+/// delete bitmap index page and save it in segment.
+class DeleteBitmapIndexBuilder {
+public:
+    /// Construction function of DeleteBitmapIndexBuilder
+    DeleteBitmapIndexBuilder() : _num_items(0) {
+    }
+
+    /// Add delete item to delete bitmap index
+    Status add_delete_item(const uint32_t& _row_count);
+
+    /// How many bytes are required to serialize this bitmap
+    uint64_t size() {
+        return _delete_bitmap.getSizeInBytes(false);
+    }
+
+    /// When the segment flush, use finalize function to flush index data to 
slice to generate index page 
+    /// and fill the page footer record meta.
+    Status finalize(std::vector<Slice>* body, segment_v2::PageFooterPB* 
footer);
+
+private:
+    /// the number of delete items in delete bitmap index
+    uint32_t _num_items;
+
+    /// roaring bitmap to record rowids of delete items 
+    Roaring _delete_bitmap;
+
+    faststring _buf;
+};
+
+/// An Iterator to iterate one delete bitmap index.
+/// Client can use this class to access the bitmap.
+class DeleteBitmapIndexIterator {
+public:
+    /// Construction function of DeleteBitmapIndexBuilder
+    DeleteBitmapIndexIterator(const DeleteBitmapIndexDecoder* decoder)
+            : _decoder(decoder) {}
+
+    /// get const delete bitmap to access delete bitmap record
+    const Roaring& get_delete_bitmap() const;
+
+private:
+    const DeleteBitmapIndexDecoder* _decoder;
+};
+
+/// Used to decode bitmap ordinal to footer and encoded index data.
+/// Usage:
+///      DeleteBitmapIndexDecoder decoder;
+///      decoder.parse(body, footer);
+class DeleteBitmapIndexDecoder {
+public:
+    DeleteBitmapIndexDecoder(bool parsed = false) : _parsed(parsed), 
_delete_bitmap() {}
+
+    /// client should assure that body is available when this class is used
+    Status parse(const Slice& body, const segment_v2::DeleteIndexFooterPB& 
footer);
+
+    /// The number of delete items in delete bitmap index
+    uint32_t num_items() const {
+        DCHECK(_parsed);
+        return _footer.num_items();
+    }
+
+    /// Get the iterator of DeleteBitmapIndex
+    DeleteBitmapIndexIterator get_iterator() const { 
+        DCHECK(_parsed);
+        return {this};
+    }
+
+    /// get const delete bitmap to access delete bitmap record
+    const Roaring& get_delete_bitmap() const { return _delete_bitmap; }
+
+private:
+    bool _parsed;
+
+    // All following fields are only valid after parse has been executed 
successfully
+    segment_v2::DeleteIndexFooterPB _footer;
+    Roaring _delete_bitmap;
+};
+
+}
diff --git a/be/src/olap/row_block.cpp b/be/src/olap/row_block.cpp
index 09cf48b..e20d1a4 100644
--- a/be/src/olap/row_block.cpp
+++ b/be/src/olap/row_block.cpp
@@ -52,6 +52,7 @@ OLAPStatus RowBlock::init(const RowBlockInfo& block_info) {
     _info = block_info;
     _null_supported = block_info.null_supported;
     _capacity = _info.row_num;
+    _delete_bitmap = std::unique_ptr<Roaring>(new Roaring());
     _compute_layout();
     _mem_buf = new char[_mem_buf_bytes];
     return OLAP_SUCCESS;
@@ -75,6 +76,7 @@ void RowBlock::clear() {
     _pos = 0;
     _limit = 0;
     _mem_pool->clear();
+    _delete_bitmap.reset();
 }
 
 void RowBlock::_compute_layout() {
diff --git a/be/src/olap/row_block.h b/be/src/olap/row_block.h
index c9d277f..a439338 100644
--- a/be/src/olap/row_block.h
+++ b/be/src/olap/row_block.h
@@ -68,6 +68,8 @@ public:
 
     inline void get_row(uint32_t row_index, RowCursor* cursor) const {
         cursor->attach(_mem_buf + row_index * _mem_row_bytes);
+        // set current row whether it is deleted
+        cursor->set_is_delete(_delete_bitmap->contains(row_index));
     }
 
     template<typename RowType>
@@ -93,6 +95,10 @@ public:
         return _mem_pool.get();
     }
 
+    Roaring* get_delete_bitmap() const { 
+        return _delete_bitmap.get();
+    }
+
     // 重用rowblock之前需调用clear,恢复到init之后的原始状态
     void clear();
 
@@ -139,6 +145,8 @@ private:
 
     std::unique_ptr<MemTracker> _tracker;
     std::unique_ptr<MemPool> _mem_pool;
+    // delete bitmap which records deleted rows
+    std::unique_ptr<Roaring> _delete_bitmap;
     // 由于内部持有内存资源,所以这里禁止拷贝和赋值
     DISALLOW_COPY_AND_ASSIGN(RowBlock);
 };
diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index 1ab8782..66d480e 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -32,7 +32,8 @@ RowBlockV2::RowBlockV2(const Schema& schema, uint16_t 
capacity)
       _column_datas(_schema.num_columns(), nullptr),
       _column_null_bitmaps(_schema.num_columns(), nullptr),
       _pool(new MemPool(&_tracker)),
-      _selection_vector(nullptr) {
+      _selection_vector(nullptr),
+      _delete_bitmap(new Roaring()) {
     auto bitmap_size = BitmapSize(capacity);
     for (auto cid : _schema.column_ids()) {
         size_t data_size = _schema.column(cid)->type_info()->size() * 
_capacity;
@@ -68,7 +69,8 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper, 
RowBlock* dst) {
                     helper->set_null(cid);
                 } else {
                     helper->set_not_null(cid);
-                    helper->set_field_content_shallow(cid,
+                    helper->set_field_content_shallow(
+                            cid,
                             reinterpret_cast<const 
char*>(column_block(cid).cell_ptr(row_idx)));
                 }
             }
@@ -77,11 +79,13 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper, 
RowBlock* dst) {
                 uint16_t row_idx = _selection_vector[i];
                 dst->get_row(i, helper);
                 helper->set_not_null(cid);
-                helper->set_field_content_shallow(cid,
-                        reinterpret_cast<const 
char*>(column_block(cid).cell_ptr(row_idx)));
+                helper->set_field_content_shallow(
+                        cid, reinterpret_cast<const 
char*>(column_block(cid).cell_ptr(row_idx)));
             }
         }
     }
+
+    dst->get_delete_bitmap()->swap(*(_delete_bitmap.get()));
     // swap MemPool to copy string content
     dst->mem_pool()->exchange_data(_pool.get());
     dst->set_pos(0);
diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h
index 6716592..c22eca8 100644
--- a/be/src/olap/row_block2.h
+++ b/be/src/olap/row_block2.h
@@ -59,6 +59,7 @@ public:
     // all previously returned ColumnBlocks are invalidated after clear(), 
accessing them
     // will result in undefined behavior.
     void clear() {
+        _delete_bitmap.reset();
         _num_rows = 0;
         _pool->clear();
         _selected_size = _capacity;
@@ -110,6 +111,11 @@ public:
         _delete_state = delete_state;
     }
 
+    // get delete bitmap
+    Roaring* get_delete_bitmap() {
+        return _delete_bitmap.get();
+    }
+
 private:
     Schema _schema;
     size_t _capacity;
@@ -133,6 +139,8 @@ private:
 
     // block delete state
     DelCondSatisfied _delete_state;
+    // delete bit map
+    std::unique_ptr<Roaring> _delete_bitmap;
 };
 
 // Stands for a row in RowBlockV2. It is consisted of a RowBlockV2 reference
diff --git a/be/src/olap/row_cursor.cpp b/be/src/olap/row_cursor.cpp
index 9fad7ba..22533de 100644
--- a/be/src/olap/row_cursor.cpp
+++ b/be/src/olap/row_cursor.cpp
@@ -30,7 +30,8 @@ using std::vector;
 namespace doris {
 RowCursor::RowCursor() :
         _fixed_len(0),
-        _variable_len(0) {}
+        _variable_len(0),
+        _is_delete(false) {}
 
 RowCursor::~RowCursor() {
     delete [] _owned_fixed_buf;
diff --git a/be/src/olap/row_cursor.h b/be/src/olap/row_cursor.h
index eb4e537..fb77fe7 100644
--- a/be/src/olap/row_cursor.h
+++ b/be/src/olap/row_cursor.h
@@ -74,6 +74,10 @@ public:
         column_schema(index)->to_index(&dst_cell, cell(index));
     }
 
+    void set_is_delete(bool is_delete) { _is_delete = is_delete; }
+
+    bool is_delete() { return _is_delete; }
+
     // deep copy field content (ignore null-byte)
     void set_field_content(size_t index, const char* buf, MemPool* mem_pool) {
         char* dest = cell_ptr(index);
@@ -166,6 +170,9 @@ private:
     char* _variable_buf = nullptr;
     size_t _variable_len;
 
+    // current row is deleted
+    bool _is_delete;
+
     DISALLOW_COPY_AND_ASSIGN(RowCursor);
 };
 }  // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index 002f056..b132065 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -80,6 +80,8 @@ Status Segment::new_iterator(const Schema& schema,
     }
 
     RETURN_IF_ERROR(_load_index());
+    RETURN_IF_ERROR(_load_delete_index());
+
     iter->reset(new SegmentIterator(this->shared_from_this(), schema));
     iter->get()->init(read_options);
     return Status::OK();
@@ -157,6 +159,36 @@ Status Segment::_load_index() {
     });
 }
 
+Status Segment::_load_delete_index() {
+    return _delete_index_once.call([this] {
+        if (!_footer.has_delete_index_page()) {
+            _delete_index_decoder.reset(new DeleteBitmapIndexDecoder(true));
+            Status::OK(); 
+        }
+
+        // read and parse delete key index page
+        std::unique_ptr<fs::ReadableBlock> rblock;
+        fs::BlockManager* block_mgr = fs::fs_util::block_manager();
+        RETURN_IF_ERROR(block_mgr->open_block(_fname, &rblock));
+
+        PageReadOptions opts;
+        opts.rblock = rblock.get();
+        opts.page_pointer = PagePointer(_footer.delete_index_page());
+        opts.codec = nullptr; // delete key index page uses NO_COMPRESSION for 
now
+        OlapReaderStatistics tmp_stats;
+        opts.stats = &tmp_stats;
+
+        Slice body;
+        PageFooterPB footer;
+        RETURN_IF_ERROR(PageIO::read_and_decompress_page(opts, 
&_sk_index_handle, &body, &footer));
+        DCHECK_EQ(footer.type(), SHORT_KEY_PAGE);
+        DCHECK(footer.has_delete_index_page_footer());
+
+        _delete_index_decoder.reset(new DeleteBitmapIndexDecoder());
+        return _delete_index_decoder->parse(body, 
footer.delete_index_page_footer());
+    });
+}
+
 Status Segment::_create_column_readers() {
     for (uint32_t ordinal = 0; ordinal < _footer.columns().size(); ++ordinal) {
         auto& column_pb = _footer.columns(ordinal);
diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h
index 78539fa..d279489 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -28,6 +28,8 @@
 #include "olap/iterators.h"
 #include "olap/rowset/segment_v2/page_handle.h"
 #include "olap/short_key_index.h"
+#include "olap/delete_bitmap_index.h"
+
 #include "olap/tablet_schema.h"
 #include "util/faststring.h"
 #include "util/once.h"
@@ -95,6 +97,11 @@ public:
         return _sk_index_decoder->upper_bound(key);
     }
 
+    DeleteBitmapIndexIterator delete_index_iterator() const { 
+        DCHECK(_delete_index_once.has_called() && 
_delete_index_once.stored_result().ok());
+        return _delete_index_decoder->get_iterator();
+    }
+
     // This will return the last row block in this segment.
     // NOTE: Before call this function , client should assure that
     // this segment is not empty.
@@ -120,6 +127,8 @@ private:
     // May be called multiple times, subsequent calls will no op.
     Status _load_index();
 
+    Status _load_delete_index();
+
 private:
     friend class SegmentIterator;
     std::string _fname;
@@ -140,10 +149,16 @@ private:
 
     // used to guarantee that short key index will be loaded at most once in a 
thread-safe way
     DorisCallOnce<Status> _load_index_once;
+
+    // used to guarantee that delete index will be loaded at most once in a 
thread-safe way
+    DorisCallOnce<Status> _delete_index_once;
     // used to hold short key index page in memory
     PageHandle _sk_index_handle;
     // short key index decoder
     std::unique_ptr<ShortKeyIndexDecoder> _sk_index_decoder;
+    // delete index decoder
+    std::unique_ptr<DeleteBitmapIndexDecoder> _delete_index_decoder;
+
 };
 
 }
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index ec8bb47..b039e81 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -561,6 +561,28 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
             i += range_size;
         }
     }
+
+    // phase 4: read delete index, fill in the row whether is delete.
+    {
+        Roaring* current_bitmap = block->get_delete_bitmap();
+
+        // fetch delete index
+        const Roaring& delete_bitmap = 
_segment->delete_index_iterator().get_delete_bitmap();
+
+        const uint16_t* sv = block->selection_vector();
+        const uint16_t sv_size = block->selected_size();
+        uint16_t i = 0;
+        // check the delete rows and fill in the current_bitmap
+        // which use i in selection_vector as rowid 
+        while (i < sv_size) {
+
+            if(delete_bitmap.contains(_block_rowids[sv[i]])) {
+                current_bitmap->add(i);
+            }
+            i++;
+        }
+    }
+
     return Status::OK();
 }
 
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index a977407..a9f7aba 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -77,6 +77,7 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec 
__attribute__((unused))
         _column_writers.push_back(std::move(writer));
     }
     _index_builder.reset(new ShortKeyIndexBuilder(_segment_id, 
_opts.num_rows_per_block));
+    _delete_bitmap_builder.reset(new DeleteBitmapIndexBuilder());
     return Status::OK();
 }
 
@@ -111,6 +112,7 @@ uint64_t SegmentWriter::estimate_segment_size() {
         size += column_writer->estimate_buffer_size();
     }
     size += _index_builder->size();
+    size += _delete_bitmap_builder->size();
     return size;
 }
 
@@ -125,6 +127,7 @@ Status SegmentWriter::finalize(uint64_t* segment_file_size, 
uint64_t* index_size
     RETURN_IF_ERROR(_write_bitmap_index());
     RETURN_IF_ERROR(_write_bloom_filter_index());
     RETURN_IF_ERROR(_write_short_key_index());
+    RETURN_IF_ERROR(_write_delete_index());
     *index_size = _wblock->bytes_appended() - index_offset;
     RETURN_IF_ERROR(_write_footer());
     RETURN_IF_ERROR(_wblock->finalize());
@@ -180,6 +183,17 @@ Status SegmentWriter::_write_short_key_index() {
     return Status::OK();
 }
 
+Status SegmentWriter::_write_delete_index() {
+    std::vector<Slice> body;
+    PageFooterPB footer;
+    RETURN_IF_ERROR(_delete_bitmap_builder->finalize(&body, &footer));
+    PagePointer pp;
+    // delete index page is not compressed right now
+    RETURN_IF_ERROR(PageIO::write_page(_wblock, body, footer, &pp));
+    pp.to_proto(_footer.mutable_delete_index_page());
+    return Status::OK();
+}
+
 Status SegmentWriter::_write_footer() {
     _footer.set_num_rows(_row_count);
 
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h 
b/be/src/olap/rowset/segment_v2/segment_writer.h
index 4703f68..8eee044 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -25,6 +25,7 @@
 #include "common/status.h" // Status
 #include "gen_cpp/segment_v2.pb.h"
 #include "gutil/macros.h"
+#include "olap/delete_bitmap_index.h"
 
 namespace doris {
 
@@ -75,6 +76,7 @@ private:
     Status _write_bitmap_index();
     Status _write_bloom_filter_index();
     Status _write_short_key_index();
+    Status _write_delete_index();
     Status _write_footer();
     Status _write_raw_data(const std::vector<Slice>& slices);
 
@@ -88,6 +90,7 @@ private:
 
     SegmentFooterPB _footer;
     std::unique_ptr<ShortKeyIndexBuilder> _index_builder;
+    std::unique_ptr<DeleteBitmapIndexBuilder> _delete_bitmap_builder;
     std::vector<std::unique_ptr<ColumnWriter>> _column_writers;
     uint32_t _row_count = 0;
 };
diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt
index 76e0569..2e500af 100644
--- a/be/test/olap/CMakeLists.txt
+++ b/be/test/olap/CMakeLists.txt
@@ -37,6 +37,7 @@ ADD_BE_TEST(in_list_predicate_test)
 ADD_BE_TEST(null_predicate_test)
 ADD_BE_TEST(file_helper_test)
 ADD_BE_TEST(file_utils_test)
+ADD_BE_TEST(delete_bitmap_index_test)
 ADD_BE_TEST(delete_handler_test)
 ADD_BE_TEST(column_reader_test)
 ADD_BE_TEST(schema_change_test)
diff --git a/be/test/olap/delete_bitmap_index_test.cpp 
b/be/test/olap/delete_bitmap_index_test.cpp
new file mode 100644
index 0000000..ffc9386
--- /dev/null
+++ b/be/test/olap/delete_bitmap_index_test.cpp
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/delete_bitmap_index.h"
+
+#include <gtest/gtest.h>
+
+#include "olap/tablet_schema_helper.h"
+#include "olap/row_cursor.h"
+#include "util/debug_util.h"
+
+namespace doris {
+
+class DeleteBitmapIndexTest : public testing::Test {
+public:
+    DeleteBitmapIndexTest() { }
+    virtual ~DeleteBitmapIndexTest() {
+    }
+};
+
+TEST_F(DeleteBitmapIndexTest, buider) {
+    DeleteBitmapIndexBuilder builder;
+
+    int num_items = 0;
+    for (int i = 1000; i < 10000; i += 2) {
+        builder.add_delete_item(i);
+        num_items++;
+    }
+    std::vector<Slice> slices;
+    segment_v2::PageFooterPB footer;
+    auto st = builder.finalize(&slices, &footer);
+    ASSERT_TRUE(st.ok());
+    ASSERT_EQ(segment_v2::DELETE_INDEX_PAGE, footer.type());
+    ASSERT_EQ(num_items, footer.delete_index_page_footer().num_items());
+    
+    std::string buf;
+    for (auto& slice : slices) {
+        buf.append(slice.data, slice.size);
+    }
+
+    DeleteBitmapIndexDecoder decoder;
+    st = decoder.parse(buf, footer.delete_index_page_footer());
+    ASSERT_TRUE(st.ok());
+
+    auto& bitmap = decoder.get_iterator().get_delete_bitmap();
+    {
+        ASSERT_TRUE(bitmap.contains(1002));
+    }
+    {
+        ASSERT_TRUE(!bitmap.contains(1003));
+    }
+    {
+        ASSERT_TRUE(!bitmap.contains(5003));
+    }
+    {
+        ASSERT_TRUE(bitmap.contains(5002));
+    }
+}
+}
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
+
diff --git a/docs/zh-CN/internal/doris_storage_optimization.md 
b/docs/zh-CN/internal/doris_storage_optimization.md
index 3d65388..a2a49f5 100644
--- a/docs/zh-CN/internal/doris_storage_optimization.md
+++ b/docs/zh-CN/internal/doris_storage_optimization.md
@@ -163,17 +163,18 @@ message ColumnMetaPB {
        repeated MetadataPairPB column_meta_datas;
 }
 
-message FileFooterPB {
+message SegmentFooterPB {
        optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用
        repeated ColumnPB schema = 5; // 列Schema
-    optional uint64 num_values = 4; // 文件中保存的行数
-    optional uint64 index_footprint = 7; // 索引大小
-    optional uint64 data_footprint = 8; // 数据大小
+  optional uint64 num_values = 4; // 文件中保存的行数
+  optional uint64 index_footprint = 7; // 索引大小
+  optional uint64 data_footprint = 8; // 数据大小
        optional uint64 raw_data_footprint = 8; // 原始数据大小
 
-    optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
-    repeated ColumnMetaPB column_metas = 10; // 列元数据
+  optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
+  repeated ColumnMetaPB column_metas = 10; // 列元数据
        optional PagePointerPB key_index_page; // short key索引page
+  optional PagePointerPB delete_index_page; // 删除索引page
 }
 
 ```
diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto
index 93c3f19..9ed7c2d 100644
--- a/gensrc/proto/segment_v2.proto
+++ b/gensrc/proto/segment_v2.proto
@@ -59,6 +59,7 @@ enum PageTypePB {
     INDEX_PAGE = 2;
     DICTIONARY_PAGE = 3;
     SHORT_KEY_PAGE = 4;
+    DELETE_INDEX_PAGE = 5;
 }
 
 message DataPageFooterPB {
@@ -106,6 +107,13 @@ message ShortKeyFooterPB {
     optional uint32 num_segment_rows = 6;
 }
 
+message DeleteIndexFooterPB {
+    // How many index item in this index.
+    optional uint32 num_items = 1;
+    // The total bytes occupied by the delete index
+    optional uint32 content_bytes = 2;
+}
+
 message PageFooterPB {
     // required: indicates which of the *_footer fields is set
     optional PageTypePB type = 1;
@@ -120,6 +128,8 @@ message PageFooterPB {
     optional DictPageFooterPB dict_page_footer = 9;
     // present only when type == SHORT_KEY_PAGE
     optional ShortKeyFooterPB short_key_page_footer = 10;
+    // present only when type == DELETE_INDEX_PAGE
+    optional DeleteIndexFooterPB delete_index_page_footer = 11;
 }
 
 message ZoneMapPB {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to