kangkaisen commented on a change in pull request #2319: Add Bitmap index reader
URL: https://github.com/apache/incubator-doris/pull/2319#discussion_r352555263
 
 

 ##########
 File path: be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
 ##########
 @@ -17,55 +17,254 @@
 
 #include "olap/rowset/segment_v2/indexed_column_reader.h"
 
+#include "env/env.h" // for RandomAccessFile
+#include "gutil/strings/substitute.h" // for Substitute
+#include "olap/key_coder.h"
+#include "olap/rowset/segment_v2/encoding_info.h" // for EncodingInfo
+#include "olap/rowset/segment_v2/index_page.h" // for IndexPageReader
+#include "olap/rowset/segment_v2/options.h" // for PageDecoderOptions
+#include "olap/rowset/segment_v2/page_compression.h"
+#include "olap/rowset/segment_v2/page_decoder.h" // for PagePointer
+#include "util/crc32c.h"
+#include "util/rle_encoding.h" // for RleDecoder
+
 namespace doris {
 namespace segment_v2 {
 
-IndexedColumnReader::IndexedColumnReader(RandomAccessFile* file) {
-    // TODO
-}
-Status IndexedColumnReader::init() {
-    return Status(); // TODO
-}
-Status 
IndexedColumnReader::new_iterator(std::unique_ptr<IndexedColumnIterator>* iter) 
{
-    return Status(); // TODO
-}
-Status IndexedColumnReader::read_page(const PagePointer& pp, PageHandle* ret) {
-    return Status(); // TODO
-}
-const IndexedColumnMetaPB& IndexedColumnReader::meta() const {
-    static IndexedColumnMetaPB temp;
-    return temp; // TODO
-}
-bool IndexedColumnReader::has_ordinal_index() const {
-    return false; // TODO
+using strings::Substitute;
+
+Status IndexedColumnReader::load() {
+    _type_info = get_type_info((FieldType)_meta.data_type());
+    if (_type_info == nullptr) {
+        return Status::NotSupported(Substitute("unsupported typeinfo, 
type=$0", _meta.data_type()));
+    }
+    RETURN_IF_ERROR(EncodingInfo::get(_type_info, _meta.encoding(), 
&_encoding_info));
+    RETURN_IF_ERROR(get_block_compression_codec(_meta.compression(), 
&_compress_codec));
+    _validx_key_coder = get_key_coder(_type_info->type());
+
+    // read and parse ordinal index page when exists
+    if (_meta.has_ordinal_index_meta()) {
+        if (_meta.ordinal_index_meta().is_root_data_page()) {
+            _sole_data_page = 
PagePointer(_meta.ordinal_index_meta().root_page());
+        } else {
+            RETURN_IF_ERROR(read_page(_meta.ordinal_index_meta().root_page(), 
&_ordinal_index_page_handle));
+            
RETURN_IF_ERROR(_ordinal_index_reader.parse(_ordinal_index_page_handle.data()));
+            _has_index_page = true;
+        }
+    }
+
+    // read and parse value index page when exists
+    if (_meta.has_value_index_meta()) {
+        if (_meta.value_index_meta().is_root_data_page()) {
+            _sole_data_page = 
PagePointer(_meta.value_index_meta().root_page());
+        } else {
+            RETURN_IF_ERROR(read_page(_meta.value_index_meta().root_page(), 
&_value_index_page_handle));
+            
RETURN_IF_ERROR(_value_index_reader.parse(_value_index_page_handle.data()));
+            _has_index_page = true;
+        }
+    }
+    _num_values = _meta.num_values();
+    return Status::OK();
 }
-bool IndexedColumnReader::has_value_index() const {
-    return false; // TODO
+
+Status IndexedColumnReader::read_page(const PagePointer& pp, PageHandle* 
handle) const {
+    auto cache = StoragePageCache::instance();
+    PageCacheHandle cache_handle;
+    StoragePageCache::CacheKey cache_key(_file->file_name(), pp.offset);
+    if (cache->lookup(cache_key, &cache_handle)) {
+        // we find page in cache, use it
+        *handle = PageHandle(std::move(cache_handle));
+        return Status::OK();
+    }
+    // Now we read this from file.
+    size_t page_size = pp.size;
+    if (page_size < sizeof(uint32_t)) {
+        return Status::Corruption(Substitute("Bad page, page size is too 
small, size=$0", page_size));
+    }
+
+    // Now we use this buffer to store page from storage, if this page is 
compressed
+    // this buffer will assigned uncompressed page, and origin content will be 
freed.
+    std::unique_ptr<uint8_t[]> page(new uint8_t[page_size]);
+    Slice page_slice(page.get(), page_size);
+    RETURN_IF_ERROR(_file->read_at(pp.offset, page_slice));
+
+    size_t data_size = page_size - 4;
+    if (_verify_checksum) {
+        uint32_t expect = decode_fixed32_le((uint8_t*)page_slice.data + 
page_slice.size - 4);
+        uint32_t actual = crc32c::Value(page_slice.data, page_slice.size - 4);
+        if (expect != actual) {
+            return Status::Corruption(
+                Substitute("Page checksum mismatch, actual=$0 vs expect=$1", 
actual, expect));
+        }
+    }
+
+    // remove page's suffix
+    page_slice.size = data_size;
+    if (_compress_codec != nullptr) {
+        PageDecompressor decompressor(page_slice, _compress_codec);
+
+        Slice uncompressed_page;
+        RETURN_IF_ERROR(decompressor.decompress_to(&uncompressed_page));
+
+        // If decompressor create new heap memory for uncompressed data,
+        // assign this uncompressed page to page and page slice
+        if (uncompressed_page.data != page_slice.data) {
+            page.reset((uint8_t*)uncompressed_page.data);
+        }
+        page_slice = uncompressed_page;
+    }
+    // insert this into cache and return the cache handle
+    cache->insert(cache_key, page_slice, &cache_handle);
+    page.release();
+    *handle = PageHandle(std::move(cache_handle));
+
+    return Status::OK();
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
+Status IndexedColumnIterator::_read_data_page(const PagePointer& page_pointer, 
ParsedPage* page) {
+    RETURN_IF_ERROR(_reader->read_page(page_pointer, &page->page_handle));
+    Slice data = page->page_handle.data();
 
-IndexedColumnIterator::IndexedColumnIterator(IndexedColumnReader* reader) {
-    // TODO
-}
-IndexedColumnIterator::~IndexedColumnIterator() {
-    // TODO
-}
-Status IndexedColumnIterator::seek_to_first() {
-    return Status(); // TODO
+    // decode first rowid
+    if (!get_varint32(&data, &page->first_rowid)) {
+        return Status::Corruption("Bad page, failed to decode first rowid");
+    }
+
+    // decode number rows
+    if (!get_varint32(&data, &page->num_rows)) {
+        return Status::Corruption("Bad page, failed to decode rows count");
+    }
+
+    // create page data decoder
+    PageDecoderOptions options;
+    RETURN_IF_ERROR(_reader->encoding_info()->create_page_decoder(data, 
options, &page->data_decoder));
+    RETURN_IF_ERROR(page->data_decoder->init());
+
+    page->offset_in_page = 0;
+    return Status::OK();
 }
+
 Status IndexedColumnIterator::seek_to_ordinal(rowid_t idx) {
-    return Status(); // TODO
-}
-Status IndexedColumnIterator::seek_at_or_after(const Slice& key, bool* 
exact_match) {
-    return Status(); // TODO
+    DCHECK(idx >= 0 && idx <= _reader->num_values());
+
+    if (!_reader->support_ordinal_seek()) {
+        return Status::NotSupported("no ordinal index");
+    }
+
+    // it's ok to seek past the last value
+    if (idx == _reader->num_values()) {
+        _current_rowid = idx;
+        _seeked = true;
+        return Status::OK();
+    }
+
+    if (!_data_page || !_data_page->contains(idx)) {
 
 Review comment:
   OK. We first check whether `_data_page == nullptr`, not `_data_page != nullptr`.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to