liutang123 commented on a change in pull request #3025: URL: https://github.com/apache/incubator-doris/pull/3025#discussion_r521078102
########## File path: be/src/olap/rowset/segment_v2/column_reader.cpp ########## @@ -296,6 +317,115 @@ Status ColumnReader::seek_at_or_before(ordinal_t ordinal, OrdinalPageIndexIterat return Status::OK(); } +Status ColumnReader::new_iterator(ColumnIterator** iterator) { + if (is_scalar_type((FieldType)_meta.type())) { + *iterator = new FileColumnIterator(this); + return Status::OK(); + } else { + auto type = (FieldType)_meta.type(); + switch(type) { + case FieldType::OLAP_FIELD_TYPE_ARRAY: { + ColumnIterator* item_iterator; + RETURN_IF_ERROR(_sub_readers[0]->new_iterator(&item_iterator)); + FileColumnIterator* offset_iterator = new FileColumnIterator(this); + *iterator = new ArrayFileColumnIterator(offset_iterator, item_iterator); + return Status::OK(); + } + default: + return Status::NotSupported("unsupported type to create iterator: " + std::to_string(type)); + } + } +} + +//////////////////////////////////////////////////////////////////////////////// + +ArrayFileColumnIterator::ArrayFileColumnIterator(FileColumnIterator* offset_reader, ColumnIterator* item_iterator) { + _offset_iterator.reset(offset_reader); + _item_iterator.reset(item_iterator); +} + +Status ArrayFileColumnIterator::init(const ColumnIteratorOptions& opts) { + RETURN_IF_ERROR(_offset_iterator->init(opts)); + RETURN_IF_ERROR(_item_iterator->init(opts)); + TypeInfo* bigint_type_info = get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_BIGINT); + RETURN_IF_ERROR(ColumnVectorBatch::create(1024, _offset_iterator->is_nullable(), bigint_type_info, nullptr, &_offset_batch)); + return Status::OK(); +} + +// every invoke this method, _offset_batch will be cover, so this method is not thread safe. +Status ArrayFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) { + // 1. 
read n offsets into _offset_batch; + _offset_batch->resize(*n + 1); + ColumnBlock ordinal_block(_offset_batch.get(), nullptr); + ColumnBlockView ordinal_view(&ordinal_block); + RETURN_IF_ERROR(_offset_iterator->next_batch(n, &ordinal_view, has_null)); + + if (*n == 0) { + return Status::OK(); + } + + // 2. Because we should read n + 1 offsets, so read one more here. + PageDecoder* offset_page_decoder = _offset_iterator->get_current_page()->data_decoder; + if (offset_page_decoder->has_remaining()) { // not _page->has_remaining() + size_t i = 1; + offset_page_decoder->peek_next_batch(&i, &ordinal_view); // not null + DCHECK(i == 1); + } else { + *(reinterpret_cast<ordinal_t*>(ordinal_view.data())) = + _offset_iterator->get_current_page()->next_array_item_ordinal; + } + ordinal_view.set_null_bits(1, false); + ordinal_view.advance(1); + + // 3. For nullable data, fill null ordinals from last to start: 0 N N 3 N 5 -> 0 3 3 3 5 5 Review comment: The last ordinal cannot be null. If the current page has more data, peek_next_batch will return the next non-null ordinal; otherwise next_array_item_ordinal will return the next non-null ordinal. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org