This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 3b0175d190 [FIX](map)fix map offsets set next_array_item_rowid (#23251) 3b0175d190 is described below commit 3b0175d190c85f55edf6bf502720a1abdff9e0fc Author: amory <wangqian...@selectdb.com> AuthorDate: Mon Aug 21 18:01:10 2023 +0800 [FIX](map)fix map offsets set next_array_item_rowid (#23251) --- be/src/olap/rowset/segment_v2/column_reader.cpp | 26 +++++++++++++++++++------ be/src/olap/rowset/segment_v2/column_writer.cpp | 20 ++++++++++--------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 03f29a6451..30d593ba80 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -675,6 +675,7 @@ Status MapFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr auto& column_offsets = static_cast<vectorized::ColumnArray::ColumnOffsets&>(*column_offsets_ptr); RETURN_IF_ERROR(_offsets_iterator->_calculate_offsets(start, column_offsets)); + DCHECK(column_offsets.get_data().back() >= column_offsets.get_data()[start - 1]); size_t num_items = column_offsets.get_data().back() - column_offsets.get_data()[start - 1]; // -1 is valid auto key_ptr = column_map->get_keys().assume_mutable(); @@ -809,20 +810,33 @@ Status OffsetFileColumnIterator::_peek_one_offset(ordinal_t* offset) { return Status::OK(); } +/** + * first_storage_offset read from page should smaller than next_storage_offset which here call _peek_one_offset from page, + and first_column_offset is keep in memory data which is different dimension with (first_storage_offset and next_storage_offset) + eg. step1. read page: first_storage_offset = 16382 + step2. read page below with _peek_one_offset(&last_offset): last_offset = 16387 + step3. first_offset = 126 which is calculate in column offsets + for loop column offsets element in size + we can calculate from first_storage_offset to next_storage_offset one by one to fill with offsets_data in memory column offsets + * @param start + * @param column_offsets + * @return + */ Status OffsetFileColumnIterator::_calculate_offsets( ssize_t start, vectorized::ColumnArray::ColumnOffsets& column_offsets) { - ordinal_t last_offset = 0; - RETURN_IF_ERROR(_peek_one_offset(&last_offset)); + ordinal_t next_storage_offset = 0; + RETURN_IF_ERROR(_peek_one_offset(&next_storage_offset)); // calculate real offsets auto& offsets_data = column_offsets.get_data(); - ordinal_t first_offset = offsets_data[start - 1]; // -1 is valid - ordinal_t first_ord = offsets_data[start]; + ordinal_t first_column_offset = offsets_data[start - 1]; // -1 is valid + ordinal_t first_storage_offset = offsets_data[start]; for (ssize_t i = start; i < offsets_data.size() - 1; ++i) { - offsets_data[i] = first_offset + (offsets_data[i + 1] - first_ord); + offsets_data[i] = first_column_offset + (offsets_data[i + 1] - first_storage_offset); } // last offset - offsets_data[offsets_data.size() - 1] = first_offset + (last_offset - first_ord); + offsets_data[offsets_data.size() - 1] = + first_column_offset + (next_storage_offset - first_storage_offset); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 4ddd7e2c6b..dcb0f89858 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -1083,17 +1083,19 @@ Status MapColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { size_t element_cnt = size_t((unsigned long)(*data_ptr)); auto offset_data = *(data_ptr + 1); const uint8_t* offsets_ptr = (const uint8_t*)offset_data; - RETURN_IF_ERROR(_offsets_writer->append_data(&offsets_ptr, num_rows)); - if (element_cnt == 0) { - return Status::OK(); - } - for (size_t i = 0; i < 2; ++i) { - auto data = *(data_ptr + 2 + i); - auto nested_null_map = *(data_ptr + 2 + 2 + i); - RETURN_IF_ERROR(_kv_writers[i]->append(reinterpret_cast<const uint8_t*>(nested_null_map), - reinterpret_cast<const void*>(data), element_cnt)); + if (element_cnt > 0) { + for (size_t i = 0; i < 2; ++i) { + auto data = *(data_ptr + 2 + i); + auto nested_null_map = *(data_ptr + 2 + 2 + i); + RETURN_IF_ERROR( + _kv_writers[i]->append(reinterpret_cast<const uint8_t*>(nested_null_map), + reinterpret_cast<const void*>(data), element_cnt)); + } } + // make sure the order : offset writer flush next_array_item_ordinal after kv_writers append_data + // because we use _kv_writers[0]->get_next_rowid() to set next_array_item_ordinal in offset page footer + RETURN_IF_ERROR(_offsets_writer->append_data(&offsets_ptr, num_rows)); return Status::OK(); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org