This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 687c676160 [FIX](map)fix column map for offset next_array_item_rowid order (#23250) 687c676160 is described below commit 687c67616060c57f706cb21e92824a6fd0f94205 Author: amory <wangqian...@selectdb.com> AuthorDate: Thu Aug 24 10:57:40 2023 +0800 [FIX](map)fix column map for offset next_array_item_rowid order (#23250) * fix column map for offset next_array_item_rowid order * add regress test --- be/src/olap/rowset/segment_v2/column_reader.cpp | 26 +++++++--- be/src/olap/rowset/segment_v2/column_writer.cpp | 20 ++++---- .../complex_types/map_uniq_with_local_tvf.out | 10 ++++ regression-test/data/types/complex_types/mm.orc | Bin 0 -> 19666267 bytes .../complex_types/map_uniq_with_local_tvf.groovy | 57 +++++++++++++++++++++ 5 files changed, 98 insertions(+), 15 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 7a7eec9578..1f98b9032c 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -674,6 +674,7 @@ Status MapFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr auto& column_offsets = static_cast<vectorized::ColumnArray::ColumnOffsets&>(*column_offsets_ptr); RETURN_IF_ERROR(_offsets_iterator->_calculate_offsets(start, column_offsets)); + DCHECK(column_offsets.get_data().back() >= column_offsets.get_data()[start - 1]); size_t num_items = column_offsets.get_data().back() - column_offsets.get_data()[start - 1]; // -1 is valid auto key_ptr = column_map->get_keys().assume_mutable(); @@ -808,20 +809,33 @@ Status OffsetFileColumnIterator::_peek_one_offset(ordinal_t* offset) { return Status::OK(); } +/** + * first_storage_offset read from page should smaller than next_storage_offset which here call _peek_one_offset from page, + and first_column_offset is keep in memory data which is different dimension with (first_storage_offset and next_storage_offset) + eg. step1. read page: first_storage_offset = 16382 + step2. read page below with _peek_one_offset(&last_offset): last_offset = 16387 + step3. first_offset = 126 which is calculate in column offsets + for loop column offsets element in size + we can calculate from first_storage_offset to next_storage_offset one by one to fill with offsets_data in memory column offsets + * @param start + * @param column_offsets + * @return + */ Status OffsetFileColumnIterator::_calculate_offsets( ssize_t start, vectorized::ColumnArray::ColumnOffsets& column_offsets) { - ordinal_t last_offset = 0; - RETURN_IF_ERROR(_peek_one_offset(&last_offset)); + ordinal_t next_storage_offset = 0; + RETURN_IF_ERROR(_peek_one_offset(&next_storage_offset)); // calculate real offsets auto& offsets_data = column_offsets.get_data(); - ordinal_t first_offset = offsets_data[start - 1]; // -1 is valid - ordinal_t first_ord = offsets_data[start]; + ordinal_t first_column_offset = offsets_data[start - 1]; // -1 is valid + ordinal_t first_storage_offset = offsets_data[start]; for (ssize_t i = start; i < offsets_data.size() - 1; ++i) { - offsets_data[i] = first_offset + (offsets_data[i + 1] - first_ord); + offsets_data[i] = first_column_offset + (offsets_data[i + 1] - first_storage_offset); } // last offset - offsets_data[offsets_data.size() - 1] = first_offset + (last_offset - first_ord); + offsets_data[offsets_data.size() - 1] = + first_column_offset + (next_storage_offset - first_storage_offset); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 4ddd7e2c6b..dcb0f89858 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -1083,17 +1083,19 @@ Status MapColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { size_t element_cnt = size_t((unsigned long)(*data_ptr)); auto offset_data = *(data_ptr + 1); const uint8_t* offsets_ptr = (const uint8_t*)offset_data; - RETURN_IF_ERROR(_offsets_writer->append_data(&offsets_ptr, num_rows)); - if (element_cnt == 0) { - return Status::OK(); - } - for (size_t i = 0; i < 2; ++i) { - auto data = *(data_ptr + 2 + i); - auto nested_null_map = *(data_ptr + 2 + 2 + i); - RETURN_IF_ERROR(_kv_writers[i]->append(reinterpret_cast<const uint8_t*>(nested_null_map), - reinterpret_cast<const void*>(data), element_cnt)); + if (element_cnt > 0) { + for (size_t i = 0; i < 2; ++i) { + auto data = *(data_ptr + 2 + i); + auto nested_null_map = *(data_ptr + 2 + 2 + i); + RETURN_IF_ERROR( + _kv_writers[i]->append(reinterpret_cast<const uint8_t*>(nested_null_map), + reinterpret_cast<const void*>(data), element_cnt)); + } } + // make sure the order : offset writer flush next_array_item_ordinal after kv_writers append_data + // because we use _kv_writers[0]->get_next_rowid() to set next_array_item_ordinal in offset page footer + RETURN_IF_ERROR(_offsets_writer->append_data(&offsets_ptr, num_rows)); return Status::OK(); } diff --git a/regression-test/data/types/complex_types/map_uniq_with_local_tvf.out b/regression-test/data/types/complex_types/map_uniq_with_local_tvf.out new file mode 100644 index 0000000000..dee62c1518 --- /dev/null +++ b/regression-test/data/types/complex_types/map_uniq_with_local_tvf.out @@ -0,0 +1,10 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +25000 + +-- !sql -- +25000 + +-- !sql -- +25000 + diff --git a/regression-test/data/types/complex_types/mm.orc b/regression-test/data/types/complex_types/mm.orc new file mode 100644 index 0000000000..8042f56dae Binary files /dev/null and b/regression-test/data/types/complex_types/mm.orc differ diff --git a/regression-test/suites/types/complex_types/map_uniq_with_local_tvf.groovy b/regression-test/suites/types/complex_types/map_uniq_with_local_tvf.groovy new file mode 100644 index 0000000000..c77e74e0b6 --- /dev/null +++ b/regression-test/suites/types/complex_types/map_uniq_with_local_tvf.groovy @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("map_uniq_with_local_tvf", "p0") { + def table_name = "map_uniq" + List<List<Object>> backends = sql """ show backends """ + def dataFilePath = context.config.dataPath + "/types/complex_types/" + assertTrue(backends.size() > 0) + def be_id = backends[0][0] + // cluster mode need to make sure all be has this data + def outFilePath="/" + def transFile01="${dataFilePath}/mm.orc" + for (List<Object> backend : backends) { + def be_host = backend[1] + scpFiles ("root", be_host, transFile01, outFilePath, false); + } + sql "DROP TABLE IF EXISTS ${table_name};" + sql """ + CREATE TABLE ${table_name} ( + `id` int(11) NULL, + `m` MAP<text,text> NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); """ + + qt_sql """ + insert into ${table_name} select * from local( + "file_path" = "${outFilePath}/mm.orc", + "backend_id" = "${be_id}", + "format" = "orc");""" + qt_sql """ select count(m) from ${table_name}; """ + qt_sql """ select count(m) from ${table_name} where map_size(m) > 0;""" + +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org