This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 687c676160 [FIX](map)fix column map for offset next_array_item_rowid 
order (#23250)
687c676160 is described below

commit 687c67616060c57f706cb21e92824a6fd0f94205
Author: amory <wangqian...@selectdb.com>
AuthorDate: Thu Aug 24 10:57:40 2023 +0800

    [FIX](map)fix column map for offset next_array_item_rowid order (#23250)
    
    * fix column map for offset next_array_item_rowid order
    
    * add regression test
---
 be/src/olap/rowset/segment_v2/column_reader.cpp    |  26 +++++++---
 be/src/olap/rowset/segment_v2/column_writer.cpp    |  20 ++++----
 .../complex_types/map_uniq_with_local_tvf.out      |  10 ++++
 regression-test/data/types/complex_types/mm.orc    | Bin 0 -> 19666267 bytes
 .../complex_types/map_uniq_with_local_tvf.groovy   |  57 +++++++++++++++++++++
 5 files changed, 98 insertions(+), 15 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 7a7eec9578..1f98b9032c 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -674,6 +674,7 @@ Status MapFileColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr
     auto& column_offsets =
             
static_cast<vectorized::ColumnArray::ColumnOffsets&>(*column_offsets_ptr);
     RETURN_IF_ERROR(_offsets_iterator->_calculate_offsets(start, 
column_offsets));
+    DCHECK(column_offsets.get_data().back() >= column_offsets.get_data()[start 
- 1]);
     size_t num_items =
             column_offsets.get_data().back() - column_offsets.get_data()[start 
- 1]; // -1 is valid
     auto key_ptr = column_map->get_keys().assume_mutable();
@@ -808,20 +809,33 @@ Status 
OffsetFileColumnIterator::_peek_one_offset(ordinal_t* offset) {
     return Status::OK();
 }
 
+/**
+ *  first_storage_offset read from the page should be smaller than next_storage_offset, which is obtained here by calling _peek_one_offset on the page;
+    first_column_offset is kept in the in-memory data, which is a different dimension from first_storage_offset and next_storage_offset.
+     eg. step1. read page: first_storage_offset = 16382
+         step2. read the page below with _peek_one_offset(&last_offset): last_offset = 16387
+         step3. first_offset = 126, which is calculated in column offsets
+         for each column-offsets element in size
+            we can calculate from first_storage_offset to next_storage_offset one by one to fill offsets_data in the in-memory column offsets
+ * @param start
+ * @param column_offsets
+ * @return
+ */
 Status OffsetFileColumnIterator::_calculate_offsets(
         ssize_t start, vectorized::ColumnArray::ColumnOffsets& column_offsets) 
{
-    ordinal_t last_offset = 0;
-    RETURN_IF_ERROR(_peek_one_offset(&last_offset));
+    ordinal_t next_storage_offset = 0;
+    RETURN_IF_ERROR(_peek_one_offset(&next_storage_offset));
 
     // calculate real offsets
     auto& offsets_data = column_offsets.get_data();
-    ordinal_t first_offset = offsets_data[start - 1]; // -1 is valid
-    ordinal_t first_ord = offsets_data[start];
+    ordinal_t first_column_offset = offsets_data[start - 1]; // -1 is valid
+    ordinal_t first_storage_offset = offsets_data[start];
     for (ssize_t i = start; i < offsets_data.size() - 1; ++i) {
-        offsets_data[i] = first_offset + (offsets_data[i + 1] - first_ord);
+        offsets_data[i] = first_column_offset + (offsets_data[i + 1] - 
first_storage_offset);
     }
     // last offset
-    offsets_data[offsets_data.size() - 1] = first_offset + (last_offset - 
first_ord);
+    offsets_data[offsets_data.size() - 1] =
+            first_column_offset + (next_storage_offset - first_storage_offset);
     return Status::OK();
 }
 
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp 
b/be/src/olap/rowset/segment_v2/column_writer.cpp
index 4ddd7e2c6b..dcb0f89858 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -1083,17 +1083,19 @@ Status MapColumnWriter::append_data(const uint8_t** 
ptr, size_t num_rows) {
     size_t element_cnt = size_t((unsigned long)(*data_ptr));
     auto offset_data = *(data_ptr + 1);
     const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
-    RETURN_IF_ERROR(_offsets_writer->append_data(&offsets_ptr, num_rows));
 
-    if (element_cnt == 0) {
-        return Status::OK();
-    }
-    for (size_t i = 0; i < 2; ++i) {
-        auto data = *(data_ptr + 2 + i);
-        auto nested_null_map = *(data_ptr + 2 + 2 + i);
-        RETURN_IF_ERROR(_kv_writers[i]->append(reinterpret_cast<const 
uint8_t*>(nested_null_map),
-                                               reinterpret_cast<const 
void*>(data), element_cnt));
+    if (element_cnt > 0) {
+        for (size_t i = 0; i < 2; ++i) {
+            auto data = *(data_ptr + 2 + i);
+            auto nested_null_map = *(data_ptr + 2 + 2 + i);
+            RETURN_IF_ERROR(
+                    _kv_writers[i]->append(reinterpret_cast<const 
uint8_t*>(nested_null_map),
+                                           reinterpret_cast<const 
void*>(data), element_cnt));
+        }
     }
+    // make sure of the ordering: the offsets writer flushes next_array_item_ordinal after
+    // the kv_writers append data, because we use _kv_writers[0]->get_next_rowid() to set
+    // next_array_item_ordinal in the offset page footer
+    RETURN_IF_ERROR(_offsets_writer->append_data(&offsets_ptr, num_rows));
     return Status::OK();
 }
 
diff --git 
a/regression-test/data/types/complex_types/map_uniq_with_local_tvf.out 
b/regression-test/data/types/complex_types/map_uniq_with_local_tvf.out
new file mode 100644
index 0000000000..dee62c1518
--- /dev/null
+++ b/regression-test/data/types/complex_types/map_uniq_with_local_tvf.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+25000
+
+-- !sql --
+25000
+
+-- !sql --
+25000
+
diff --git a/regression-test/data/types/complex_types/mm.orc 
b/regression-test/data/types/complex_types/mm.orc
new file mode 100644
index 0000000000..8042f56dae
Binary files /dev/null and b/regression-test/data/types/complex_types/mm.orc 
differ
diff --git 
a/regression-test/suites/types/complex_types/map_uniq_with_local_tvf.groovy 
b/regression-test/suites/types/complex_types/map_uniq_with_local_tvf.groovy
new file mode 100644
index 0000000000..c77e74e0b6
--- /dev/null
+++ b/regression-test/suites/types/complex_types/map_uniq_with_local_tvf.groovy
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("map_uniq_with_local_tvf", "p0") {
+    def table_name = "map_uniq"
+    List<List<Object>> backends = sql """ show backends """
+    def dataFilePath = context.config.dataPath + "/types/complex_types/"
+    assertTrue(backends.size() > 0)
+    def be_id = backends[0][0]
+    // cluster mode need to make sure all be has this data
+    def outFilePath="/"
+    def transFile01="${dataFilePath}/mm.orc"
+    for (List<Object> backend : backends) {
+         def be_host = backend[1]
+         scpFiles ("root", be_host, transFile01, outFilePath, false);
+    }
+    sql "DROP TABLE IF EXISTS ${table_name};"
+    sql """
+            CREATE TABLE ${table_name} (
+              `id` int(11) NULL,
+              `m` MAP<text,text> NULL
+            ) ENGINE=OLAP
+            UNIQUE KEY(`id`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "is_being_synced" = "false",
+            "storage_format" = "V2",
+            "light_schema_change" = "true",
+            "disable_auto_compaction" = "false",
+            "enable_single_replica_compaction" = "false"
+            ); """
+
+    qt_sql """
+           insert into ${table_name} select * from local(
+                       "file_path" = "${outFilePath}/mm.orc",
+                       "backend_id" = "${be_id}",
+                       "format" = "orc");"""
+    qt_sql  """ select count(m) from ${table_name}; """
+    qt_sql  """ select count(m) from ${table_name} where map_size(m) > 0;"""
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to