This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 00c9455f16 [fix](array-type) fix arrow column to doris array column (#10855)
00c9455f16 is described below

commit 00c9455f16bb6998a399c977f9bfc5d42de32276
Author: camby <104178...@qq.com>
AuthorDate: Sat Jul 16 11:49:42 2022 +0800

    [fix](array-type) fix arrow column to doris array column (#10855)
    
    * support merging multiple arrow arrays into one doris array column when converting from an arrow column to a doris array column
    
    * fix typo
    
    Co-authored-by: cambyzju <zhuxiaol...@baidu.com>
---
 be/src/vec/utils/arrow_column_to_doris_column.cpp      |  8 +++++---
 .../vec/utils/arrow_column_to_doris_column_test.cpp    | 18 ++++++++++++------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/be/src/vec/utils/arrow_column_to_doris_column.cpp 
b/be/src/vec/utils/arrow_column_to_doris_column.cpp
index 3d851d14e1..9f2f7ddb26 100644
--- a/be/src/vec/utils/arrow_column_to_doris_column.cpp
+++ b/be/src/vec/utils/arrow_column_to_doris_column.cpp
@@ -274,12 +274,14 @@ static Status convert_offset_from_list_column(const 
arrow::Array* array, size_t
     auto concrete_array = down_cast<const arrow::ListArray*>(array);
     auto arrow_offsets_array = concrete_array->offsets();
     auto arrow_offsets = 
down_cast<arrow::Int32Array*>(arrow_offsets_array.get());
+    auto prev_size = offsets_data.back();
     for (int64_t i = array_idx + 1; i < array_idx + num_elements + 1; ++i) {
-        // convert to doris offset, start from 0
-        offsets_data.emplace_back(arrow_offsets->Value(i) - 
arrow_offsets->Value(array_idx));
+        // convert to doris offset, start from offsets.back()
+        offsets_data.emplace_back(prev_size + arrow_offsets->Value(i) -
+                                  arrow_offsets->Value(array_idx));
     }
     *start_idx_for_data = arrow_offsets->Value(array_idx);
-    *num_for_data = offsets_data.back();
+    *num_for_data = offsets_data.back() - prev_size;
 
     return Status::OK();
 }
diff --git a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp 
b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
index 606132ca9d..4eec72ae65 100644
--- a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
+++ b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
@@ -652,13 +652,13 @@ void test_arrow_to_array_column(ColumnWithTypeAndName& 
column,
                                 std::shared_ptr<arrow::DataType> value_type,
                                 std::shared_ptr<arrow::Array> values, const 
std::string& value,
                                 size_t& counter) {
-    ASSERT_EQ(column.column->size(), counter);
     auto array = create_array_array<ArrowType, is_nullable>(vec_offsets, 
null_map, value_type,
                                                             values, counter);
+    auto old_size = column.column->size();
     auto ret = arrow_column_to_doris_column(array.get(), 0, column.column, 
column.type,
                                             vec_offsets.size() - 1, "UTC");
     ASSERT_EQ(ret.ok(), true);
-    ASSERT_EQ(column.column->size(), counter);
+    ASSERT_EQ(column.column->size() - old_size, counter);
     MutableColumnPtr data_column = nullptr;
     vectorized::ColumnNullable* nullable_column = nullptr;
     if (column.column->is_nullable()) {
@@ -669,14 +669,16 @@ void test_arrow_to_array_column(ColumnWithTypeAndName& 
column,
         data_column = (*std::move(column.column)).mutate();
     }
     auto& array_column = static_cast<ColumnArray&>(*data_column);
-    EXPECT_EQ(array_column.size(), vec_offsets.size() - 1);
-    for (size_t i = 0; i < array_column.size(); ++i) {
-        auto v = get<Array>(array_column[i]);
+    EXPECT_EQ(array_column.size() - old_size, vec_offsets.size() - 1);
+    for (size_t i = 0; i < array_column.size() - old_size; ++i) {
+        auto v = get<Array>(array_column[old_size + i]);
         EXPECT_EQ(v.size(), vec_offsets[i + 1] - vec_offsets[i]);
+        EXPECT_EQ(v.size(), array_column.get_offsets()[old_size + i] -
+                                    array_column.get_offsets()[old_size + i - 
1]);
         if (is_nullable) {
             ASSERT_NE(nullable_column, nullptr);
             NullMap& map_data = nullable_column->get_null_map_data();
-            ASSERT_EQ(map_data[i], null_map[i]);
+            ASSERT_EQ(map_data[old_size + i], null_map[i]);
             if (!null_map[i]) {
                 // check value
                 for (size_t j = 0; j < v.size(); ++j) {
@@ -713,6 +715,10 @@ void test_array(const std::vector<std::string>& 
test_cases, size_t num_elements,
         size_t counter = 0;
         test_arrow_to_array_column<ArrowType, is_nullable>(column, 
vec_offsets, null_map,
                                                            value_type, array, 
value, counter);
+        // multi arrow array can merge into one array column, here test again 
with non empty array column
+        counter = 0;
+        test_arrow_to_array_column<ArrowType, is_nullable>(column, 
vec_offsets, null_map,
+                                                           value_type, array, 
value, counter);
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to