This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 00c9455f16 [fix](array-type) fix arrow column to doris array column (#10855)
00c9455f16 is described below

commit 00c9455f16bb6998a399c977f9bfc5d42de32276
Author: camby <104178...@qq.com>
AuthorDate: Sat Jul 16 11:49:42 2022 +0800

    [fix](array-type) fix arrow column to doris array column (#10855)

    * support merge array column, while convert from arrow column to doris array column

    * fix typo

    Co-authored-by: cambyzju <zhuxiaol...@baidu.com>
---
 be/src/vec/utils/arrow_column_to_doris_column.cpp      |  8 +++++---
 .../vec/utils/arrow_column_to_doris_column_test.cpp    | 18 ++++++++++++------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/be/src/vec/utils/arrow_column_to_doris_column.cpp b/be/src/vec/utils/arrow_column_to_doris_column.cpp
index 3d851d14e1..9f2f7ddb26 100644
--- a/be/src/vec/utils/arrow_column_to_doris_column.cpp
+++ b/be/src/vec/utils/arrow_column_to_doris_column.cpp
@@ -274,12 +274,14 @@ static Status convert_offset_from_list_column(const arrow::Array* array, size_t
     auto concrete_array = down_cast<const arrow::ListArray*>(array);
     auto arrow_offsets_array = concrete_array->offsets();
     auto arrow_offsets = down_cast<arrow::Int32Array*>(arrow_offsets_array.get());
+    auto prev_size = offsets_data.back();
     for (int64_t i = array_idx + 1; i < array_idx + num_elements + 1; ++i) {
-        // convert to doris offset, start from 0
-        offsets_data.emplace_back(arrow_offsets->Value(i) - arrow_offsets->Value(array_idx));
+        // convert to doris offset, start from offsets.back()
+        offsets_data.emplace_back(prev_size + arrow_offsets->Value(i) -
+                                  arrow_offsets->Value(array_idx));
     }
     *start_idx_for_data = arrow_offsets->Value(array_idx);
-    *num_for_data = offsets_data.back();
+    *num_for_data = offsets_data.back() - prev_size;
 
     return Status::OK();
 }
diff --git a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
index 606132ca9d..4eec72ae65 100644
--- a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
+++ b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
@@ -652,13 +652,13 @@ void test_arrow_to_array_column(ColumnWithTypeAndName& column,
                                 std::shared_ptr<arrow::DataType> value_type,
                                 std::shared_ptr<arrow::Array> values, const std::string& value,
                                 size_t& counter) {
-    ASSERT_EQ(column.column->size(), counter);
     auto array = create_array_array<ArrowType, is_nullable>(vec_offsets, null_map, value_type,
                                                             values, counter);
+    auto old_size = column.column->size();
     auto ret = arrow_column_to_doris_column(array.get(), 0, column.column, column.type,
                                             vec_offsets.size() - 1, "UTC");
     ASSERT_EQ(ret.ok(), true);
-    ASSERT_EQ(column.column->size(), counter);
+    ASSERT_EQ(column.column->size() - old_size, counter);
     MutableColumnPtr data_column = nullptr;
     vectorized::ColumnNullable* nullable_column = nullptr;
     if (column.column->is_nullable()) {
@@ -669,14 +669,16 @@ void test_arrow_to_array_column(ColumnWithTypeAndName& column,
         data_column = (*std::move(column.column)).mutate();
     }
     auto& array_column = static_cast<ColumnArray&>(*data_column);
-    EXPECT_EQ(array_column.size(), vec_offsets.size() - 1);
-    for (size_t i = 0; i < array_column.size(); ++i) {
-        auto v = get<Array>(array_column[i]);
+    EXPECT_EQ(array_column.size() - old_size, vec_offsets.size() - 1);
+    for (size_t i = 0; i < array_column.size() - old_size; ++i) {
+        auto v = get<Array>(array_column[old_size + i]);
         EXPECT_EQ(v.size(), vec_offsets[i + 1] - vec_offsets[i]);
+        EXPECT_EQ(v.size(), array_column.get_offsets()[old_size + i] -
+                            array_column.get_offsets()[old_size + i - 1]);
         if (is_nullable) {
             ASSERT_NE(nullable_column, nullptr);
             NullMap& map_data = nullable_column->get_null_map_data();
-            ASSERT_EQ(map_data[i], null_map[i]);
+            ASSERT_EQ(map_data[old_size + i], null_map[i]);
             if (!null_map[i]) {
                 // check value
                 for (size_t j = 0; j < v.size(); ++j) {
@@ -713,6 +715,10 @@ void test_array(const std::vector<std::string>& test_cases, size_t num_elements,
         size_t counter = 0;
         test_arrow_to_array_column<ArrowType, is_nullable>(column, vec_offsets, null_map,
                                                            value_type, array, value, counter);
+        // multi arrow array can merge into one array column, here test again with non empty array column
+        counter = 0;
+        test_arrow_to_array_column<ArrowType, is_nullable>(column, vec_offsets, null_map,
+                                                           value_type, array, value, counter);
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org