This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 0eb73c82188708ebc2fd1ff816fc14a1b5c7f7b4 Author: amory <wangqian...@selectdb.com> AuthorDate: Sat Jul 8 15:51:14 2023 +0800 [FIX](serde) fix map/struct/array support arrow #21628 support map/struct support arrow format fix string arrow format fix largeInt 128 for arrow builder --- be/src/util/arrow/row_batch.cpp | 4 +- .../vec/data_types/serde/data_type_array_serde.cpp | 9 +- .../vec/data_types/serde/data_type_array_serde.h | 2 +- .../vec/data_types/serde/data_type_bitmap_serde.h | 2 +- .../data_types/serde/data_type_date64_serde.cpp | 5 +- .../vec/data_types/serde/data_type_date64_serde.h | 2 +- .../serde/data_type_datetimev2_serde.cpp | 4 +- .../data_types/serde/data_type_datetimev2_serde.h | 2 +- .../data_types/serde/data_type_datev2_serde.cpp | 6 +- .../vec/data_types/serde/data_type_datev2_serde.h | 2 +- .../data_types/serde/data_type_decimal_serde.cpp | 10 +- .../vec/data_types/serde/data_type_decimal_serde.h | 2 +- .../serde/data_type_fixedlengthobject_serde.h | 2 +- .../vec/data_types/serde/data_type_hll_serde.cpp | 4 +- be/src/vec/data_types/serde/data_type_hll_serde.h | 2 +- .../vec/data_types/serde/data_type_jsonb_serde.cpp | 21 ++++ .../vec/data_types/serde/data_type_jsonb_serde.h | 3 + .../vec/data_types/serde/data_type_map_serde.cpp | 41 ++++++- be/src/vec/data_types/serde/data_type_map_serde.h | 2 +- .../data_types/serde/data_type_nullable_serde.cpp | 15 +-- .../data_types/serde/data_type_nullable_serde.h | 2 +- .../data_types/serde/data_type_number_serde.cpp | 21 ++-- .../vec/data_types/serde/data_type_number_serde.h | 2 +- .../vec/data_types/serde/data_type_object_serde.h | 2 +- .../serde/data_type_quantilestate_serde.h | 2 +- be/src/vec/data_types/serde/data_type_serde.h | 18 ++- .../data_types/serde/data_type_string_serde.cpp | 15 +-- .../vec/data_types/serde/data_type_string_serde.h | 2 +- .../data_types/serde/data_type_struct_serde.cpp | 27 ++++- .../vec/data_types/serde/data_type_struct_serde.h | 2 +- .../serde/data_type_serde_arrow_test.cpp | 131 +++++++++++++++++++-- 31 files changed, 283 insertions(+), 81 deletions(-) diff --git a/be/src/util/arrow/row_batch.cpp b/be/src/util/arrow/row_batch.cpp index 110977c6e6..78fe346be6 100644 --- a/be/src/util/arrow/row_batch.cpp +++ b/be/src/util/arrow/row_batch.cpp @@ -65,10 +65,12 @@ Status convert_to_arrow_type(const TypeDescriptor& type, std::shared_ptr<arrow:: case TYPE_TIME: *result = arrow::float64(); break; + case TYPE_LARGEINT: + *result = arrow::fixed_size_binary(sizeof(int128_t)); + break; case TYPE_VARCHAR: case TYPE_CHAR: case TYPE_HLL: - case TYPE_LARGEINT: case TYPE_DATE: case TYPE_DATETIME: case TYPE_DATEV2: diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index f3d0098238..596209c9aa 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -49,7 +49,7 @@ void DataTypeArraySerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbVa column.deserialize_and_insert_from_arena(blob->getBlob()); } -void DataTypeArraySerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, +void DataTypeArraySerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { auto& array_column = static_cast<const ColumnArray&>(column); @@ -58,8 +58,13 @@ void DataTypeArraySerDe::write_column_to_arrow(const IColumn& column, const UInt auto& builder = assert_cast<arrow::ListBuilder&>(*array_builder); auto nested_builder = builder.value_builder(); for (size_t array_idx = start; array_idx < end; ++array_idx) { + if (null_map && (*null_map)[array_idx]) { + checkArrowStatus(builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + continue; + } checkArrowStatus(builder.Append(), column.get_name(), array_builder->type()->name()); - nested_serde->write_column_to_arrow(nested_data, null_map, nested_builder, + nested_serde->write_column_to_arrow(nested_data, nullptr, nested_builder, offsets[array_idx - 1], offsets[array_idx]); } } diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index 9576812ff1..37516b45b0 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -51,7 +51,7 @@ public: void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; - void write_column_to_arrow(const IColumn& column, const UInt8* null_bytemap, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.h b/be/src/vec/data_types/serde/data_type_bitmap_serde.h index 0825e8f298..01988b50bd 100644 --- a/be/src/vec/data_types/serde/data_type_bitmap_serde.h +++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.h @@ -41,7 +41,7 @@ public: int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; - void write_column_to_arrow(const IColumn& column, const UInt8* null_bytemap, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { LOG(FATAL) << "Not support write bitmap column to arrow"; diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.cpp b/be/src/vec/data_types/serde/data_type_date64_serde.cpp index 787a01f250..6133d73b08 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_date64_serde.cpp @@ -27,7 +27,7 @@ namespace doris { namespace vectorized { -void DataTypeDate64SerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, +void DataTypeDate64SerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { auto& col_data = static_cast<const ColumnVector<Int64>&>(column).get_data(); @@ -37,7 +37,7 @@ void DataTypeDate64SerDe::write_column_to_arrow(const IColumn& column, const UIn const vectorized::VecDateTimeValue* time_val = (const vectorized::VecDateTimeValue*)(&col_data[i]); int len = time_val->to_buffer(buf); - if (null_map && null_map[i]) { + if (null_map && (*null_map)[i]) { checkArrowStatus(string_builder.AppendNull(), column.get_name(), array_builder->type()->name()); } else { @@ -99,7 +99,6 @@ void DataTypeDate64SerDe::read_column_from_arrow(IColumn& column, const arrow::A auto concrete_array = down_cast<const arrow::Date32Array*>(arrow_array); multiplier = 24 * 60 * 60; // day => secs for (size_t value_i = start; value_i < end; ++value_i) { - // std::cout << "serde : " << concrete_array->Value(value_i) << std::endl; VecDateTimeValue v; v.from_unixtime( static_cast<Int64>(concrete_array->Value(value_i)) / divisor * multiplier, ctz); diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.h b/be/src/vec/data_types/serde/data_type_date64_serde.h index c7f29fc118..94a4618d5c 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.h +++ b/be/src/vec/data_types/serde/data_type_date64_serde.h @@ -42,7 +42,7 @@ namespace vectorized { class Arena; class DataTypeDate64SerDe : public DataTypeNumberSerDe<Int64> { - void write_column_to_arrow(const IColumn& column, const UInt8* null_bytemap, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp index 5b912ff79e..6800486dd3 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp @@ -27,7 +27,7 @@ namespace doris { namespace vectorized { -void DataTypeDateTimeV2SerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, +void DataTypeDateTimeV2SerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { auto& col_data = static_cast<const ColumnVector<UInt64>&>(column).get_data(); @@ -37,7 +37,7 @@ void DataTypeDateTimeV2SerDe::write_column_to_arrow(const IColumn& column, const const vectorized::DateV2Value<vectorized::DateTimeV2ValueType>* time_val = (const vectorized::DateV2Value<vectorized::DateTimeV2ValueType>*)(&col_data[i]); int len = time_val->to_buffer(buf); - if (null_map && null_map[i]) { + if (null_map && (*null_map)[i]) { checkArrowStatus(string_builder.AppendNull(), column.get_name(), array_builder->type()->name()); } else { diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h index a571ac10ab..0b943e6010 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h @@ -44,7 +44,7 @@ class Arena; class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe<UInt64> { public: DataTypeDateTimeV2SerDe(int scale) : scale(scale) {}; - void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp index fe75eb38cd..a538ec01f4 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp @@ -27,7 +27,7 @@ namespace doris { namespace vectorized { -void DataTypeDateV2SerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, +void DataTypeDateV2SerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { auto& col_data = static_cast<const ColumnVector<UInt32>&>(column).get_data(); @@ -37,7 +37,7 @@ void DataTypeDateV2SerDe::write_column_to_arrow(const IColumn& column, const UIn const vectorized::DateV2Value<vectorized::DateV2ValueType>* time_val = (const vectorized::DateV2Value<vectorized::DateV2ValueType>*)(&col_data[i]); int len = time_val->to_buffer(buf); - if (null_map && null_map[i]) { + if (null_map && (*null_map)[i]) { checkArrowStatus(string_builder.AppendNull(), column.get_name(), array_builder->type()->name()); } else { @@ -50,8 +50,6 @@ void DataTypeDateV2SerDe::write_column_to_arrow(const IColumn& column, const UIn void DataTypeDateV2SerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const { - std::cout << "column : " << column.get_name() << " data" << getTypeName(column.get_data_type()) - << " array " << arrow_array->type_id() << std::endl; auto& col_data = static_cast<ColumnVector<UInt32>&>(column).get_data(); auto concrete_array = down_cast<const arrow::Date64Array*>(arrow_array); int64_t divisor = 1; diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h index be435ff80d..5d2baf7704 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h @@ -42,7 +42,7 @@ namespace vectorized { class Arena; class DataTypeDateV2SerDe : public DataTypeNumberSerDe<UInt32> { - void write_column_to_arrow(const IColumn& column, const UInt8* null_bytemap, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index d4b2de6281..4137dd885c 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -32,7 +32,7 @@ namespace doris { namespace vectorized { template <typename T> -void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const UInt8* null_map, +void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { auto& col = reinterpret_cast<const ColumnDecimal<T>&>(column); @@ -41,7 +41,7 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const std::shared_ptr<arrow::DataType> s_decimal_ptr = std::make_shared<arrow::Decimal128Type>(27, 9); for (size_t i = start; i < end; ++i) { - if (null_map && null_map[i]) { + if (null_map && (*null_map)[i]) { checkArrowStatus(builder.AppendNull(), column.get_name(), array_builder->type()->name()); continue; @@ -58,7 +58,7 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const std::shared_ptr<arrow::DataType> s_decimal_ptr = std::make_shared<arrow::Decimal128Type>(38, col.get_scale()); for (size_t i = start; i < end; ++i) { - if (null_map && null_map[i]) { + if (null_map && (*null_map)[i]) { checkArrowStatus(builder.AppendNull(), column.get_name(), array_builder->type()->name()); continue; @@ -75,7 +75,7 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const std::shared_ptr<arrow::DataType> s_decimal_ptr = std::make_shared<arrow::Decimal128Type>(8, col.get_scale()); for (size_t i = start; i < end; ++i) { - if (null_map && null_map[i]) { + if (null_map && (*null_map)[i]) { checkArrowStatus(builder.AppendNull(), column.get_name(), array_builder->type()->name()); continue; @@ -91,7 +91,7 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const std::shared_ptr<arrow::DataType> s_decimal_ptr = std::make_shared<arrow::Decimal128Type>(18, col.get_scale()); for (size_t i = start; i < end; ++i) { - if (null_map && null_map[i]) { + if (null_map && (*null_map)[i]) { checkArrowStatus(builder.AppendNull(), column.get_name(), array_builder->type()->name()); continue; diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index 60014a99cf..2b64a00eca 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -60,7 +60,7 @@ public: void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; - void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, diff --git a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h index 092d2202b0..85c27e7c1c 100644 --- a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h +++ b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h @@ -51,7 +51,7 @@ public: void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override { LOG(FATAL) << "Not support read from jsonb to FixedLengthObject"; } - void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { LOG(FATAL) << "Not support write FixedLengthObject column to arrow"; diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.cpp b/be/src/vec/data_types/serde/data_type_hll_serde.cpp index 2c32ceaccc..72052d47cf 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_hll_serde.cpp @@ -81,13 +81,13 @@ void DataTypeHLLSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValu col.insert_value(hyper_log_log); } -void DataTypeHLLSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, +void DataTypeHLLSerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { const auto& col = assert_cast<const ColumnHLL&>(column); auto& builder = assert_cast<arrow::StringBuilder&>(*array_builder); for (size_t string_i = start; string_i < end; ++string_i) { - if (null_map && null_map[string_i]) { + if (null_map && (*null_map)[string_i]) { checkArrowStatus(builder.AppendNull(), column.get_name(), array_builder->type()->name()); } else { diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.h b/be/src/vec/data_types/serde/data_type_hll_serde.h index 7decc40134..981b197ca8 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.h +++ b/be/src/vec/data_types/serde/data_type_hll_serde.h @@ -41,7 +41,7 @@ public: int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; - void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index 03d0038ee0..ff42a06c9d 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -16,6 +16,8 @@ // under the License. #include "data_type_jsonb_serde.h" + +#include <arrow/array/builder_binary.h> namespace doris { namespace vectorized { @@ -52,5 +54,24 @@ Status DataTypeJsonbSerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } +void DataTypeJsonbSerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + const auto& string_column = assert_cast<const ColumnString&>(column); + auto& builder = assert_cast<arrow::StringBuilder&>(*array_builder); + for (size_t string_i = start; string_i < end; ++string_i) { + if (null_map && (*null_map)[string_i]) { + checkArrowStatus(builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + continue; + } + std::string_view string_ref = string_column.get_data_at(string_i).to_string_view(); + std::string json_string = + JsonbToJson::jsonb_to_json_string(string_ref.data(), string_ref.size()); + checkArrowStatus(builder.Append(json_string.data(), json_string.size()), column.get_name(), + array_builder->type()->name()); + } +} + } // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h index e2d7b8c7c0..9bf523504c 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h @@ -38,6 +38,9 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe { int row_idx, bool col_const) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer<false>& row_buffer, int row_idx, bool col_const) const override; + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; private: template <bool is_binary_format> diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index 187c3ffc15..fb3eddc4ff 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -17,6 +17,7 @@ #include "data_type_map_serde.h" +#include "arrow/array/builder_nested.h" #include "util/jsonb_document.h" #include "vec/columns/column.h" #include "vec/columns/column_const.h" @@ -43,16 +44,50 @@ void DataTypeMapSerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWrite result.writeEndBinary(); } -void DataTypeMapSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, +void DataTypeMapSerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { - LOG(FATAL) << "Not support write " << column.get_name() << " to arrow"; + auto& builder = assert_cast<arrow::MapBuilder&>(*array_builder); + auto& map_column = assert_cast<const ColumnMap&>(column); + const IColumn& nested_keys_column = map_column.get_keys(); + CHECK(!nested_keys_column.is_nullable()); + const IColumn& nested_values_column = map_column.get_values(); + auto& offsets = map_column.get_offsets(); + auto key_builder = builder.key_builder(); + auto value_builder = builder.item_builder(); + for (size_t r = start; r < end; ++r) { + if (null_map && (*null_map)[r]) { + checkArrowStatus(builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + } else { + checkArrowStatus(builder.Append(), column.get_name(), array_builder->type()->name()); + key_serde->write_column_to_arrow(nested_keys_column, nullptr, key_builder, + offsets[r - 1], offsets[r]); + value_serde->write_column_to_arrow(nested_values_column, nullptr, value_builder, + offsets[r - 1], offsets[r]); + } + } } void DataTypeMapSerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const { - LOG(FATAL) << "Not support read " << column.get_name() << " from arrow"; + auto& column_map = static_cast<ColumnMap&>(column); + auto& offsets_data = column_map.get_offsets(); + auto concrete_map = down_cast<const arrow::MapArray*>(arrow_array); + auto arrow_offsets_array = concrete_map->offsets(); + auto arrow_offsets = down_cast<arrow::Int32Array*>(arrow_offsets_array.get()); + auto prev_size = offsets_data.back(); + auto arrow_nested_start_offset = arrow_offsets->Value(start); + auto arrow_nested_end_offset = arrow_offsets->Value(end); + for (int64_t i = start + 1; i < end + 1; ++i) { + // convert to doris offset, start from offsets.back() + offsets_data.emplace_back(prev_size + arrow_offsets->Value(i) - arrow_nested_start_offset); + } + key_serde->read_column_from_arrow(column_map.get_keys(), concrete_map->keys().get(), + arrow_nested_start_offset, arrow_nested_end_offset, ctz); + value_serde->read_column_from_arrow(column_map.get_values(), concrete_map->items().get(), + arrow_nested_start_offset, arrow_nested_end_offset, ctz); } template <bool is_binary_format> diff --git a/be/src/vec/data_types/serde/data_type_map_serde.h b/be/src/vec/data_types/serde/data_type_map_serde.h index f41f75e1e8..67474d0676 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.h +++ b/be/src/vec/data_types/serde/data_type_map_serde.h @@ -50,7 +50,7 @@ public: int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; - void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index f2938450aa..654890f501 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -101,22 +101,13 @@ void DataTypeNullableSerDe::read_one_cell_from_jsonb(IColumn& column, const Json 1/ convert the null_map from doris to arrow null byte map 2/ pass the arrow null byteamp to nested column , and call AppendValues **/ -void DataTypeNullableSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, +void DataTypeNullableSerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { const auto& column_nullable = assert_cast<const ColumnNullable&>(column); - const PaddedPODArray<UInt8>& bytemap = column_nullable.get_null_map_data(); - PaddedPODArray<UInt8> res; - if (column_nullable.has_null()) { - res.reserve(end - start); - for (size_t i = start; i < end; ++i) { - res.emplace_back( - !(bytemap)[i]); //Invert values since Arrow interprets 1 as a non-null value - } - } - const UInt8* arrow_null_bytemap_raw_ptr = res.empty() ? nullptr : res.data(); nested_serde->write_column_to_arrow(column_nullable.get_nested_column(), - arrow_null_bytemap_raw_ptr, array_builder, start, end); + &column_nullable.get_null_map_data(), array_builder, start, + end); } void DataTypeNullableSerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 170dc8e572..1c7dc1b8a5 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -43,7 +43,7 @@ public: int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; - void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index aac945ebf4..081ad569c3 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -65,30 +65,35 @@ using DORIS_NUMERIC_ARROW_BUILDER = >; template <typename T> -void DataTypeNumberSerDe<T>::write_column_to_arrow(const IColumn& column, const UInt8* null_map, +void DataTypeNumberSerDe<T>::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { auto& col_data = assert_cast<const ColumnType&>(column).get_data(); using ARROW_BUILDER_TYPE = typename TypeMapLookup<T, DORIS_NUMERIC_ARROW_BUILDER>::ValueType; + auto arrow_null_map = revert_null_map(null_map, start, end); + auto arrow_null_map_data = arrow_null_map.empty() ? nullptr : arrow_null_map.data(); if constexpr (std::is_same_v<T, UInt8>) { ARROW_BUILDER_TYPE& builder = assert_cast<ARROW_BUILDER_TYPE&>(*array_builder); checkArrowStatus( builder.AppendValues(reinterpret_cast<const uint8_t*>(col_data.data() + start), - end - start, reinterpret_cast<const uint8_t*>(null_map)), + end - start, + reinterpret_cast<const uint8_t*>(arrow_null_map_data)), column.get_name(), array_builder->type()->name()); } else if constexpr (std::is_same_v<T, Int128> || std::is_same_v<T, UInt128>) { ARROW_BUILDER_TYPE& builder = assert_cast<ARROW_BUILDER_TYPE&>(*array_builder); size_t fixed_length = sizeof(typename ColumnType::value_type); const uint8_t* data_start = reinterpret_cast<const uint8_t*>(col_data.data()) + start * fixed_length; - checkArrowStatus(builder.AppendValues(data_start, end - start, - reinterpret_cast<const uint8_t*>(null_map)), - column.get_name(), array_builder->type()->name()); + checkArrowStatus( + builder.AppendValues(data_start, end - start, + reinterpret_cast<const uint8_t*>(arrow_null_map_data)), + column.get_name(), array_builder->type()->name()); } else { ARROW_BUILDER_TYPE& builder = assert_cast<ARROW_BUILDER_TYPE&>(*array_builder); - checkArrowStatus(builder.AppendValues(col_data.data() + start, end - start, - reinterpret_cast<const uint8_t*>(null_map)), - column.get_name(), array_builder->type()->name()); + checkArrowStatus( + builder.AppendValues(col_data.data() + start, end - start, + reinterpret_cast<const uint8_t*>(arrow_null_map_data)), + column.get_name(), array_builder->type()->name()); } } diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index 7e1f22874d..2130d78493 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -63,7 +63,7 @@ public: void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; - void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, diff --git a/be/src/vec/data_types/serde/data_type_object_serde.h b/be/src/vec/data_types/serde/data_type_object_serde.h index 6ffbd07eb9..ceb4a20cf2 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.h +++ b/be/src/vec/data_types/serde/data_type_object_serde.h @@ -52,7 +52,7 @@ public: LOG(FATAL) << "Not support write json object to column"; } - void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { LOG(FATAL) << "Not support write object column to arrow"; diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h index c9126ab298..ebeccca097 100644 --- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h +++ b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h @@ -48,7 +48,7 @@ public: int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; - void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { LOG(FATAL) << "Not support write " << column.get_name() << " to arrow"; diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 7064e308c9..4df22029c9 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -26,6 +26,8 @@ #include "common/status.h" #include "util/jsonb_writer.h" #include "util/mysql_row_buffer.h" +#include "vec/columns/column_nullable.h" +#include "vec/common/pod_array.h" #include "vec/common/pod_array_fwd.h" #include "vec/core/types.h" @@ -88,7 +90,7 @@ public: // JSON serializer and deserializer // Arrow serializer and deserializer - virtual void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + virtual void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const = 0; virtual void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, @@ -100,6 +102,20 @@ protected: bool _return_object_as_string = false; }; +/// Invert values since Arrow interprets 1 as a non-null value, while doris as a null +inline static NullMap revert_null_map(const NullMap* null_bytemap, size_t start, size_t end) { + NullMap res; + if (!null_bytemap) { + return res; + } + + res.reserve(end - start); + for (size_t i = start; i < end; ++i) { + res.emplace_back(!(*null_bytemap)[i]); + } + return res; +} + inline void checkArrowStatus(const arrow::Status& status, const std::string& column, const std::string& format_name) { if (!status.ok()) { diff --git a/be/src/vec/data_types/serde/data_type_string_serde.cpp b/be/src/vec/data_types/serde/data_type_string_serde.cpp index 02be5f8072..bff2df4431 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_string_serde.cpp @@ -68,27 +68,20 @@ void DataTypeStringSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbV col.insert_data(blob->getBlob(), blob->getBlobLen()); } -void DataTypeStringSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, +void DataTypeStringSerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { const auto& string_column = assert_cast<const ColumnString&>(column); auto& builder = assert_cast<arrow::StringBuilder&>(*array_builder); for (size_t string_i = start; string_i < end; ++string_i) { - if (null_map && null_map[string_i]) { + if (null_map && (*null_map)[string_i]) { checkArrowStatus(builder.AppendNull(), column.get_name(), array_builder->type()->name()); continue; } std::string_view string_ref = string_column.get_data_at(string_i).to_string_view(); - if (column.get_data_type() == TypeIndex::JSONB) { - std::string json_string = - JsonbToJson::jsonb_to_json_string(string_ref.data(), string_ref.size()); - checkArrowStatus(builder.Append(json_string.data(), json_string.size()), - column.get_name(), array_builder->type()->name()); - } else { - checkArrowStatus(builder.Append(string_ref.data(), string_ref.size()), - column.get_name(), array_builder->type()->name()); - } + checkArrowStatus(builder.Append(string_ref.data(), string_ref.size()), column.get_name(), + array_builder->type()->name()); } } diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index 1a8189fc8c..85b60cb268 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -41,7 +41,7 @@ public: void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; - void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index 3e384e8140..ed6e9c1b7b 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -17,6 +17,7 @@ #include "data_type_struct_serde.h" +#include "arrow/array/builder_nested.h" #include "util/jsonb_document.h" #include "vec/columns/column.h" #include "vec/columns/column_const.h" @@ -45,16 +46,36 @@ void DataTypeStructSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbV column.deserialize_and_insert_from_arena(blob->getBlob()); } -void DataTypeStructSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, +void DataTypeStructSerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { - LOG(FATAL) << "Not support write " << column.get_name() << " to arrow"; + auto& builder = assert_cast<arrow::StructBuilder&>(*array_builder); + auto& struct_column = assert_cast<const ColumnStruct&>(column); + for (int r = start; r < end; ++r) { + if (null_map != nullptr && (*null_map)[r]) { + checkArrowStatus(builder.AppendNull(), struct_column.get_name(), + builder.type()->name()); + continue; + } + checkArrowStatus(builder.Append(), struct_column.get_name(), builder.type()->name()); + for (size_t ei = 0; ei < struct_column.tuple_size(); ++ei) { + auto elem_builder = builder.field_builder(ei); + elemSerDeSPtrs[ei]->write_column_to_arrow(struct_column.get_column(ei), nullptr, + elem_builder, r, r + 1); + } + } } void DataTypeStructSerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const { - LOG(FATAL) << "Not support read " << column.get_name() << " from arrow"; + auto& struct_column = static_cast<ColumnStruct&>(column); + auto concrete_struct = down_cast<const arrow::StructArray*>(arrow_array); + DCHECK_EQ(struct_column.tuple_size(), concrete_struct->num_fields()); + for (size_t i = 0; i < struct_column.tuple_size(); ++i) { + elemSerDeSPtrs[i]->read_column_from_arrow(struct_column.get_column(i), + concrete_struct->field(i).get(), start, end, ctz); + } } template <bool is_binary_format> diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.h b/be/src/vec/data_types/serde/data_type_struct_serde.h index 3b5c183246..927c7ac9ed 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.h +++ b/be/src/vec/data_types/serde/data_type_struct_serde.h @@ -51,7 +51,7 @@ public: void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; - void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp index e7917e7cea..a51cb9bc90 100644 --- a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp @@ -52,6 +52,7 @@ #include "util/bitmap_value.h" #include "util/quantile_state.h" #include "vec/columns/column.h" +#include "vec/columns/column_array.h" #include "vec/columns/column_complex.h" #include "vec/columns/column_decimal.h" #include "vec/columns/column_nullable.h" @@ -61,29 +62,40 @@ #include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_hll.h" +#include "vec/data_types/data_type_map.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_quantilestate.h" #include "vec/data_types/data_type_string.h" +#include "vec/data_types/data_type_struct.h" #include "vec/data_types/data_type_time_v2.h" #include "vec/runtime/vdatetime_value.h" #include "vec/utils/arrow_column_to_doris_column.h" namespace doris::vectorized { +template <bool is_scalar> void serialize_and_deserialize_arrow_test() { vectorized::Block block; - std::vector<std::tuple<std::string, FieldType, int, PrimitiveType, bool>> cols { - {"k1", FieldType::OLAP_FIELD_TYPE_INT, 1, TYPE_INT, false}, - {"k7", FieldType::OLAP_FIELD_TYPE_INT, 7, TYPE_INT, true}, - {"k2", FieldType::OLAP_FIELD_TYPE_STRING, 2, TYPE_STRING, false}, - {"k3", FieldType::OLAP_FIELD_TYPE_DECIMAL128I, 3, TYPE_DECIMAL128I, false}, - {"k11", FieldType::OLAP_FIELD_TYPE_DATETIME, 11, TYPE_DATETIME, false}, - {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}}; + std::vector<std::tuple<std::string, FieldType, int, PrimitiveType, bool>> cols; + if constexpr (is_scalar) { + cols = {{"k1", FieldType::OLAP_FIELD_TYPE_INT, 1, TYPE_INT, false}, + {"k7", FieldType::OLAP_FIELD_TYPE_INT, 7, TYPE_INT, true}, + {"k2", FieldType::OLAP_FIELD_TYPE_STRING, 2, TYPE_STRING, false}, + {"k3", FieldType::OLAP_FIELD_TYPE_DECIMAL128I, 3, TYPE_DECIMAL128I, false}, + {"k11", FieldType::OLAP_FIELD_TYPE_DATETIME, 11, TYPE_DATETIME, false}, + {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}}; + } else { + cols = {{"a", FieldType::OLAP_FIELD_TYPE_ARRAY, 6, TYPE_ARRAY, true}, + {"m", FieldType::OLAP_FIELD_TYPE_MAP, 8, TYPE_MAP, true}, + {"s", FieldType::OLAP_FIELD_TYPE_STRUCT, 5, TYPE_STRUCT, true}}; + } + int row_num = 7; // make desc and generate block TupleDescriptor tuple_desc(PTupleDescriptor(), true); @@ -117,7 +129,11 @@ void serialize_and_deserialize_arrow_test() { vectorized::make_nullable(std::move(column_vector_int32)); auto mutable_nullable_vector = std::move(*column_nullable_vector).mutate(); for (int i = 0; i < row_num; i++) { - mutable_nullable_vector->insert(int32(i)); + if (i % 2 == 0) { + mutable_nullable_vector->insert_default(); + } else { + mutable_nullable_vector->insert(int32(i)); + } } auto data_type = vectorized::make_nullable( std::make_shared<vectorized::DataTypeInt32>()); @@ -239,6 +255,97 @@ void serialize_and_deserialize_arrow_test() { block.insert(test_datetime); } break; + case TYPE_ARRAY: // array + type_desc.add_sub_type(TYPE_STRING, true); + tslot.__set_slotType(type_desc.to_thrift()); + { + DataTypePtr s = + std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()); + DataTypePtr au = std::make_shared<DataTypeArray>(s); + Array a1, a2; + a1.push_back(String("sss")); + a1.push_back(Null()); + a1.push_back(String("clever amory")); + a2.push_back(String("hello amory")); + a2.push_back(Null()); + a2.push_back(String("cute amory")); + a2.push_back(String("sf")); + MutableColumnPtr array_column = au->create_column(); + array_column->reserve(2); + array_column->insert(a1); + array_column->insert(a2); + vectorized::ColumnWithTypeAndName type_and_name(array_column->get_ptr(), au, + col_name); + block.insert(type_and_name); + } + break; + case TYPE_MAP: + type_desc.add_sub_type(TYPE_STRING, true); + type_desc.add_sub_type(TYPE_STRING, true); + tslot.__set_slotType(type_desc.to_thrift()); + { + DataTypePtr s = std::make_shared<DataTypeString>(); + DataTypePtr d = + std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()); + DataTypePtr m = std::make_shared<DataTypeMap>(s, d); + Array k1, k2, v1, v2; + k1.push_back("null"); + k1.push_back("doris"); + k1.push_back("clever amory"); + v1.push_back("ss"); + v1.push_back(Null()); + v1.push_back("NULL"); + k2.push_back("hello amory"); + k2.push_back("NULL"); + k2.push_back("cute amory"); + k2.push_back("doris"); + v2.push_back("s"); + v2.push_back("0"); + v2.push_back("sf"); + v2.push_back(Null()); + Map m1, m2; + m1.push_back(k1); + m1.push_back(v1); + m2.push_back(k2); + m2.push_back(v2); + MutableColumnPtr map_column = m->create_column(); + map_column->reserve(2); + map_column->insert(m1); + map_column->insert(m2); + vectorized::ColumnWithTypeAndName type_and_name(map_column->get_ptr(), m, col_name); + block.insert(type_and_name); + } + break; + case TYPE_STRUCT: + type_desc.add_sub_type(TYPE_STRING, "name", true); + type_desc.add_sub_type(TYPE_LARGEINT, "age", true); + type_desc.add_sub_type(TYPE_BOOLEAN, "is", true); + tslot.__set_slotType(type_desc.to_thrift()); + { + DataTypePtr s = + std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()); + DataTypePtr d = + std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt128>()); + DataTypePtr m = + std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>()); + DataTypePtr st = + std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {s, d, m}); + Tuple t1, t2; + t1.push_back(String("amory cute")); + t1.push_back(__int128_t(37)); + t1.push_back(true); + t2.push_back("null"); + t2.push_back(__int128_t(26)); + t2.push_back(false); + MutableColumnPtr struct_column = st->create_column(); + struct_column->reserve(2); + struct_column->insert(t1); + struct_column->insert(t2); + vectorized::ColumnWithTypeAndName type_and_name(struct_column->get_ptr(), st, + col_name); + block.insert(type_and_name); + } + break; default: break; } @@ -260,6 +367,8 @@ void serialize_and_deserialize_arrow_test() { convert_to_arrow_batch(block, _arrow_schema, arrow::default_memory_pool(), &result); Block new_block = block.clone_empty(); + EXPECT_TRUE(result != nullptr); + std::cout << "result: " << result->ToString() << std::endl; // deserialize for (auto t : cols) { std::string real_column_name = std::get<0>(t); @@ -315,7 +424,11 @@ void serialize_and_deserialize_arrow_test() { } TEST(DataTypeSerDeArrowTest, DataTypeScalaSerDeTest) { - serialize_and_deserialize_arrow_test(); + serialize_and_deserialize_arrow_test<true>(); +} + +TEST(DataTypeSerDeArrowTest, DataTypeCollectionSerDeTest) { + serialize_and_deserialize_arrow_test<false>(); } } // namespace doris::vectorized --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org