This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 0eb73c82188708ebc2fd1ff816fc14a1b5c7f7b4
Author: amory <wangqian...@selectdb.com>
AuthorDate: Sat Jul 8 15:51:14 2023 +0800

    [FIX](serde) fix map/struct/array support arrow #21628
    
    Support the arrow format for map/struct types.
    Fix the arrow format for strings.
    Fix largeInt (128-bit) handling in the arrow builder.
---
 be/src/util/arrow/row_batch.cpp                    |   4 +-
 .../vec/data_types/serde/data_type_array_serde.cpp |   9 +-
 .../vec/data_types/serde/data_type_array_serde.h   |   2 +-
 .../vec/data_types/serde/data_type_bitmap_serde.h  |   2 +-
 .../data_types/serde/data_type_date64_serde.cpp    |   5 +-
 .../vec/data_types/serde/data_type_date64_serde.h  |   2 +-
 .../serde/data_type_datetimev2_serde.cpp           |   4 +-
 .../data_types/serde/data_type_datetimev2_serde.h  |   2 +-
 .../data_types/serde/data_type_datev2_serde.cpp    |   6 +-
 .../vec/data_types/serde/data_type_datev2_serde.h  |   2 +-
 .../data_types/serde/data_type_decimal_serde.cpp   |  10 +-
 .../vec/data_types/serde/data_type_decimal_serde.h |   2 +-
 .../serde/data_type_fixedlengthobject_serde.h      |   2 +-
 .../vec/data_types/serde/data_type_hll_serde.cpp   |   4 +-
 be/src/vec/data_types/serde/data_type_hll_serde.h  |   2 +-
 .../vec/data_types/serde/data_type_jsonb_serde.cpp |  21 ++++
 .../vec/data_types/serde/data_type_jsonb_serde.h   |   3 +
 .../vec/data_types/serde/data_type_map_serde.cpp   |  41 ++++++-
 be/src/vec/data_types/serde/data_type_map_serde.h  |   2 +-
 .../data_types/serde/data_type_nullable_serde.cpp  |  15 +--
 .../data_types/serde/data_type_nullable_serde.h    |   2 +-
 .../data_types/serde/data_type_number_serde.cpp    |  21 ++--
 .../vec/data_types/serde/data_type_number_serde.h  |   2 +-
 .../vec/data_types/serde/data_type_object_serde.h  |   2 +-
 .../serde/data_type_quantilestate_serde.h          |   2 +-
 be/src/vec/data_types/serde/data_type_serde.h      |  18 ++-
 .../data_types/serde/data_type_string_serde.cpp    |  15 +--
 .../vec/data_types/serde/data_type_string_serde.h  |   2 +-
 .../data_types/serde/data_type_struct_serde.cpp    |  27 ++++-
 .../vec/data_types/serde/data_type_struct_serde.h  |   2 +-
 .../serde/data_type_serde_arrow_test.cpp           | 131 +++++++++++++++++++--
 31 files changed, 283 insertions(+), 81 deletions(-)

diff --git a/be/src/util/arrow/row_batch.cpp b/be/src/util/arrow/row_batch.cpp
index 110977c6e6..78fe346be6 100644
--- a/be/src/util/arrow/row_batch.cpp
+++ b/be/src/util/arrow/row_batch.cpp
@@ -65,10 +65,12 @@ Status convert_to_arrow_type(const TypeDescriptor& type, 
std::shared_ptr<arrow::
     case TYPE_TIME:
         *result = arrow::float64();
         break;
+    case TYPE_LARGEINT:
+        *result = arrow::fixed_size_binary(sizeof(int128_t));
+        break;
     case TYPE_VARCHAR:
     case TYPE_CHAR:
     case TYPE_HLL:
-    case TYPE_LARGEINT:
     case TYPE_DATE:
     case TYPE_DATETIME:
     case TYPE_DATEV2:
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp 
b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index f3d0098238..596209c9aa 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -49,7 +49,7 @@ void DataTypeArraySerDe::read_one_cell_from_jsonb(IColumn& 
column, const JsonbVa
     column.deserialize_and_insert_from_arena(blob->getBlob());
 }
 
-void DataTypeArraySerDe::write_column_to_arrow(const IColumn& column, const 
UInt8* null_map,
+void DataTypeArraySerDe::write_column_to_arrow(const IColumn& column, const 
NullMap* null_map,
                                                arrow::ArrayBuilder* 
array_builder, int start,
                                                int end) const {
     auto& array_column = static_cast<const ColumnArray&>(column);
@@ -58,8 +58,13 @@ void DataTypeArraySerDe::write_column_to_arrow(const 
IColumn& column, const UInt
     auto& builder = assert_cast<arrow::ListBuilder&>(*array_builder);
     auto nested_builder = builder.value_builder();
     for (size_t array_idx = start; array_idx < end; ++array_idx) {
+        if (null_map && (*null_map)[array_idx]) {
+            checkArrowStatus(builder.AppendNull(), column.get_name(),
+                             array_builder->type()->name());
+            continue;
+        }
         checkArrowStatus(builder.Append(), column.get_name(), 
array_builder->type()->name());
-        nested_serde->write_column_to_arrow(nested_data, null_map, 
nested_builder,
+        nested_serde->write_column_to_arrow(nested_data, nullptr, 
nested_builder,
                                             offsets[array_idx - 1], 
offsets[array_idx]);
     }
 }
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h 
b/be/src/vec/data_types/serde/data_type_array_serde.h
index 9576812ff1..37516b45b0 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.h
+++ b/be/src/vec/data_types/serde/data_type_array_serde.h
@@ -51,7 +51,7 @@ public:
 
     void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) 
const override;
 
-    void write_column_to_arrow(const IColumn& column, const UInt8* 
null_bytemap,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override;
     void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.h 
b/be/src/vec/data_types/serde/data_type_bitmap_serde.h
index 0825e8f298..01988b50bd 100644
--- a/be/src/vec/data_types/serde/data_type_bitmap_serde.h
+++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.h
@@ -41,7 +41,7 @@ public:
                                  int32_t col_id, int row_num) const override;
 
     void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) 
const override;
-    void write_column_to_arrow(const IColumn& column, const UInt8* 
null_bytemap,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override {
         LOG(FATAL) << "Not support write bitmap column to arrow";
diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.cpp 
b/be/src/vec/data_types/serde/data_type_date64_serde.cpp
index 787a01f250..6133d73b08 100644
--- a/be/src/vec/data_types/serde/data_type_date64_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_date64_serde.cpp
@@ -27,7 +27,7 @@
 namespace doris {
 namespace vectorized {
 
-void DataTypeDate64SerDe::write_column_to_arrow(const IColumn& column, const 
UInt8* null_map,
+void DataTypeDate64SerDe::write_column_to_arrow(const IColumn& column, const 
NullMap* null_map,
                                                 arrow::ArrayBuilder* 
array_builder, int start,
                                                 int end) const {
     auto& col_data = static_cast<const 
ColumnVector<Int64>&>(column).get_data();
@@ -37,7 +37,7 @@ void DataTypeDate64SerDe::write_column_to_arrow(const 
IColumn& column, const UIn
         const vectorized::VecDateTimeValue* time_val =
                 (const vectorized::VecDateTimeValue*)(&col_data[i]);
         int len = time_val->to_buffer(buf);
-        if (null_map && null_map[i]) {
+        if (null_map && (*null_map)[i]) {
             checkArrowStatus(string_builder.AppendNull(), column.get_name(),
                              array_builder->type()->name());
         } else {
@@ -99,7 +99,6 @@ void DataTypeDate64SerDe::read_column_from_arrow(IColumn& 
column, const arrow::A
         auto concrete_array = down_cast<const 
arrow::Date32Array*>(arrow_array);
         multiplier = 24 * 60 * 60; // day => secs
         for (size_t value_i = start; value_i < end; ++value_i) {
-            //            std::cout << "serde : " <<  
concrete_array->Value(value_i) << std::endl;
             VecDateTimeValue v;
             v.from_unixtime(
                     static_cast<Int64>(concrete_array->Value(value_i)) / 
divisor * multiplier, ctz);
diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.h 
b/be/src/vec/data_types/serde/data_type_date64_serde.h
index c7f29fc118..94a4618d5c 100644
--- a/be/src/vec/data_types/serde/data_type_date64_serde.h
+++ b/be/src/vec/data_types/serde/data_type_date64_serde.h
@@ -42,7 +42,7 @@ namespace vectorized {
 class Arena;
 
 class DataTypeDate64SerDe : public DataTypeNumberSerDe<Int64> {
-    void write_column_to_arrow(const IColumn& column, const UInt8* 
null_bytemap,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override;
     void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp 
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
index 5b912ff79e..6800486dd3 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
@@ -27,7 +27,7 @@
 namespace doris {
 namespace vectorized {
 
-void DataTypeDateTimeV2SerDe::write_column_to_arrow(const IColumn& column, 
const UInt8* null_map,
+void DataTypeDateTimeV2SerDe::write_column_to_arrow(const IColumn& column, 
const NullMap* null_map,
                                                     arrow::ArrayBuilder* 
array_builder, int start,
                                                     int end) const {
     auto& col_data = static_cast<const 
ColumnVector<UInt64>&>(column).get_data();
@@ -37,7 +37,7 @@ void DataTypeDateTimeV2SerDe::write_column_to_arrow(const 
IColumn& column, const
         const vectorized::DateV2Value<vectorized::DateTimeV2ValueType>* 
time_val =
                 (const 
vectorized::DateV2Value<vectorized::DateTimeV2ValueType>*)(&col_data[i]);
         int len = time_val->to_buffer(buf);
-        if (null_map && null_map[i]) {
+        if (null_map && (*null_map)[i]) {
             checkArrowStatus(string_builder.AppendNull(), column.get_name(),
                              array_builder->type()->name());
         } else {
diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h 
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
index a571ac10ab..0b943e6010 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
@@ -44,7 +44,7 @@ class Arena;
 class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe<UInt64> {
 public:
     DataTypeDateTimeV2SerDe(int scale) : scale(scale) {};
-    void write_column_to_arrow(const IColumn& column, const UInt8* null_map,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override;
     void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp 
b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
index fe75eb38cd..a538ec01f4 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
@@ -27,7 +27,7 @@
 namespace doris {
 namespace vectorized {
 
-void DataTypeDateV2SerDe::write_column_to_arrow(const IColumn& column, const 
UInt8* null_map,
+void DataTypeDateV2SerDe::write_column_to_arrow(const IColumn& column, const 
NullMap* null_map,
                                                 arrow::ArrayBuilder* 
array_builder, int start,
                                                 int end) const {
     auto& col_data = static_cast<const 
ColumnVector<UInt32>&>(column).get_data();
@@ -37,7 +37,7 @@ void DataTypeDateV2SerDe::write_column_to_arrow(const 
IColumn& column, const UIn
         const vectorized::DateV2Value<vectorized::DateV2ValueType>* time_val =
                 (const 
vectorized::DateV2Value<vectorized::DateV2ValueType>*)(&col_data[i]);
         int len = time_val->to_buffer(buf);
-        if (null_map && null_map[i]) {
+        if (null_map && (*null_map)[i]) {
             checkArrowStatus(string_builder.AppendNull(), column.get_name(),
                              array_builder->type()->name());
         } else {
@@ -50,8 +50,6 @@ void DataTypeDateV2SerDe::write_column_to_arrow(const 
IColumn& column, const UIn
 void DataTypeDateV2SerDe::read_column_from_arrow(IColumn& column, const 
arrow::Array* arrow_array,
                                                  int start, int end,
                                                  const cctz::time_zone& ctz) 
const {
-    std::cout << "column : " << column.get_name() << " data" << 
getTypeName(column.get_data_type())
-              << " array " << arrow_array->type_id() << std::endl;
     auto& col_data = static_cast<ColumnVector<UInt32>&>(column).get_data();
     auto concrete_array = down_cast<const arrow::Date64Array*>(arrow_array);
     int64_t divisor = 1;
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h 
b/be/src/vec/data_types/serde/data_type_datev2_serde.h
index be435ff80d..5d2baf7704 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.h
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h
@@ -42,7 +42,7 @@ namespace vectorized {
 class Arena;
 
 class DataTypeDateV2SerDe : public DataTypeNumberSerDe<UInt32> {
-    void write_column_to_arrow(const IColumn& column, const UInt8* 
null_bytemap,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override;
     void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp 
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index d4b2de6281..4137dd885c 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -32,7 +32,7 @@ namespace doris {
 namespace vectorized {
 
 template <typename T>
-void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, 
const UInt8* null_map,
+void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, 
const NullMap* null_map,
                                                     arrow::ArrayBuilder* 
array_builder, int start,
                                                     int end) const {
     auto& col = reinterpret_cast<const ColumnDecimal<T>&>(column);
@@ -41,7 +41,7 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const 
IColumn& column, const
         std::shared_ptr<arrow::DataType> s_decimal_ptr =
                 std::make_shared<arrow::Decimal128Type>(27, 9);
         for (size_t i = start; i < end; ++i) {
-            if (null_map && null_map[i]) {
+            if (null_map && (*null_map)[i]) {
                 checkArrowStatus(builder.AppendNull(), column.get_name(),
                                  array_builder->type()->name());
                 continue;
@@ -58,7 +58,7 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const 
IColumn& column, const
         std::shared_ptr<arrow::DataType> s_decimal_ptr =
                 std::make_shared<arrow::Decimal128Type>(38, col.get_scale());
         for (size_t i = start; i < end; ++i) {
-            if (null_map && null_map[i]) {
+            if (null_map && (*null_map)[i]) {
                 checkArrowStatus(builder.AppendNull(), column.get_name(),
                                  array_builder->type()->name());
                 continue;
@@ -75,7 +75,7 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const 
IColumn& column, const
         std::shared_ptr<arrow::DataType> s_decimal_ptr =
                 std::make_shared<arrow::Decimal128Type>(8, col.get_scale());
         for (size_t i = start; i < end; ++i) {
-            if (null_map && null_map[i]) {
+            if (null_map && (*null_map)[i]) {
                 checkArrowStatus(builder.AppendNull(), column.get_name(),
                                  array_builder->type()->name());
                 continue;
@@ -91,7 +91,7 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const 
IColumn& column, const
         std::shared_ptr<arrow::DataType> s_decimal_ptr =
                 std::make_shared<arrow::Decimal128Type>(18, col.get_scale());
         for (size_t i = start; i < end; ++i) {
-            if (null_map && null_map[i]) {
+            if (null_map && (*null_map)[i]) {
                 checkArrowStatus(builder.AppendNull(), column.get_name(),
                                  array_builder->type()->name());
                 continue;
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h 
b/be/src/vec/data_types/serde/data_type_decimal_serde.h
index 60014a99cf..2b64a00eca 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.h
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h
@@ -60,7 +60,7 @@ public:
 
     void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) 
const override;
 
-    void write_column_to_arrow(const IColumn& column, const UInt8* null_map,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override;
     void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
diff --git a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h 
b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h
index 092d2202b0..85c27e7c1c 100644
--- a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h
+++ b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h
@@ -51,7 +51,7 @@ public:
     void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) 
const override {
         LOG(FATAL) << "Not support read from jsonb to FixedLengthObject";
     }
-    void write_column_to_arrow(const IColumn& column, const UInt8* null_map,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override {
         LOG(FATAL) << "Not support write FixedLengthObject column to arrow";
diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.cpp 
b/be/src/vec/data_types/serde/data_type_hll_serde.cpp
index 2c32ceaccc..72052d47cf 100644
--- a/be/src/vec/data_types/serde/data_type_hll_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_hll_serde.cpp
@@ -81,13 +81,13 @@ void DataTypeHLLSerDe::read_one_cell_from_jsonb(IColumn& 
column, const JsonbValu
     col.insert_value(hyper_log_log);
 }
 
-void DataTypeHLLSerDe::write_column_to_arrow(const IColumn& column, const 
UInt8* null_map,
+void DataTypeHLLSerDe::write_column_to_arrow(const IColumn& column, const 
NullMap* null_map,
                                              arrow::ArrayBuilder* 
array_builder, int start,
                                              int end) const {
     const auto& col = assert_cast<const ColumnHLL&>(column);
     auto& builder = assert_cast<arrow::StringBuilder&>(*array_builder);
     for (size_t string_i = start; string_i < end; ++string_i) {
-        if (null_map && null_map[string_i]) {
+        if (null_map && (*null_map)[string_i]) {
             checkArrowStatus(builder.AppendNull(), column.get_name(),
                              array_builder->type()->name());
         } else {
diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.h 
b/be/src/vec/data_types/serde/data_type_hll_serde.h
index 7decc40134..981b197ca8 100644
--- a/be/src/vec/data_types/serde/data_type_hll_serde.h
+++ b/be/src/vec/data_types/serde/data_type_hll_serde.h
@@ -41,7 +41,7 @@ public:
                                  int32_t col_id, int row_num) const override;
 
     void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) 
const override;
-    void write_column_to_arrow(const IColumn& column, const UInt8* null_map,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override;
     void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp 
b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
index 03d0038ee0..ff42a06c9d 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
@@ -16,6 +16,8 @@
 // under the License.
 
 #include "data_type_jsonb_serde.h"
+
+#include <arrow/array/builder_binary.h>
 namespace doris {
 namespace vectorized {
 
@@ -52,5 +54,24 @@ Status DataTypeJsonbSerDe::write_column_to_mysql(const 
IColumn& column,
     return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
 }
 
+void DataTypeJsonbSerDe::write_column_to_arrow(const IColumn& column, const 
NullMap* null_map,
+                                               arrow::ArrayBuilder* 
array_builder, int start,
+                                               int end) const {
+    const auto& string_column = assert_cast<const ColumnString&>(column);
+    auto& builder = assert_cast<arrow::StringBuilder&>(*array_builder);
+    for (size_t string_i = start; string_i < end; ++string_i) {
+        if (null_map && (*null_map)[string_i]) {
+            checkArrowStatus(builder.AppendNull(), column.get_name(),
+                             array_builder->type()->name());
+            continue;
+        }
+        std::string_view string_ref = 
string_column.get_data_at(string_i).to_string_view();
+        std::string json_string =
+                JsonbToJson::jsonb_to_json_string(string_ref.data(), 
string_ref.size());
+        checkArrowStatus(builder.Append(json_string.data(), 
json_string.size()), column.get_name(),
+                         array_builder->type()->name());
+    }
+}
+
 } // namespace vectorized
 } // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h 
b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
index e2d7b8c7c0..9bf523504c 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
@@ -38,6 +38,9 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe {
                                  int row_idx, bool col_const) const override;
     Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer<false>& 
row_buffer,
                                  int row_idx, bool col_const) const override;
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
+                               arrow::ArrayBuilder* array_builder, int start,
+                               int end) const override;
 
 private:
     template <bool is_binary_format>
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp 
b/be/src/vec/data_types/serde/data_type_map_serde.cpp
index 187c3ffc15..fb3eddc4ff 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -17,6 +17,7 @@
 
 #include "data_type_map_serde.h"
 
+#include "arrow/array/builder_nested.h"
 #include "util/jsonb_document.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_const.h"
@@ -43,16 +44,50 @@ void DataTypeMapSerDe::write_one_cell_to_jsonb(const 
IColumn& column, JsonbWrite
     result.writeEndBinary();
 }
 
-void DataTypeMapSerDe::write_column_to_arrow(const IColumn& column, const 
UInt8* null_map,
+void DataTypeMapSerDe::write_column_to_arrow(const IColumn& column, const 
NullMap* null_map,
                                              arrow::ArrayBuilder* 
array_builder, int start,
                                              int end) const {
-    LOG(FATAL) << "Not support write " << column.get_name() << " to arrow";
+    auto& builder = assert_cast<arrow::MapBuilder&>(*array_builder);
+    auto& map_column = assert_cast<const ColumnMap&>(column);
+    const IColumn& nested_keys_column = map_column.get_keys();
+    CHECK(!nested_keys_column.is_nullable());
+    const IColumn& nested_values_column = map_column.get_values();
+    auto& offsets = map_column.get_offsets();
+    auto key_builder = builder.key_builder();
+    auto value_builder = builder.item_builder();
+    for (size_t r = start; r < end; ++r) {
+        if (null_map && (*null_map)[r]) {
+            checkArrowStatus(builder.AppendNull(), column.get_name(),
+                             array_builder->type()->name());
+        } else {
+            checkArrowStatus(builder.Append(), column.get_name(), 
array_builder->type()->name());
+            key_serde->write_column_to_arrow(nested_keys_column, nullptr, 
key_builder,
+                                             offsets[r - 1], offsets[r]);
+            value_serde->write_column_to_arrow(nested_values_column, nullptr, 
value_builder,
+                                               offsets[r - 1], offsets[r]);
+        }
+    }
 }
 
 void DataTypeMapSerDe::read_column_from_arrow(IColumn& column, const 
arrow::Array* arrow_array,
                                               int start, int end,
                                               const cctz::time_zone& ctz) 
const {
-    LOG(FATAL) << "Not support read " << column.get_name() << " from arrow";
+    auto& column_map = static_cast<ColumnMap&>(column);
+    auto& offsets_data = column_map.get_offsets();
+    auto concrete_map = down_cast<const arrow::MapArray*>(arrow_array);
+    auto arrow_offsets_array = concrete_map->offsets();
+    auto arrow_offsets = 
down_cast<arrow::Int32Array*>(arrow_offsets_array.get());
+    auto prev_size = offsets_data.back();
+    auto arrow_nested_start_offset = arrow_offsets->Value(start);
+    auto arrow_nested_end_offset = arrow_offsets->Value(end);
+    for (int64_t i = start + 1; i < end + 1; ++i) {
+        // convert to doris offset, start from offsets.back()
+        offsets_data.emplace_back(prev_size + arrow_offsets->Value(i) - 
arrow_nested_start_offset);
+    }
+    key_serde->read_column_from_arrow(column_map.get_keys(), 
concrete_map->keys().get(),
+                                      arrow_nested_start_offset, 
arrow_nested_end_offset, ctz);
+    value_serde->read_column_from_arrow(column_map.get_values(), 
concrete_map->items().get(),
+                                        arrow_nested_start_offset, 
arrow_nested_end_offset, ctz);
 }
 
 template <bool is_binary_format>
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.h 
b/be/src/vec/data_types/serde/data_type_map_serde.h
index f41f75e1e8..67474d0676 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.h
+++ b/be/src/vec/data_types/serde/data_type_map_serde.h
@@ -50,7 +50,7 @@ public:
                                  int32_t col_id, int row_num) const override;
 
     void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) 
const override;
-    void write_column_to_arrow(const IColumn& column, const UInt8* null_map,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override;
     void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp 
b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index f2938450aa..654890f501 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -101,22 +101,13 @@ void 
DataTypeNullableSerDe::read_one_cell_from_jsonb(IColumn& column, const Json
    1/ convert the null_map from doris to arrow null byte map
    2/ pass the arrow null byteamp to nested column , and call AppendValues
 **/
-void DataTypeNullableSerDe::write_column_to_arrow(const IColumn& column, const 
UInt8* null_map,
+void DataTypeNullableSerDe::write_column_to_arrow(const IColumn& column, const 
NullMap* null_map,
                                                   arrow::ArrayBuilder* 
array_builder, int start,
                                                   int end) const {
     const auto& column_nullable = assert_cast<const ColumnNullable&>(column);
-    const PaddedPODArray<UInt8>& bytemap = column_nullable.get_null_map_data();
-    PaddedPODArray<UInt8> res;
-    if (column_nullable.has_null()) {
-        res.reserve(end - start);
-        for (size_t i = start; i < end; ++i) {
-            res.emplace_back(
-                    !(bytemap)[i]); //Invert values since Arrow interprets 1 
as a non-null value
-        }
-    }
-    const UInt8* arrow_null_bytemap_raw_ptr = res.empty() ? nullptr : 
res.data();
     nested_serde->write_column_to_arrow(column_nullable.get_nested_column(),
-                                        arrow_null_bytemap_raw_ptr, 
array_builder, start, end);
+                                        &column_nullable.get_null_map_data(), 
array_builder, start,
+                                        end);
 }
 
 void DataTypeNullableSerDe::read_column_from_arrow(IColumn& column, const 
arrow::Array* arrow_array,
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h 
b/be/src/vec/data_types/serde/data_type_nullable_serde.h
index 170dc8e572..1c7dc1b8a5 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.h
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h
@@ -43,7 +43,7 @@ public:
                                  int32_t col_id, int row_num) const override;
 
     void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) 
const override;
-    void write_column_to_arrow(const IColumn& column, const UInt8* null_map,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override;
     void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp 
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index aac945ebf4..081ad569c3 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -65,30 +65,35 @@ using DORIS_NUMERIC_ARROW_BUILDER =
                 >;
 
 template <typename T>
-void DataTypeNumberSerDe<T>::write_column_to_arrow(const IColumn& column, 
const UInt8* null_map,
+void DataTypeNumberSerDe<T>::write_column_to_arrow(const IColumn& column, 
const NullMap* null_map,
                                                    arrow::ArrayBuilder* 
array_builder, int start,
                                                    int end) const {
     auto& col_data = assert_cast<const ColumnType&>(column).get_data();
     using ARROW_BUILDER_TYPE = typename TypeMapLookup<T, 
DORIS_NUMERIC_ARROW_BUILDER>::ValueType;
+    auto arrow_null_map = revert_null_map(null_map, start, end);
+    auto arrow_null_map_data = arrow_null_map.empty() ? nullptr : 
arrow_null_map.data();
     if constexpr (std::is_same_v<T, UInt8>) {
         ARROW_BUILDER_TYPE& builder = 
assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
         checkArrowStatus(
                 builder.AppendValues(reinterpret_cast<const 
uint8_t*>(col_data.data() + start),
-                                     end - start, reinterpret_cast<const 
uint8_t*>(null_map)),
+                                     end - start,
+                                     reinterpret_cast<const 
uint8_t*>(arrow_null_map_data)),
                 column.get_name(), array_builder->type()->name());
     } else if constexpr (std::is_same_v<T, Int128> || std::is_same_v<T, 
UInt128>) {
         ARROW_BUILDER_TYPE& builder = 
assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
         size_t fixed_length = sizeof(typename ColumnType::value_type);
         const uint8_t* data_start =
                 reinterpret_cast<const uint8_t*>(col_data.data()) + start * 
fixed_length;
-        checkArrowStatus(builder.AppendValues(data_start, end - start,
-                                              reinterpret_cast<const 
uint8_t*>(null_map)),
-                         column.get_name(), array_builder->type()->name());
+        checkArrowStatus(
+                builder.AppendValues(data_start, end - start,
+                                     reinterpret_cast<const 
uint8_t*>(arrow_null_map_data)),
+                column.get_name(), array_builder->type()->name());
     } else {
         ARROW_BUILDER_TYPE& builder = 
assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
-        checkArrowStatus(builder.AppendValues(col_data.data() + start, end - 
start,
-                                              reinterpret_cast<const 
uint8_t*>(null_map)),
-                         column.get_name(), array_builder->type()->name());
+        checkArrowStatus(
+                builder.AppendValues(col_data.data() + start, end - start,
+                                     reinterpret_cast<const 
uint8_t*>(arrow_null_map_data)),
+                column.get_name(), array_builder->type()->name());
     }
 }
 
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h 
b/be/src/vec/data_types/serde/data_type_number_serde.h
index 7e1f22874d..2130d78493 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -63,7 +63,7 @@ public:
 
     void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) 
const override;
 
-    void write_column_to_arrow(const IColumn& column, const UInt8* null_map,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override;
     void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
diff --git a/be/src/vec/data_types/serde/data_type_object_serde.h 
b/be/src/vec/data_types/serde/data_type_object_serde.h
index 6ffbd07eb9..ceb4a20cf2 100644
--- a/be/src/vec/data_types/serde/data_type_object_serde.h
+++ b/be/src/vec/data_types/serde/data_type_object_serde.h
@@ -52,7 +52,7 @@ public:
         LOG(FATAL) << "Not support write json object to column";
     }
 
-    void write_column_to_arrow(const IColumn& column, const UInt8* null_map,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override {
         LOG(FATAL) << "Not support write object column to arrow";
diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h 
b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h
index c9126ab298..ebeccca097 100644
--- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h
+++ b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h
@@ -48,7 +48,7 @@ public:
                                  int32_t col_id, int row_num) const override;
 
     void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) 
const override;
-    void write_column_to_arrow(const IColumn& column, const UInt8* null_map,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override {
         LOG(FATAL) << "Not support write " << column.get_name() << " to arrow";
diff --git a/be/src/vec/data_types/serde/data_type_serde.h 
b/be/src/vec/data_types/serde/data_type_serde.h
index 7064e308c9..4df22029c9 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -26,6 +26,8 @@
 #include "common/status.h"
 #include "util/jsonb_writer.h"
 #include "util/mysql_row_buffer.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/pod_array.h"
 #include "vec/common/pod_array_fwd.h"
 #include "vec/core/types.h"
 
@@ -88,7 +90,7 @@ public:
     // JSON serializer and deserializer
 
     // Arrow serializer and deserializer
-    virtual void write_column_to_arrow(const IColumn& column, const UInt8* 
null_map,
+    virtual void write_column_to_arrow(const IColumn& column, const NullMap* 
null_map,
                                        arrow::ArrayBuilder* array_builder, int 
start,
                                        int end) const = 0;
     virtual void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
@@ -100,6 +102,20 @@ protected:
     bool _return_object_as_string = false;
 };
 
+/// Invert values, since Arrow interprets 1 as non-null while Doris interprets 1 
as null
+inline static NullMap revert_null_map(const NullMap* null_bytemap, size_t 
start, size_t end) {
+    NullMap res;
+    if (!null_bytemap) {
+        return res;
+    }
+
+    res.reserve(end - start);
+    for (size_t i = start; i < end; ++i) {
+        res.emplace_back(!(*null_bytemap)[i]);
+    }
+    return res;
+}
+
 inline void checkArrowStatus(const arrow::Status& status, const std::string& 
column,
                              const std::string& format_name) {
     if (!status.ok()) {
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.cpp 
b/be/src/vec/data_types/serde/data_type_string_serde.cpp
index 02be5f8072..bff2df4431 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_string_serde.cpp
@@ -68,27 +68,20 @@ void DataTypeStringSerDe::read_one_cell_from_jsonb(IColumn& 
column, const JsonbV
     col.insert_data(blob->getBlob(), blob->getBlobLen());
 }
 
-void DataTypeStringSerDe::write_column_to_arrow(const IColumn& column, const 
UInt8* null_map,
+void DataTypeStringSerDe::write_column_to_arrow(const IColumn& column, const 
NullMap* null_map,
                                                 arrow::ArrayBuilder* 
array_builder, int start,
                                                 int end) const {
     const auto& string_column = assert_cast<const ColumnString&>(column);
     auto& builder = assert_cast<arrow::StringBuilder&>(*array_builder);
     for (size_t string_i = start; string_i < end; ++string_i) {
-        if (null_map && null_map[string_i]) {
+        if (null_map && (*null_map)[string_i]) {
             checkArrowStatus(builder.AppendNull(), column.get_name(),
                              array_builder->type()->name());
             continue;
         }
         std::string_view string_ref = 
string_column.get_data_at(string_i).to_string_view();
-        if (column.get_data_type() == TypeIndex::JSONB) {
-            std::string json_string =
-                    JsonbToJson::jsonb_to_json_string(string_ref.data(), 
string_ref.size());
-            checkArrowStatus(builder.Append(json_string.data(), 
json_string.size()),
-                             column.get_name(), array_builder->type()->name());
-        } else {
-            checkArrowStatus(builder.Append(string_ref.data(), 
string_ref.size()),
-                             column.get_name(), array_builder->type()->name());
-        }
+        checkArrowStatus(builder.Append(string_ref.data(), string_ref.size()), 
column.get_name(),
+                         array_builder->type()->name());
     }
 }
 
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h 
b/be/src/vec/data_types/serde/data_type_string_serde.h
index 1a8189fc8c..85b60cb268 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -41,7 +41,7 @@ public:
 
     void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) 
const override;
 
-    void write_column_to_arrow(const IColumn& column, const UInt8* null_map,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override;
     void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp 
b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index 3e384e8140..ed6e9c1b7b 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -17,6 +17,7 @@
 
 #include "data_type_struct_serde.h"
 
+#include "arrow/array/builder_nested.h"
 #include "util/jsonb_document.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_const.h"
@@ -45,16 +46,36 @@ void DataTypeStructSerDe::read_one_cell_from_jsonb(IColumn& 
column, const JsonbV
     column.deserialize_and_insert_from_arena(blob->getBlob());
 }
 
-void DataTypeStructSerDe::write_column_to_arrow(const IColumn& column, const 
UInt8* null_map,
+void DataTypeStructSerDe::write_column_to_arrow(const IColumn& column, const 
NullMap* null_map,
                                                 arrow::ArrayBuilder* 
array_builder, int start,
                                                 int end) const {
-    LOG(FATAL) << "Not support write " << column.get_name() << " to arrow";
+    auto& builder = assert_cast<arrow::StructBuilder&>(*array_builder);
+    auto& struct_column = assert_cast<const ColumnStruct&>(column);
+    for (int r = start; r < end; ++r) {
+        if (null_map != nullptr && (*null_map)[r]) {
+            checkArrowStatus(builder.AppendNull(), struct_column.get_name(),
+                             builder.type()->name());
+            continue;
+        }
+        checkArrowStatus(builder.Append(), struct_column.get_name(), 
builder.type()->name());
+        for (size_t ei = 0; ei < struct_column.tuple_size(); ++ei) {
+            auto elem_builder = builder.field_builder(ei);
+            
elemSerDeSPtrs[ei]->write_column_to_arrow(struct_column.get_column(ei), nullptr,
+                                                      elem_builder, r, r + 1);
+        }
+    }
 }
 
 void DataTypeStructSerDe::read_column_from_arrow(IColumn& column, const 
arrow::Array* arrow_array,
                                                  int start, int end,
                                                  const cctz::time_zone& ctz) 
const {
-    LOG(FATAL) << "Not support read " << column.get_name() << " from arrow";
+    auto& struct_column = static_cast<ColumnStruct&>(column);
+    auto concrete_struct = down_cast<const arrow::StructArray*>(arrow_array);
+    DCHECK_EQ(struct_column.tuple_size(), concrete_struct->num_fields());
+    for (size_t i = 0; i < struct_column.tuple_size(); ++i) {
+        elemSerDeSPtrs[i]->read_column_from_arrow(struct_column.get_column(i),
+                                                  
concrete_struct->field(i).get(), start, end, ctz);
+    }
 }
 
 template <bool is_binary_format>
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.h 
b/be/src/vec/data_types/serde/data_type_struct_serde.h
index 3b5c183246..927c7ac9ed 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.h
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.h
@@ -51,7 +51,7 @@ public:
 
     void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) 
const override;
 
-    void write_column_to_arrow(const IColumn& column, const UInt8* null_map,
+    void write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                arrow::ArrayBuilder* array_builder, int start,
                                int end) const override;
     void read_column_from_arrow(IColumn& column, const arrow::Array* 
arrow_array, int start,
diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp 
b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
index e7917e7cea..a51cb9bc90 100644
--- a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
@@ -52,6 +52,7 @@
 #include "util/bitmap_value.h"
 #include "util/quantile_state.h"
 #include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
 #include "vec/columns/column_complex.h"
 #include "vec/columns/column_decimal.h"
 #include "vec/columns/column_nullable.h"
@@ -61,29 +62,40 @@
 #include "vec/core/field.h"
 #include "vec/core/types.h"
 #include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_array.h"
 #include "vec/data_types/data_type_bitmap.h"
 #include "vec/data_types/data_type_date.h"
 #include "vec/data_types/data_type_date_time.h"
 #include "vec/data_types/data_type_decimal.h"
 #include "vec/data_types/data_type_hll.h"
+#include "vec/data_types/data_type_map.h"
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_number.h"
 #include "vec/data_types/data_type_quantilestate.h"
 #include "vec/data_types/data_type_string.h"
+#include "vec/data_types/data_type_struct.h"
 #include "vec/data_types/data_type_time_v2.h"
 #include "vec/runtime/vdatetime_value.h"
 #include "vec/utils/arrow_column_to_doris_column.h"
 namespace doris::vectorized {
 
+template <bool is_scalar>
 void serialize_and_deserialize_arrow_test() {
     vectorized::Block block;
-    std::vector<std::tuple<std::string, FieldType, int, PrimitiveType, bool>> 
cols {
-            {"k1", FieldType::OLAP_FIELD_TYPE_INT, 1, TYPE_INT, false},
-            {"k7", FieldType::OLAP_FIELD_TYPE_INT, 7, TYPE_INT, true},
-            {"k2", FieldType::OLAP_FIELD_TYPE_STRING, 2, TYPE_STRING, false},
-            {"k3", FieldType::OLAP_FIELD_TYPE_DECIMAL128I, 3, 
TYPE_DECIMAL128I, false},
-            {"k11", FieldType::OLAP_FIELD_TYPE_DATETIME, 11, TYPE_DATETIME, 
false},
-            {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}};
+    std::vector<std::tuple<std::string, FieldType, int, PrimitiveType, bool>> 
cols;
+    if constexpr (is_scalar) {
+        cols = {{"k1", FieldType::OLAP_FIELD_TYPE_INT, 1, TYPE_INT, false},
+                {"k7", FieldType::OLAP_FIELD_TYPE_INT, 7, TYPE_INT, true},
+                {"k2", FieldType::OLAP_FIELD_TYPE_STRING, 2, TYPE_STRING, 
false},
+                {"k3", FieldType::OLAP_FIELD_TYPE_DECIMAL128I, 3, 
TYPE_DECIMAL128I, false},
+                {"k11", FieldType::OLAP_FIELD_TYPE_DATETIME, 11, 
TYPE_DATETIME, false},
+                {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, 
false}};
+    } else {
+        cols = {{"a", FieldType::OLAP_FIELD_TYPE_ARRAY, 6, TYPE_ARRAY, true},
+                {"m", FieldType::OLAP_FIELD_TYPE_MAP, 8, TYPE_MAP, true},
+                {"s", FieldType::OLAP_FIELD_TYPE_STRUCT, 5, TYPE_STRUCT, 
true}};
+    }
+
     int row_num = 7;
     // make desc and generate block
     TupleDescriptor tuple_desc(PTupleDescriptor(), true);
@@ -117,7 +129,11 @@ void serialize_and_deserialize_arrow_test() {
                             
vectorized::make_nullable(std::move(column_vector_int32));
                     auto mutable_nullable_vector = 
std::move(*column_nullable_vector).mutate();
                     for (int i = 0; i < row_num; i++) {
-                        mutable_nullable_vector->insert(int32(i));
+                        if (i % 2 == 0) {
+                            mutable_nullable_vector->insert_default();
+                        } else {
+                            mutable_nullable_vector->insert(int32(i));
+                        }
                     }
                     auto data_type = vectorized::make_nullable(
                             std::make_shared<vectorized::DataTypeInt32>());
@@ -239,6 +255,97 @@ void serialize_and_deserialize_arrow_test() {
                 block.insert(test_datetime);
             }
             break;
+        case TYPE_ARRAY: // array
+            type_desc.add_sub_type(TYPE_STRING, true);
+            tslot.__set_slotType(type_desc.to_thrift());
+            {
+                DataTypePtr s =
+                        
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+                DataTypePtr au = std::make_shared<DataTypeArray>(s);
+                Array a1, a2;
+                a1.push_back(String("sss"));
+                a1.push_back(Null());
+                a1.push_back(String("clever amory"));
+                a2.push_back(String("hello amory"));
+                a2.push_back(Null());
+                a2.push_back(String("cute amory"));
+                a2.push_back(String("sf"));
+                MutableColumnPtr array_column = au->create_column();
+                array_column->reserve(2);
+                array_column->insert(a1);
+                array_column->insert(a2);
+                vectorized::ColumnWithTypeAndName 
type_and_name(array_column->get_ptr(), au,
+                                                                col_name);
+                block.insert(type_and_name);
+            }
+            break;
+        case TYPE_MAP:
+            type_desc.add_sub_type(TYPE_STRING, true);
+            type_desc.add_sub_type(TYPE_STRING, true);
+            tslot.__set_slotType(type_desc.to_thrift());
+            {
+                DataTypePtr s = std::make_shared<DataTypeString>();
+                DataTypePtr d =
+                        
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+                DataTypePtr m = std::make_shared<DataTypeMap>(s, d);
+                Array k1, k2, v1, v2;
+                k1.push_back("null");
+                k1.push_back("doris");
+                k1.push_back("clever amory");
+                v1.push_back("ss");
+                v1.push_back(Null());
+                v1.push_back("NULL");
+                k2.push_back("hello amory");
+                k2.push_back("NULL");
+                k2.push_back("cute amory");
+                k2.push_back("doris");
+                v2.push_back("s");
+                v2.push_back("0");
+                v2.push_back("sf");
+                v2.push_back(Null());
+                Map m1, m2;
+                m1.push_back(k1);
+                m1.push_back(v1);
+                m2.push_back(k2);
+                m2.push_back(v2);
+                MutableColumnPtr map_column = m->create_column();
+                map_column->reserve(2);
+                map_column->insert(m1);
+                map_column->insert(m2);
+                vectorized::ColumnWithTypeAndName 
type_and_name(map_column->get_ptr(), m, col_name);
+                block.insert(type_and_name);
+            }
+            break;
+        case TYPE_STRUCT:
+            type_desc.add_sub_type(TYPE_STRING, "name", true);
+            type_desc.add_sub_type(TYPE_LARGEINT, "age", true);
+            type_desc.add_sub_type(TYPE_BOOLEAN, "is", true);
+            tslot.__set_slotType(type_desc.to_thrift());
+            {
+                DataTypePtr s =
+                        
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+                DataTypePtr d =
+                        
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt128>());
+                DataTypePtr m =
+                        
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
+                DataTypePtr st =
+                        
std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {s, d, m});
+                Tuple t1, t2;
+                t1.push_back(String("amory cute"));
+                t1.push_back(__int128_t(37));
+                t1.push_back(true);
+                t2.push_back("null");
+                t2.push_back(__int128_t(26));
+                t2.push_back(false);
+                MutableColumnPtr struct_column = st->create_column();
+                struct_column->reserve(2);
+                struct_column->insert(t1);
+                struct_column->insert(t2);
+                vectorized::ColumnWithTypeAndName 
type_and_name(struct_column->get_ptr(), st,
+                                                                col_name);
+                block.insert(type_and_name);
+            }
+            break;
         default:
             break;
         }
@@ -260,6 +367,8 @@ void serialize_and_deserialize_arrow_test() {
 
     convert_to_arrow_batch(block, _arrow_schema, arrow::default_memory_pool(), 
&result);
     Block new_block = block.clone_empty();
+    EXPECT_TRUE(result != nullptr);
+    std::cout << "result: " << result->ToString() << std::endl;
     // deserialize
     for (auto t : cols) {
         std::string real_column_name = std::get<0>(t);
@@ -315,7 +424,11 @@ void serialize_and_deserialize_arrow_test() {
 }
 
 TEST(DataTypeSerDeArrowTest, DataTypeScalaSerDeTest) {
-    serialize_and_deserialize_arrow_test();
+    serialize_and_deserialize_arrow_test<true>();
+}
+
+TEST(DataTypeSerDeArrowTest, DataTypeCollectionSerDeTest) {
+    serialize_and_deserialize_arrow_test<false>();
 }
 
 } // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to