This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 796c4b38b35a8f068a38541354fd3a37919bb3e1
Author: eldenmoon <lihan...@selectdb.com>
AuthorDate: Mon Nov 25 14:07:00 2024 +0800

    [Fix](Variant) fix some nested explode_variant_array bug and add more test
---
 be/src/vec/exprs/table_function/vexplode.cpp       | 40 ++++++++++++++++++----
 be/src/vec/exprs/table_function/vexplode.h         |  1 +
 .../vec/functions/array/function_array_distance.h  |  4 +--
 .../vec/functions/array/function_array_utils.cpp   | 13 +++++--
 be/src/vec/functions/array/function_array_utils.h  |  6 +++-
 5 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/be/src/vec/exprs/table_function/vexplode.cpp 
b/be/src/vec/exprs/table_function/vexplode.cpp
index feef58cd277..5fa378f6351 100644
--- a/be/src/vec/exprs/table_function/vexplode.cpp
+++ b/be/src/vec/exprs/table_function/vexplode.cpp
@@ -23,12 +23,17 @@
 
 #include "common/status.h"
 #include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_nothing.h"
 #include "vec/columns/column_object.h"
 #include "vec/core/block.h"
 #include "vec/core/column_with_type_and_name.h"
 #include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_nothing.h"
 #include "vec/exprs/vexpr.h"
 #include "vec/exprs/vexpr_context.h"
+#include "vec/functions/function_helpers.h"
 
 namespace doris::vectorized {
 #include "common/compile_check_begin.h"
@@ -37,6 +42,34 @@ VExplodeTableFunction::VExplodeTableFunction() {
     _fn_name = "vexplode";
 }
 
+Status VExplodeTableFunction::_process_init_variant(Block* block, int 
value_column_idx) {
+    // Variant input: explode the array held in the variant root and mark the output as variant.
+    const auto& variant_column = check_and_get_column<ColumnObject>(
+            remove_nullable(block->get_by_position(value_column_idx)
+                                    .column->convert_to_full_column_if_const())
+                    .get());
+    _detail.output_as_variant = true;
+    if (!variant_column->is_null_root()) {
+        _array_column = variant_column->get_root();
+        // The exploded nested column must be wrapped in a variant column;
+        // otherwise the output type would be mismatched.
+        const auto* array_type = check_and_get_data_type<DataTypeArray>(
+                remove_nullable(variant_column->get_root_type()).get());
+        if (array_type == nullptr) {
+            return Status::NotSupported("explode not support none array type 
{}",
+                                        
variant_column->get_root_type()->get_name());
+        }
+        _detail.nested_type = array_type->get_nested_type();
+    } else {
+        // Null root: use the Nothing type and a Nullable(Array(Nothing)) column of defaults, one per input row.
+        _array_column = 
ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)),
+                                               ColumnUInt8::create(0));
+        
_array_column->assume_mutable()->insert_many_defaults(variant_column->size());
+        _detail.nested_type = std::make_shared<DataTypeNothing>();
+    }
+    return Status::OK();
+}
+
 Status VExplodeTableFunction::process_init(Block* block, RuntimeState* state) {
     CHECK(_expr_context->root()->children().size() == 1)
             << "VExplodeTableFunction only support 1 child but has "
@@ -47,12 +80,7 @@ Status VExplodeTableFunction::process_init(Block* block, 
RuntimeState* state) {
                                                                   
&value_column_idx));
     if 
(WhichDataType(remove_nullable(block->get_by_position(value_column_idx).type))
                 .is_variant_type()) {
-        // explode variant array
-        const auto& variant_column = check_and_get_column<ColumnObject>(
-                remove_nullable(block->get_by_position(value_column_idx)
-                                        
.column->convert_to_full_column_if_const())
-                        .get());
-        _array_column = variant_column->get_root();
+        RETURN_IF_ERROR(_process_init_variant(block, value_column_idx));
     } else {
         _array_column =
                 
block->get_by_position(value_column_idx).column->convert_to_full_column_if_const();
diff --git a/be/src/vec/exprs/table_function/vexplode.h 
b/be/src/vec/exprs/table_function/vexplode.h
index 17b67d07824..7b53926ae2c 100644
--- a/be/src/vec/exprs/table_function/vexplode.h
+++ b/be/src/vec/exprs/table_function/vexplode.h
@@ -47,6 +47,7 @@ public:
     int get_value(MutableColumnPtr& column, int max_step) override;
 
 private:
+    Status _process_init_variant(Block* block, int value_column_idx);
     ColumnPtr _array_column;
     ColumnArrayExecutionData _detail;
     size_t _array_offset; // start offset of array[row_idx]
diff --git a/be/src/vec/functions/array/function_array_distance.h 
b/be/src/vec/functions/array/function_array_distance.h
index e03e52a0ce1..0984479ce34 100644
--- a/be/src/vec/functions/array/function_array_distance.h
+++ b/be/src/vec/functions/array/function_array_distance.h
@@ -128,8 +128,8 @@ public:
 
         const auto& offsets1 = *arr1.offsets_ptr;
         const auto& offsets2 = *arr2.offsets_ptr;
-        const auto& nested_col1 = assert_cast<const 
ColumnFloat64*>(arr1.nested_col);
-        const auto& nested_col2 = assert_cast<const 
ColumnFloat64*>(arr2.nested_col);
+        const auto& nested_col1 = assert_cast<const 
ColumnFloat64*>(arr1.nested_col.get());
+        const auto& nested_col2 = assert_cast<const 
ColumnFloat64*>(arr2.nested_col.get());
         for (ssize_t row = 0; row < offsets1.size(); ++row) {
             if (arr1.array_nullmap_data && arr1.array_nullmap_data[row]) {
                 dst_null_data[row] = true;
diff --git a/be/src/vec/functions/array/function_array_utils.cpp 
b/be/src/vec/functions/array/function_array_utils.cpp
index ab999aa21cc..d25904baf93 100644
--- a/be/src/vec/functions/array/function_array_utils.cpp
+++ b/be/src/vec/functions/array/function_array_utils.cpp
@@ -24,7 +24,9 @@
 
 #include "vec/columns/column.h"
 #include "vec/columns/column_nullable.h"
+#include "vec/columns/column_object.h"
 #include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type.h"
 
 namespace doris::vectorized {
 
@@ -45,12 +47,19 @@ bool extract_column_array_info(const IColumn& src, 
ColumnArrayExecutionData& dat
 
     // extract array offsets and nested column
     data.offsets_ptr = &data.array_col->get_offsets();
-    data.nested_col = &data.array_col->get_data();
+    data.nested_col = data.array_col->get_data_ptr();
     // extract nested column is nullable
     if (data.nested_col->is_nullable()) {
         const auto& nested_null_col = reinterpret_cast<const 
ColumnNullable&>(*data.nested_col);
         data.nested_nullmap_data = nested_null_col.get_null_map_data().data();
-        data.nested_col = nested_null_col.get_nested_column_ptr().get();
+        data.nested_col = nested_null_col.get_nested_column_ptr();
+    }
+    if (data.output_as_variant &&
+        !WhichDataType(remove_nullable(data.nested_type)).is_variant_type()) {
+        // set variant root column/type to from column/type
+        auto variant = ColumnObject::create(true /*always nullable*/);
+        variant->create_root(data.nested_type, 
make_nullable(data.nested_col)->assume_mutable());
+        data.nested_col = variant->get_ptr();
     }
     return true;
 }
diff --git a/be/src/vec/functions/array/function_array_utils.h 
b/be/src/vec/functions/array/function_array_utils.h
index 36bf811b770..0a3149bd60c 100644
--- a/be/src/vec/functions/array/function_array_utils.h
+++ b/be/src/vec/functions/array/function_array_utils.h
@@ -16,6 +16,7 @@
 // under the License.
 #pragma once
 
+#include "vec/columns/column.h"
 #include "vec/columns/column_array.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/columns_number.h"
@@ -54,7 +55,10 @@ public:
     const ColumnArray* array_col = nullptr;
     const ColumnArray::Offsets64* offsets_ptr = nullptr;
     const UInt8* nested_nullmap_data = nullptr;
-    const IColumn* nested_col = nullptr;
+    ColumnPtr nested_col = nullptr;
+    DataTypePtr nested_type = nullptr;
+    // wrap the nested column as variant column
+    bool output_as_variant = false;
 
     ColumnArrayMutableData to_mutable_data() const {
         ColumnArrayMutableData dst;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to