This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3205ac9544d [opt](function) Merge the implementation of the posexplode 
function into the explode function (#54845)
3205ac9544d is described below

commit 3205ac9544d2a4c2227b5c6b10dba0fee7de86c2
Author: Jerry Hu <[email protected]>
AuthorDate: Tue Aug 19 10:10:40 2025 +0800

    [opt](function) Merge the implementation of the posexplode function into 
the explode function (#54845)
    
    ### What problem does this PR solve?
    
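    Removes the duplicated `VPosExplodeTableFunction` implementation:
    `posexplode` is now served by `VExplodeV2TableFunction` with row-index
    generation enabled, and the generated `pos` field becomes non-nullable.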
    
    Doc: https://github.com/apache/doris-website/pull/2755
---
 .../table_function/table_function_factory.cpp      |   7 +-
 be/src/vec/exprs/table_function/vexplode_v2.cpp    |  24 +++-
 be/src/vec/exprs/table_function/vexplode_v2.h      |   9 +-
 be/src/vec/exprs/table_function/vposexplode.cpp    | 154 ---------------------
 be/src/vec/exprs/table_function/vposexplode.h      |  50 -------
 be/src/vec/functions/function_fake.cpp             |   2 +-
 .../functions/generator/PosExplode.java            |   2 +-
 .../functions/generator/PosExplodeOuter.java       |  10 +-
 8 files changed, 39 insertions(+), 219 deletions(-)

diff --git a/be/src/vec/exprs/table_function/table_function_factory.cpp 
b/be/src/vec/exprs/table_function/table_function_factory.cpp
index 88005cf8b9f..310e034e0c2 100644
--- a/be/src/vec/exprs/table_function/table_function_factory.cpp
+++ b/be/src/vec/exprs/table_function/table_function_factory.cpp
@@ -33,7 +33,6 @@
 #include "vec/exprs/table_function/vexplode_numbers.h"
 #include "vec/exprs/table_function/vexplode_split.h"
 #include "vec/exprs/table_function/vexplode_v2.h"
-#include "vec/exprs/table_function/vposexplode.h"
 #include "vec/utils/util.hpp"
 
 namespace doris::vectorized {
@@ -52,7 +51,7 @@ const std::unordered_map<std::string, 
std::function<std::unique_ptr<TableFunctio
                 {"explode_bitmap", 
TableFunctionCreator<VExplodeBitmapTableFunction>()},
                 {"explode_map", TableFunctionCreator<VExplodeMapTableFunction> 
{}},
                 {"explode_json_object", 
TableFunctionCreator<VExplodeJsonObjectTableFunction> {}},
-                {"posexplode", TableFunctionCreator<VPosExplodeTableFunction> 
{}},
+                {"posexplode", TableFunctionCreator<VExplodeV2TableFunction> 
{}},
                 {"explode", TableFunctionCreator<VExplodeV2TableFunction> {}},
                 {"explode_variant_array_old", 
TableFunctionCreator<VExplodeTableFunction>()},
                 {"explode_old", TableFunctionCreator<VExplodeTableFunction> 
{}}};
@@ -84,6 +83,10 @@ Status TableFunctionFactory::get_fn(const TFunction& t_fn, 
ObjectPool* pool, Tab
                 (*fn)->set_outer();
             }
 
+            if (fn_name_real_temp == "posexplode") {
+                
static_cast<VExplodeV2TableFunction*>(*fn)->set_generate_row_index(true);
+            }
+
             return Status::OK();
         }
     }
diff --git a/be/src/vec/exprs/table_function/vexplode_v2.cpp 
b/be/src/vec/exprs/table_function/vexplode_v2.cpp
index 5918dcd69ab..1a40bc1feda 100644
--- a/be/src/vec/exprs/table_function/vexplode_v2.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_v2.cpp
@@ -19,9 +19,12 @@
 
 #include <glog/logging.h>
 
+#include <algorithm>
+#include <cstdint>
 #include <ostream>
 
 #include "common/status.h"
+#include "runtime/primitive_type.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_array.h"
 #include "vec/columns/column_nothing.h"
@@ -112,9 +115,7 @@ void VExplodeV2TableFunction::process_row(size_t row_idx) {
             _array_offsets[i] = (*detail.offsets_ptr)[row_idx - 1];
             // find max size in array
             auto cur_size = (*detail.offsets_ptr)[row_idx] - _array_offsets[i];
-            if (_cur_size < cur_size) {
-                _cur_size = cur_size;
-            }
+            _cur_size = std::max<unsigned long>(_cur_size, cur_size);
         }
     }
     _row_idx = row_idx;
@@ -146,11 +147,17 @@ void 
VExplodeV2TableFunction::get_same_many_values(MutableColumnPtr& column, int
         throw Exception(ErrorCode::INTERNAL_ERROR,
                         "Only multiple columns can be returned within a 
struct.");
     }
+
+    if (_generate_row_index) {
+        auto& pos_column = 
assert_cast<ColumnInt32&>(struct_column->get_column(0));
+        pos_column.insert_many_vals(static_cast<int32_t>(_cur_offset), length);
+    }
+
     for (int i = 0; i < _multi_detail.size(); i++) {
         auto& detail = _multi_detail[i];
         size_t pos = _array_offsets[i] + _cur_offset;
         size_t element_size = _multi_detail[i].array_col->size_at(_row_idx);
-        auto& struct_field = struct_column->get_column(i);
+        auto& struct_field = struct_column->get_column(i + 
(_generate_row_index ? 1 : 0));
         if ((detail.array_nullmap_data && 
detail.array_nullmap_data[_row_idx])) {
             struct_field.insert_many_defaults(length);
         } else {
@@ -192,11 +199,18 @@ int VExplodeV2TableFunction::get_value(MutableColumnPtr& 
column, int max_step) {
             throw Exception(ErrorCode::INTERNAL_ERROR,
                             "Only multiple columns can be returned within a 
struct.");
         }
+
+        if (_generate_row_index) {
+            auto& pos_column = 
assert_cast<ColumnInt32&>(struct_column->get_column(0));
+            
pos_column.insert_range_of_integer(static_cast<int32_t>(_cur_offset),
+                                               
static_cast<int32_t>(_cur_offset + max_step));
+        }
+
         for (int i = 0; i < _multi_detail.size(); i++) {
             auto& detail = _multi_detail[i];
             size_t pos = _array_offsets[i] + _cur_offset;
             size_t element_size = 
_multi_detail[i].array_col->size_at(_row_idx);
-            auto& struct_field = struct_column->get_column(i);
+            auto& struct_field = struct_column->get_column(i + 
(_generate_row_index ? 1 : 0));
             if (detail.array_nullmap_data && 
detail.array_nullmap_data[_row_idx]) {
                 struct_field.insert_many_defaults(max_step);
             } else {
diff --git a/be/src/vec/exprs/table_function/vexplode_v2.h 
b/be/src/vec/exprs/table_function/vexplode_v2.h
index 20853f1ad18..f4b6bddbf42 100644
--- a/be/src/vec/exprs/table_function/vexplode_v2.h
+++ b/be/src/vec/exprs/table_function/vexplode_v2.h
@@ -46,12 +46,19 @@ public:
     void get_same_many_values(MutableColumnPtr& column, int length) override;
     int get_value(MutableColumnPtr& column, int max_step) override;
 
+    void set_generate_row_index(bool generate_row_index) {
+        _generate_row_index = generate_row_index;
+    }
+
 private:
     Status _process_init_variant(Block* block, int value_column_idx, int 
children_column_idx);
     std::vector<ColumnPtr> _array_columns;
-    size_t _row_idx;
+    size_t _row_idx {0};
     ColumnArrayExecutionDatas _multi_detail;
     std::vector<size_t> _array_offsets;
+
+    // `posexplode` & `posexplode_outer`
+    bool _generate_row_index {false};
 };
 
 #include "common/compile_check_end.h"
diff --git a/be/src/vec/exprs/table_function/vposexplode.cpp 
b/be/src/vec/exprs/table_function/vposexplode.cpp
deleted file mode 100644
index 53a95602737..00000000000
--- a/be/src/vec/exprs/table_function/vposexplode.cpp
+++ /dev/null
@@ -1,154 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "vec/exprs/table_function/vposexplode.h"
-
-#include <glog/logging.h>
-
-#include <ostream>
-#include <vector>
-
-#include "common/status.h"
-#include "vec/columns/column.h"
-#include "vec/columns/column_nullable.h"
-#include "vec/common/assert_cast.h"
-#include "vec/common/string_ref.h"
-#include "vec/core/block.h"
-#include "vec/core/column_with_type_and_name.h"
-#include "vec/exprs/vexpr.h"
-#include "vec/exprs/vexpr_context.h"
-
-namespace doris::vectorized {
-
-VPosExplodeTableFunction::VPosExplodeTableFunction() {
-    _fn_name = "posexplode";
-}
-
-Status VPosExplodeTableFunction::process_init(Block* block, RuntimeState* 
state) {
-    CHECK(_expr_context->root()->children().size() == 1)
-            << "VPosExplodeTableFunction only support 1 child but has "
-            << _expr_context->root()->children().size();
-
-    int value_column_idx = -1;
-    
RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(),
 block,
-                                                                  
&value_column_idx));
-
-    _collection_column =
-            
block->get_by_position(value_column_idx).column->convert_to_full_column_if_const();
-
-    if (!extract_column_array_info(*_collection_column, _array_detail)) {
-        return Status::NotSupported("column type {} not supported now, only 
support array",
-                                    
block->get_by_position(value_column_idx).column->get_name());
-    }
-    if (is_column_nullable(*_collection_column)) {
-        _array_data_column =
-                assert_cast<const ColumnArray&>(
-                        assert_cast<const 
ColumnNullable&>(*_collection_column).get_nested_column())
-                        .get_data_ptr();
-    } else {
-        _array_data_column = assert_cast<const 
ColumnArray&>(*_collection_column).get_data_ptr();
-    }
-    return Status::OK();
-}
-
-void VPosExplodeTableFunction::process_row(size_t row_idx) {
-    DCHECK(row_idx < _collection_column->size());
-    TableFunction::process_row(row_idx);
-
-    if (!_array_detail.array_nullmap_data || 
!_array_detail.array_nullmap_data[row_idx]) {
-        _collection_offset = (*_array_detail.offsets_ptr)[row_idx - 1];
-        _cur_size = (*_array_detail.offsets_ptr)[row_idx] - _collection_offset;
-    }
-}
-
-void VPosExplodeTableFunction::process_close() {
-    _collection_column = nullptr;
-    _array_data_column = nullptr;
-    _array_detail.reset();
-    _collection_offset = 0;
-}
-
-void VPosExplodeTableFunction::get_same_many_values(MutableColumnPtr& column, 
int length) {
-    // now we only support array column explode to struct column
-    size_t pos = _collection_offset + _cur_offset;
-    // if current is empty array row, also append a default value
-    if (current_empty()) {
-        column->insert_many_defaults(length);
-        return;
-    }
-    ColumnStruct* ret = nullptr;
-    // this _is_nullable is whole output column's nullable
-    if (_is_nullable) {
-        ret = assert_cast<ColumnStruct*>(
-                
assert_cast<ColumnNullable*>(column.get())->get_nested_column_ptr().get());
-        assert_cast<ColumnUInt8*>(
-                
assert_cast<ColumnNullable*>(column.get())->get_null_map_column_ptr().get())
-                ->insert_many_defaults(length);
-    } else if (is_column<ColumnStruct>(column.get())) {
-        ret = assert_cast<ColumnStruct*>(column.get());
-    } else {
-        throw Exception(ErrorCode::INTERNAL_ERROR,
-                        "only support array column explode to struct column");
-    }
-    if (!ret || ret->tuple_size() != 2) {
-        throw Exception(
-                ErrorCode::INTERNAL_ERROR,
-                "only support array column explode to two column, but given:  
", ret->tuple_size());
-    }
-    auto& pose_column_nullable = 
assert_cast<ColumnNullable&>(ret->get_column(0));
-    pose_column_nullable.get_null_map_column().insert_many_defaults(length);
-    assert_cast<ColumnInt32&>(pose_column_nullable.get_nested_column())
-            .insert_many_vals(_cur_offset, length);
-    ret->get_column(1).insert_many_from(*_array_data_column, pos, length);
-}
-
-int VPosExplodeTableFunction::get_value(MutableColumnPtr& column, int 
max_step) {
-    max_step = std::min(max_step, (int)(_cur_size - _cur_offset));
-    size_t pos = _collection_offset + _cur_offset;
-    if (current_empty()) {
-        column->insert_default();
-        max_step = 1;
-    } else {
-        ColumnStruct* struct_column = nullptr;
-        if (_is_nullable) {
-            auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
-            struct_column =
-                    
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
-            auto* nullmap_column =
-                    
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
-            // here nullmap_column insert max_step many defaults as if 
array[row_idx] is NULL
-            // will be not update value, _cur_size = 0, means current_empty;
-            // so here could insert directly
-            nullmap_column->insert_many_defaults(max_step);
-        } else {
-            struct_column = assert_cast<ColumnStruct*>(column.get());
-        }
-        if (!struct_column || struct_column->tuple_size() != 2) {
-            throw Exception(ErrorCode::INTERNAL_ERROR,
-                            "only support array column explode to two column, 
but given:  ",
-                            struct_column->tuple_size());
-        }
-        auto& pose_column_nullable = 
assert_cast<ColumnNullable&>(struct_column->get_column(0));
-        
pose_column_nullable.get_null_map_column().insert_many_defaults(max_step);
-        assert_cast<ColumnInt32&>(pose_column_nullable.get_nested_column())
-                .insert_range_of_integer(_cur_offset, _cur_offset + max_step);
-        struct_column->get_column(1).insert_range_from(*_array_data_column, 
pos, max_step);
-    }
-    forward(max_step);
-    return max_step;
-}
-} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/table_function/vposexplode.h 
b/be/src/vec/exprs/table_function/vposexplode.h
deleted file mode 100644
index 4e021fd58da..00000000000
--- a/be/src/vec/exprs/table_function/vposexplode.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "common/status.h"
-#include "vec/columns/column_map.h"
-#include "vec/data_types/data_type.h"
-#include "vec/data_types/data_type_array.h"
-#include "vec/exprs/table_function/table_function.h"
-#include "vec/functions/array/function_array_utils.h"
-
-namespace doris::vectorized {
-
-class VPosExplodeTableFunction : public TableFunction {
-    ENABLE_FACTORY_CREATOR(VPosExplodeTableFunction);
-
-public:
-    VPosExplodeTableFunction();
-
-    ~VPosExplodeTableFunction() override = default;
-
-    Status process_init(Block* block, RuntimeState* state) override;
-    void process_row(size_t row_idx) override;
-    void process_close() override;
-    void get_same_many_values(MutableColumnPtr& column, int length) override;
-    int get_value(MutableColumnPtr& column, int max_step) override;
-
-private:
-    ColumnPtr _collection_column;
-    ColumnPtr _array_data_column;
-    ColumnArrayExecutionData _array_detail;
-    size_t _collection_offset; // start offset of array[row_idx]
-};
-
-} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_fake.cpp 
b/be/src/vec/functions/function_fake.cpp
index 45f59e2621a..db20245683f 100644
--- a/be/src/vec/functions/function_fake.cpp
+++ b/be/src/vec/functions/function_fake.cpp
@@ -116,7 +116,7 @@ struct FunctionPoseExplode {
         DCHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY)
                 << arguments[0]->get_name() << " not supported";
         DataTypes fieldTypes(2);
-        fieldTypes[0] = make_nullable(std::make_shared<DataTypeInt32>());
+        fieldTypes[0] = std::make_shared<DataTypeInt32>();
         fieldTypes[1] =
                 
check_and_get_data_type<DataTypeArray>(arguments[0].get())->get_nested_type();
         auto struct_type = 
std::make_shared<vectorized::DataTypeStruct>(fieldTypes);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java
index 5f1fdd41537..2e9f784de72 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java
@@ -73,7 +73,7 @@ public class PosExplode extends TableGeneratingFunction 
implements UnaryExpressi
     public List<FunctionSignature> getSignatures() {
         return ImmutableList.of(
                 FunctionSignature.ret(new StructType(ImmutableList.of(
-                        new StructField("pos", IntegerType.INSTANCE, true, ""),
+                        new StructField("pos", IntegerType.INSTANCE, false, 
""),
                         new StructField("col", ((ArrayType) 
child().getDataType()).getItemType(), true, ""))))
                         .args(child().getDataType()));
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java
index 826f969599c..f3979904d93 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java
@@ -21,15 +21,15 @@ import org.apache.doris.catalog.FunctionSignature;
 import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
-import org.apache.doris.nereids.trees.expressions.literal.StructLiteral;
 import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.ArrayType;
 import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.StructField;
+import org.apache.doris.nereids.types.StructType;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
 
 import java.util.List;
 
@@ -72,9 +72,9 @@ public class PosExplodeOuter extends TableGeneratingFunction 
implements UnaryExp
     @Override
     public List<FunctionSignature> getSignatures() {
         return ImmutableList.of(
-                FunctionSignature.ret(StructLiteral.constructStructType(
-                        Lists.newArrayList(IntegerType.INSTANCE,
-                                ((ArrayType) 
child().getDataType()).getItemType())))
+                FunctionSignature.ret(new StructType(ImmutableList.of(
+                        new StructField("pos", IntegerType.INSTANCE, false, 
""),
+                        new StructField("col", ((ArrayType) 
child().getDataType()).getItemType(), true, ""))))
                         .args(child().getDataType()));
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to