This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3205ac9544d [opt](function) Merge the implementation of the posexplode
function into the explode function (#54845)
3205ac9544d is described below
commit 3205ac9544d2a4c2227b5c6b10dba0fee7de86c2
Author: Jerry Hu <[email protected]>
AuthorDate: Tue Aug 19 10:10:40 2025 +0800
[opt](function) Merge the implementation of the posexplode function into
the explode function (#54845)
### What problem does this PR solve?
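Simplify the table-function code by deleting `VPosExplodeTableFunction`: `posexplode` is now served by `VExplodeV2TableFunction` (the `explode` implementation) with row-index generation enabled. The `pos` field of the returned struct is also changed from nullable to non-nullable in both BE and FE.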
Doc: https://github.com/apache/doris-website/pull/2755
---
.../table_function/table_function_factory.cpp | 7 +-
be/src/vec/exprs/table_function/vexplode_v2.cpp | 24 +++-
be/src/vec/exprs/table_function/vexplode_v2.h | 9 +-
be/src/vec/exprs/table_function/vposexplode.cpp | 154 ---------------------
be/src/vec/exprs/table_function/vposexplode.h | 50 -------
be/src/vec/functions/function_fake.cpp | 2 +-
.../functions/generator/PosExplode.java | 2 +-
.../functions/generator/PosExplodeOuter.java | 10 +-
8 files changed, 39 insertions(+), 219 deletions(-)
diff --git a/be/src/vec/exprs/table_function/table_function_factory.cpp
b/be/src/vec/exprs/table_function/table_function_factory.cpp
index 88005cf8b9f..310e034e0c2 100644
--- a/be/src/vec/exprs/table_function/table_function_factory.cpp
+++ b/be/src/vec/exprs/table_function/table_function_factory.cpp
@@ -33,7 +33,6 @@
#include "vec/exprs/table_function/vexplode_numbers.h"
#include "vec/exprs/table_function/vexplode_split.h"
#include "vec/exprs/table_function/vexplode_v2.h"
-#include "vec/exprs/table_function/vposexplode.h"
#include "vec/utils/util.hpp"
namespace doris::vectorized {
@@ -52,7 +51,7 @@ const std::unordered_map<std::string,
std::function<std::unique_ptr<TableFunctio
{"explode_bitmap",
TableFunctionCreator<VExplodeBitmapTableFunction>()},
{"explode_map", TableFunctionCreator<VExplodeMapTableFunction>
{}},
{"explode_json_object",
TableFunctionCreator<VExplodeJsonObjectTableFunction> {}},
- {"posexplode", TableFunctionCreator<VPosExplodeTableFunction>
{}},
+ {"posexplode", TableFunctionCreator<VExplodeV2TableFunction>
{}},
{"explode", TableFunctionCreator<VExplodeV2TableFunction> {}},
{"explode_variant_array_old",
TableFunctionCreator<VExplodeTableFunction>()},
{"explode_old", TableFunctionCreator<VExplodeTableFunction>
{}}};
@@ -84,6 +83,10 @@ Status TableFunctionFactory::get_fn(const TFunction& t_fn,
ObjectPool* pool, Tab
(*fn)->set_outer();
}
+ if (fn_name_real_temp == "posexplode") {
+
static_cast<VExplodeV2TableFunction*>(*fn)->set_generate_row_index(true);
+ }
+
return Status::OK();
}
}
diff --git a/be/src/vec/exprs/table_function/vexplode_v2.cpp
b/be/src/vec/exprs/table_function/vexplode_v2.cpp
index 5918dcd69ab..1a40bc1feda 100644
--- a/be/src/vec/exprs/table_function/vexplode_v2.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_v2.cpp
@@ -19,9 +19,12 @@
#include <glog/logging.h>
+#include <algorithm>
+#include <cstdint>
#include <ostream>
#include "common/status.h"
+#include "runtime/primitive_type.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_nothing.h"
@@ -112,9 +115,7 @@ void VExplodeV2TableFunction::process_row(size_t row_idx) {
_array_offsets[i] = (*detail.offsets_ptr)[row_idx - 1];
// find max size in array
auto cur_size = (*detail.offsets_ptr)[row_idx] - _array_offsets[i];
- if (_cur_size < cur_size) {
- _cur_size = cur_size;
- }
+ _cur_size = std::max<unsigned long>(_cur_size, cur_size);
}
}
_row_idx = row_idx;
@@ -146,11 +147,17 @@ void
VExplodeV2TableFunction::get_same_many_values(MutableColumnPtr& column, int
throw Exception(ErrorCode::INTERNAL_ERROR,
"Only multiple columns can be returned within a
struct.");
}
+
+ if (_generate_row_index) {
+ auto& pos_column =
assert_cast<ColumnInt32&>(struct_column->get_column(0));
+ pos_column.insert_many_vals(static_cast<int32_t>(_cur_offset), length);
+ }
+
for (int i = 0; i < _multi_detail.size(); i++) {
auto& detail = _multi_detail[i];
size_t pos = _array_offsets[i] + _cur_offset;
size_t element_size = _multi_detail[i].array_col->size_at(_row_idx);
- auto& struct_field = struct_column->get_column(i);
+ auto& struct_field = struct_column->get_column(i +
(_generate_row_index ? 1 : 0));
if ((detail.array_nullmap_data &&
detail.array_nullmap_data[_row_idx])) {
struct_field.insert_many_defaults(length);
} else {
@@ -192,11 +199,18 @@ int VExplodeV2TableFunction::get_value(MutableColumnPtr&
column, int max_step) {
throw Exception(ErrorCode::INTERNAL_ERROR,
"Only multiple columns can be returned within a
struct.");
}
+
+ if (_generate_row_index) {
+ auto& pos_column =
assert_cast<ColumnInt32&>(struct_column->get_column(0));
+
pos_column.insert_range_of_integer(static_cast<int32_t>(_cur_offset),
+
static_cast<int32_t>(_cur_offset + max_step));
+ }
+
for (int i = 0; i < _multi_detail.size(); i++) {
auto& detail = _multi_detail[i];
size_t pos = _array_offsets[i] + _cur_offset;
size_t element_size =
_multi_detail[i].array_col->size_at(_row_idx);
- auto& struct_field = struct_column->get_column(i);
+ auto& struct_field = struct_column->get_column(i +
(_generate_row_index ? 1 : 0));
if (detail.array_nullmap_data &&
detail.array_nullmap_data[_row_idx]) {
struct_field.insert_many_defaults(max_step);
} else {
diff --git a/be/src/vec/exprs/table_function/vexplode_v2.h
b/be/src/vec/exprs/table_function/vexplode_v2.h
index 20853f1ad18..f4b6bddbf42 100644
--- a/be/src/vec/exprs/table_function/vexplode_v2.h
+++ b/be/src/vec/exprs/table_function/vexplode_v2.h
@@ -46,12 +46,19 @@ public:
void get_same_many_values(MutableColumnPtr& column, int length) override;
int get_value(MutableColumnPtr& column, int max_step) override;
+ void set_generate_row_index(bool generate_row_index) {
+ _generate_row_index = generate_row_index;
+ }
+
private:
Status _process_init_variant(Block* block, int value_column_idx, int
children_column_idx);
std::vector<ColumnPtr> _array_columns;
- size_t _row_idx;
+ size_t _row_idx {0};
ColumnArrayExecutionDatas _multi_detail;
std::vector<size_t> _array_offsets;
+
+ // `posexplode` & `posexplode_outer`
+ bool _generate_row_index {false};
};
#include "common/compile_check_end.h"
diff --git a/be/src/vec/exprs/table_function/vposexplode.cpp
b/be/src/vec/exprs/table_function/vposexplode.cpp
deleted file mode 100644
index 53a95602737..00000000000
--- a/be/src/vec/exprs/table_function/vposexplode.cpp
+++ /dev/null
@@ -1,154 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "vec/exprs/table_function/vposexplode.h"
-
-#include <glog/logging.h>
-
-#include <ostream>
-#include <vector>
-
-#include "common/status.h"
-#include "vec/columns/column.h"
-#include "vec/columns/column_nullable.h"
-#include "vec/common/assert_cast.h"
-#include "vec/common/string_ref.h"
-#include "vec/core/block.h"
-#include "vec/core/column_with_type_and_name.h"
-#include "vec/exprs/vexpr.h"
-#include "vec/exprs/vexpr_context.h"
-
-namespace doris::vectorized {
-
-VPosExplodeTableFunction::VPosExplodeTableFunction() {
- _fn_name = "posexplode";
-}
-
-Status VPosExplodeTableFunction::process_init(Block* block, RuntimeState*
state) {
- CHECK(_expr_context->root()->children().size() == 1)
- << "VPosExplodeTableFunction only support 1 child but has "
- << _expr_context->root()->children().size();
-
- int value_column_idx = -1;
-
RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(),
block,
-
&value_column_idx));
-
- _collection_column =
-
block->get_by_position(value_column_idx).column->convert_to_full_column_if_const();
-
- if (!extract_column_array_info(*_collection_column, _array_detail)) {
- return Status::NotSupported("column type {} not supported now, only
support array",
-
block->get_by_position(value_column_idx).column->get_name());
- }
- if (is_column_nullable(*_collection_column)) {
- _array_data_column =
- assert_cast<const ColumnArray&>(
- assert_cast<const
ColumnNullable&>(*_collection_column).get_nested_column())
- .get_data_ptr();
- } else {
- _array_data_column = assert_cast<const
ColumnArray&>(*_collection_column).get_data_ptr();
- }
- return Status::OK();
-}
-
-void VPosExplodeTableFunction::process_row(size_t row_idx) {
- DCHECK(row_idx < _collection_column->size());
- TableFunction::process_row(row_idx);
-
- if (!_array_detail.array_nullmap_data ||
!_array_detail.array_nullmap_data[row_idx]) {
- _collection_offset = (*_array_detail.offsets_ptr)[row_idx - 1];
- _cur_size = (*_array_detail.offsets_ptr)[row_idx] - _collection_offset;
- }
-}
-
-void VPosExplodeTableFunction::process_close() {
- _collection_column = nullptr;
- _array_data_column = nullptr;
- _array_detail.reset();
- _collection_offset = 0;
-}
-
-void VPosExplodeTableFunction::get_same_many_values(MutableColumnPtr& column,
int length) {
- // now we only support array column explode to struct column
- size_t pos = _collection_offset + _cur_offset;
- // if current is empty array row, also append a default value
- if (current_empty()) {
- column->insert_many_defaults(length);
- return;
- }
- ColumnStruct* ret = nullptr;
- // this _is_nullable is whole output column's nullable
- if (_is_nullable) {
- ret = assert_cast<ColumnStruct*>(
-
assert_cast<ColumnNullable*>(column.get())->get_nested_column_ptr().get());
- assert_cast<ColumnUInt8*>(
-
assert_cast<ColumnNullable*>(column.get())->get_null_map_column_ptr().get())
- ->insert_many_defaults(length);
- } else if (is_column<ColumnStruct>(column.get())) {
- ret = assert_cast<ColumnStruct*>(column.get());
- } else {
- throw Exception(ErrorCode::INTERNAL_ERROR,
- "only support array column explode to struct column");
- }
- if (!ret || ret->tuple_size() != 2) {
- throw Exception(
- ErrorCode::INTERNAL_ERROR,
- "only support array column explode to two column, but given:
", ret->tuple_size());
- }
- auto& pose_column_nullable =
assert_cast<ColumnNullable&>(ret->get_column(0));
- pose_column_nullable.get_null_map_column().insert_many_defaults(length);
- assert_cast<ColumnInt32&>(pose_column_nullable.get_nested_column())
- .insert_many_vals(_cur_offset, length);
- ret->get_column(1).insert_many_from(*_array_data_column, pos, length);
-}
-
-int VPosExplodeTableFunction::get_value(MutableColumnPtr& column, int
max_step) {
- max_step = std::min(max_step, (int)(_cur_size - _cur_offset));
- size_t pos = _collection_offset + _cur_offset;
- if (current_empty()) {
- column->insert_default();
- max_step = 1;
- } else {
- ColumnStruct* struct_column = nullptr;
- if (_is_nullable) {
- auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
- struct_column =
-
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
- auto* nullmap_column =
-
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
- // here nullmap_column insert max_step many defaults as if
array[row_idx] is NULL
- // will be not update value, _cur_size = 0, means current_empty;
- // so here could insert directly
- nullmap_column->insert_many_defaults(max_step);
- } else {
- struct_column = assert_cast<ColumnStruct*>(column.get());
- }
- if (!struct_column || struct_column->tuple_size() != 2) {
- throw Exception(ErrorCode::INTERNAL_ERROR,
- "only support array column explode to two column,
but given: ",
- struct_column->tuple_size());
- }
- auto& pose_column_nullable =
assert_cast<ColumnNullable&>(struct_column->get_column(0));
-
pose_column_nullable.get_null_map_column().insert_many_defaults(max_step);
- assert_cast<ColumnInt32&>(pose_column_nullable.get_nested_column())
- .insert_range_of_integer(_cur_offset, _cur_offset + max_step);
- struct_column->get_column(1).insert_range_from(*_array_data_column,
pos, max_step);
- }
- forward(max_step);
- return max_step;
-}
-} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/table_function/vposexplode.h
b/be/src/vec/exprs/table_function/vposexplode.h
deleted file mode 100644
index 4e021fd58da..00000000000
--- a/be/src/vec/exprs/table_function/vposexplode.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "common/status.h"
-#include "vec/columns/column_map.h"
-#include "vec/data_types/data_type.h"
-#include "vec/data_types/data_type_array.h"
-#include "vec/exprs/table_function/table_function.h"
-#include "vec/functions/array/function_array_utils.h"
-
-namespace doris::vectorized {
-
-class VPosExplodeTableFunction : public TableFunction {
- ENABLE_FACTORY_CREATOR(VPosExplodeTableFunction);
-
-public:
- VPosExplodeTableFunction();
-
- ~VPosExplodeTableFunction() override = default;
-
- Status process_init(Block* block, RuntimeState* state) override;
- void process_row(size_t row_idx) override;
- void process_close() override;
- void get_same_many_values(MutableColumnPtr& column, int length) override;
- int get_value(MutableColumnPtr& column, int max_step) override;
-
-private:
- ColumnPtr _collection_column;
- ColumnPtr _array_data_column;
- ColumnArrayExecutionData _array_detail;
- size_t _collection_offset; // start offset of array[row_idx]
-};
-
-} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_fake.cpp
b/be/src/vec/functions/function_fake.cpp
index 45f59e2621a..db20245683f 100644
--- a/be/src/vec/functions/function_fake.cpp
+++ b/be/src/vec/functions/function_fake.cpp
@@ -116,7 +116,7 @@ struct FunctionPoseExplode {
DCHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY)
<< arguments[0]->get_name() << " not supported";
DataTypes fieldTypes(2);
- fieldTypes[0] = make_nullable(std::make_shared<DataTypeInt32>());
+ fieldTypes[0] = std::make_shared<DataTypeInt32>();
fieldTypes[1] =
check_and_get_data_type<DataTypeArray>(arguments[0].get())->get_nested_type();
auto struct_type =
std::make_shared<vectorized::DataTypeStruct>(fieldTypes);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java
index 5f1fdd41537..2e9f784de72 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java
@@ -73,7 +73,7 @@ public class PosExplode extends TableGeneratingFunction
implements UnaryExpressi
public List<FunctionSignature> getSignatures() {
return ImmutableList.of(
FunctionSignature.ret(new StructType(ImmutableList.of(
- new StructField("pos", IntegerType.INSTANCE, true, ""),
+ new StructField("pos", IntegerType.INSTANCE, false,
""),
new StructField("col", ((ArrayType)
child().getDataType()).getItemType(), true, ""))))
.args(child().getDataType()));
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java
index 826f969599c..f3979904d93 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java
@@ -21,15 +21,15 @@ import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
-import org.apache.doris.nereids.trees.expressions.literal.StructLiteral;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.StructField;
+import org.apache.doris.nereids.types.StructType;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
import java.util.List;
@@ -72,9 +72,9 @@ public class PosExplodeOuter extends TableGeneratingFunction
implements UnaryExp
@Override
public List<FunctionSignature> getSignatures() {
return ImmutableList.of(
- FunctionSignature.ret(StructLiteral.constructStructType(
- Lists.newArrayList(IntegerType.INSTANCE,
- ((ArrayType)
child().getDataType()).getItemType())))
+ FunctionSignature.ret(new StructType(ImmutableList.of(
+ new StructField("pos", IntegerType.INSTANCE, false,
""),
+ new StructField("col", ((ArrayType)
child().getDataType()).getItemType(), true, ""))))
.args(child().getDataType()));
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]