This is an automated email from the ASF dual-hosted git repository. xuyang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 09d19e3f0f [feature-wip](array-type) explode support more sub types (#10673) 09d19e3f0f is described below commit 09d19e3f0fa4ed7a3260a03919696b5e34b035d8 Author: camby <104178...@qq.com> AuthorDate: Sun Jul 17 18:08:30 2022 +0800 [feature-wip](array-type) explode support more sub types (#10673) 1. explode support more sub types; 2. explode support nullable elements; Co-authored-by: cambyzju <zhuxiaol...@baidu.com> --- be/src/vec/exprs/table_function/vexplode.cpp | 54 ++++++------- be/src/vec/exprs/table_function/vexplode.h | 7 +- be/src/vec/functions/array/function_array_utils.h | 9 +++ be/test/vec/function/table_function_test.cpp | 36 ++++++++- .../sql-functions/table-functions/explode.md | 87 +++++++++++++++++++++ .../sql-functions/table-functions/explode.md | 88 ++++++++++++++++++++++ .../java/org/apache/doris/catalog/FunctionSet.java | 11 ++- .../main/java/org/apache/doris/catalog/Type.java | 21 ++++++ .../query/sql_functions/table_function/explode.out | 31 ++++++++ .../sql_functions/table_function/explode.groovy | 37 +++++++++ 10 files changed, 341 insertions(+), 40 deletions(-) diff --git a/be/src/vec/exprs/table_function/vexplode.cpp b/be/src/vec/exprs/table_function/vexplode.cpp index 2669213119..ba33a680cc 100644 --- a/be/src/vec/exprs/table_function/vexplode.cpp +++ b/be/src/vec/exprs/table_function/vexplode.cpp @@ -27,24 +27,16 @@ VExplodeTableFunction::VExplodeTableFunction() { Status VExplodeTableFunction::process_init(vectorized::Block* block) { CHECK(_vexpr_context->root()->children().size() == 1) - << "VExplodeTableFunction must be have 1 children but have " + << "VExplodeTableFunction only support 1 child but has " << _vexpr_context->root()->children().size(); int value_column_idx = -1; _vexpr_context->root()->children()[0]->execute(_vexpr_context, block, &value_column_idx); - if (block->get_by_position(value_column_idx).column->is_nullable()) { - auto array_nullable_column = check_and_get_column<ColumnNullable>( - *block->get_by_position(value_column_idx).column); - _array_null_map = array_nullable_column->get_null_map_column().get_data().data(); - _array_column = - check_and_get_column<ColumnArray>(array_nullable_column->get_nested_column_ptr()); - } else { - _array_null_map = nullptr; - _array_column = - check_and_get_column<ColumnArray>(*block->get_by_position(value_column_idx).column); - } - if (!_array_column) { + _array_column = + block->get_by_position(value_column_idx).column->convert_to_full_column_if_const(); + + if (!extract_column_array_info(*_array_column, _detail)) { return Status::NotSupported("column type {} not supported now", block->get_by_position(value_column_idx).column->get_name()); } @@ -56,26 +48,22 @@ Status VExplodeTableFunction::process_row(size_t row_idx) { DCHECK(row_idx < _array_column->size()); _is_current_empty = false; _eos = false; + _cur_offset = 0; + _array_offset = (*_detail.offsets_ptr)[row_idx - 1]; + _cur_size = (*_detail.offsets_ptr)[row_idx] - _array_offset; - if (_array_null_map && _array_null_map[row_idx]) { + // array is NULL, or array is empty + if (_cur_size == 0 || (_detail.array_nullmap_data && _detail.array_nullmap_data[row_idx])) { _is_current_empty = true; - _cur_size = 0; - _cur_offset = 0; - _pos = 0; - } else { - _cur_size = - _array_column->get_offsets()[row_idx] - _array_column->get_offsets()[row_idx - 1]; - _cur_offset = 0; - _is_current_empty = (_cur_size == 0); - _pos = _array_column->get_offsets()[row_idx - 1]; } + return Status::OK(); } Status VExplodeTableFunction::process_close() { _array_column = nullptr; - _array_null_map = nullptr; - _pos = 0; + _detail.reset(); + _array_offset = 0; return Status::OK(); } @@ -91,7 +79,13 @@ Status VExplodeTableFunction::get_value(void** output) { return Status::OK(); } - *output = const_cast<char*>(_array_column->get_data().get_data_at(_pos + _cur_offset).data); + size_t pos = _array_offset + _cur_offset; + if (_detail.nested_nullmap_data && _detail.nested_nullmap_data[pos]) { + *output = nullptr; + } else { + *output = const_cast<char*>(_detail.nested_col->get_data_at(pos).data); + } + return Status::OK(); } @@ -101,7 +95,13 @@ Status VExplodeTableFunction::get_value_length(int64_t* length) { return Status::OK(); } - *length = _array_column->get_data().get_data_at(_pos + _cur_offset).size; + size_t pos = _array_offset + _cur_offset; + if (_detail.nested_nullmap_data && _detail.nested_nullmap_data[pos]) { + *length = 0; + } else { + *length = _detail.nested_col->get_data_at(pos).size; + } + return Status::OK(); } diff --git a/be/src/vec/exprs/table_function/vexplode.h b/be/src/vec/exprs/table_function/vexplode.h index da4909994b..b82f9be2e8 100644 --- a/be/src/vec/exprs/table_function/vexplode.h +++ b/be/src/vec/exprs/table_function/vexplode.h @@ -22,6 +22,7 @@ #include "vec/columns/column_array.h" #include "vec/columns/column_nullable.h" #include "vec/common/string_ref.h" +#include "vec/functions/array/function_array_utils.h" namespace doris::vectorized { @@ -39,9 +40,9 @@ public: virtual Status get_value_length(int64_t* length) override; private: - const UInt8* _array_null_map; - const ColumnArray* _array_column; - size_t _pos; + ColumnPtr _array_column; + ColumnArrayExecutionData _detail; + size_t _array_offset; // start offset of array[row_idx] }; } // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_utils.h b/be/src/vec/functions/array/function_array_utils.h index e7173489e0..c0f8aca8f9 100644 --- a/be/src/vec/functions/array/function_array_utils.h +++ b/be/src/vec/functions/array/function_array_utils.h @@ -22,6 +22,15 @@ namespace doris::vectorized { struct ColumnArrayExecutionData { +public: + void reset() { + array_nullmap_data = nullptr; + array_col = nullptr; + offsets_ptr = nullptr; + nested_nullmap_data = nullptr; + nested_col = nullptr; + } + public: const UInt8* array_nullmap_data = nullptr; const ColumnArray* array_col = nullptr; diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp index 19e53dc8a4..b83c019e32 100644 --- a/be/test/vec/function/table_function_test.cpp +++ b/be/test/vec/function/table_function_test.cpp @@ -71,11 +71,12 @@ TEST_F(TableFunctionTest, vexplode_outer) { // explode_outer(Array<Int32>) { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32}; - Array vec = {Int32(1), Int32(2), Int32(3)}; + Array vec = {Int32(1), Null(), Int32(2), Int32(3)}; InputDataSet input_set = {{vec}, {Null()}, {Array()}}; InputTypeSet output_types = {TypeIndex::Int32}; - InputDataSet output_set = {{Int32(1)}, {Int32(2)}, {Int32(3)}, {Null()}, {Null()}}; + InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)}, + {Int32(3)}, {Null()}, {Null()}}; check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); } @@ -92,6 +93,21 @@ TEST_F(TableFunctionTest, vexplode_outer) { check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); } + + // explode_outer(Array<Decimal>) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128}; + Array vec = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67)}; + InputDataSet input_set = {{Null()}, {Array()}, {vec}}; + + InputTypeSet output_types = {TypeIndex::Decimal128}; + InputDataSet output_set = {{Null()}, + {Null()}, + {ut_type::DECIMAL(17014116.67)}, + {ut_type::DECIMAL(-17014116.67)}}; + + check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); + } } TEST_F(TableFunctionTest, vexplode) { @@ -103,11 +119,11 @@ TEST_F(TableFunctionTest, vexplode) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32}; - Array vec = {Int32(1), Int32(2), Int32(3)}; + Array vec = {Int32(1), Null(), Int32(2), Int32(3)}; InputDataSet input_set = {{vec}, {Null()}, {Array()}}; InputTypeSet output_types = {TypeIndex::Int32}; - InputDataSet output_set = {{Int32(1)}, {Int32(2)}, {Int32(3)}}; + InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)}, {Int32(3)}}; check_vec_table_function(&explode, input_types, input_set, output_types, output_set); } @@ -123,6 +139,18 @@ TEST_F(TableFunctionTest, vexplode) { check_vec_table_function(&explode, input_types, input_set, output_types, output_set); } + + // explode(Array<Date>) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date}; + Array vec = {Null(), str_to_date_time("2022-01-02", false)}; + InputDataSet input_set = {{Null()}, {Array()}, {vec}}; + + InputTypeSet output_types = {TypeIndex::Date}; + InputDataSet output_set = {{Null()}, {std::string("2022-01-02")}}; + + check_vec_table_function(&explode, input_types, input_set, output_types, output_set); + } } TEST_F(TableFunctionTest, vexplode_numbers) { diff --git a/docs/en/docs/sql-manual/sql-functions/table-functions/explode.md b/docs/en/docs/sql-manual/sql-functions/table-functions/explode.md new file mode 100644 index 0000000000..128076d4f2 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/table-functions/explode.md @@ -0,0 +1,87 @@ +--- +{ + "title": "explode", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## explode + +### description + +Table functions must be used in conjunction with Lateral View. + +explode array column to rows. `explode_outer` will return NULL, while `array` is NULL or empty. +`explode` and `explode_outer` both keep the nested NULL elements of array. + +grammar: + +``` +explode(expr) +explode_outer(expr) +``` + +### example +``` +mysql> set enable_vectorized_engine = true +mysql> set enable_array_type = true + +mysql> select e1 from (select 1 k1) as t lateral view explode([1,2,3]) tmp1 as e1; ++------+ +| e1 | ++------+ +| 1 | +| 2 | +| 3 | ++------+ + +mysql> select e1 from (select 1 k1) as t lateral view explode_outer(null) tmp1 as e1; ++------+ +| e1 | ++------+ +| NULL | ++------+ + +mysql> select e1 from (select 1 k1) as t lateral view explode([]) tmp1 as e1; +Empty set (0.010 sec) + +mysql> select e1 from (select 1 k1) as t lateral view explode([null,1,null]) tmp1 as e1; ++------+ +| e1 | ++------+ +| NULL | +| 1 | +| NULL | ++------+ + +mysql> select e1 from (select 1 k1) as t lateral view explode_outer([null,1,null]) tmp1 as e1; ++------+ +| e1 | ++------+ +| NULL | +| 1 | +| NULL | ++------+ +``` + +### keywords +EXPLODE,EXPLODE_OUTER,ARRAY \ No newline at end of file diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/explode.md b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/explode.md new file mode 100644 index 0000000000..e5da64bd4c --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/explode.md @@ -0,0 +1,88 @@ +--- +{ + "title": "explode", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## explode + +### description + +表函数,需配合 Lateral View 使用。 + +将 array 列展开成多行。当 array 为NULL或者为空时,`explode_outer` 返回NULL。 +`explode` 和 `explode_outer` 均会返回 array 内部的NULL元素。 + +语法: + +``` +explode(expr) +explode_outer(expr) +``` + +### example + +``` +mysql> set enable_vectorized_engine = true +mysql> set enable_array_type = true + +mysql> select e1 from (select 1 k1) as t lateral view explode([1,2,3]) tmp1 as e1; ++------+ +| e1 | ++------+ +| 1 | +| 2 | +| 3 | ++------+ + +mysql> select e1 from (select 1 k1) as t lateral view explode_outer(null) tmp1 as e1; ++------+ +| e1 | ++------+ +| NULL | ++------+ + +mysql> select e1 from (select 1 k1) as t lateral view explode([]) tmp1 as e1; +Empty set (0.010 sec) + +mysql> select e1 from (select 1 k1) as t lateral view explode([null,1,null]) tmp1 as e1; ++------+ +| e1 | ++------+ +| NULL | +| 1 | +| NULL | ++------+ + +mysql> select e1 from (select 1 k1) as t lateral view explode_outer([null,1,null]) tmp1 as e1; ++------+ +| e1 | ++------+ +| NULL | +| 1 | +| NULL | ++------+ +``` + +### keywords +EXPLODE,EXPLODE_OUTER,ARRAY diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index ed895641bb..ffdc4f6131 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -2734,11 +2734,10 @@ public class FunctionSet<T> { "_ZN5doris19DummyTableFunctions22explode_numbersEPN9doris_udf15FunctionContextERKNS1_9IntValE"); initTableFunctionListWithCombinator(EXPLODE); - addTableFunctionWithCombinator(EXPLODE, Type.INT, Function.NullableMode.ALWAYS_NULLABLE, - Lists.newArrayList(new ArrayType(Type.INT)), false, - "_ZN5doris19DummyTableFunctions7explodeEPN9doris_udf15FunctionContextERKNS1_13CollectionValE"); - addTableFunctionWithCombinator(EXPLODE, Type.VARCHAR, Function.NullableMode.ALWAYS_NULLABLE, - Lists.newArrayList(new ArrayType(Type.VARCHAR)), false, - "_ZN5doris19DummyTableFunctions7explodeEPN9doris_udf15FunctionContextERKNS1_13CollectionValE"); + for (Type subType : Type.getArraySubTypes()) { + addTableFunctionWithCombinator(EXPLODE, subType, Function.NullableMode.ALWAYS_NULLABLE, + Lists.newArrayList(new ArrayType(subType)), false, + "_ZN5doris19DummyTableFunctions7explodeEPN9doris_udf15FunctionContextERKNS1_13CollectionValE"); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index 4e97454fe6..2709ca09f6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -108,6 +108,7 @@ public abstract class Type { private static final ArrayList<ScalarType> integerTypes; private static final ArrayList<ScalarType> numericTypes; private static final ArrayList<ScalarType> supportedTypes; + private static final ArrayList<Type> arraySubTypes; static { integerTypes = Lists.newArrayList(); @@ -156,6 +157,22 @@ public abstract class Type { supportedTypes.add(TIME); supportedTypes.add(TIMEV2); supportedTypes.add(STRING); + + arraySubTypes = Lists.newArrayList(); + arraySubTypes.add(BOOLEAN); + arraySubTypes.add(TINYINT); + arraySubTypes.add(SMALLINT); + arraySubTypes.add(INT); + arraySubTypes.add(BIGINT); + arraySubTypes.add(LARGEINT); + arraySubTypes.add(FLOAT); + arraySubTypes.add(DOUBLE); + arraySubTypes.add(DECIMALV2); + arraySubTypes.add(DATE); + arraySubTypes.add(DATETIME); + arraySubTypes.add(CHAR); + arraySubTypes.add(VARCHAR); + arraySubTypes.add(STRING); } public static ArrayList<ScalarType> getIntegerTypes() { @@ -170,6 +187,10 @@ public abstract class Type { return supportedTypes; } + public static ArrayList<Type> getArraySubTypes() { + return arraySubTypes; + } + /** * The output of this is stored directly in the hive metastore as the column type. * The string must match exactly. diff --git a/regression-test/data/query/sql_functions/table_function/explode.out b/regression-test/data/query/sql_functions/table_function/explode.out new file mode 100644 index 0000000000..55bd9056c6 --- /dev/null +++ b/regression-test/data/query/sql_functions/table_function/explode.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !explode -- +1 +2 +3 + +-- !explode_outer -- +1 +2 +3 + +-- !explode -- + +-- !explode_outer -- +\N + +-- !explode -- + +-- !explode_outer -- +\N + +-- !explode -- +\N +1 +\N + +-- !explode_outer -- +\N +1 +\N + diff --git a/regression-test/suites/query/sql_functions/table_function/explode.groovy b/regression-test/suites/query/sql_functions/table_function/explode.groovy new file mode 100644 index 0000000000..7b46193c4b --- /dev/null +++ b/regression-test/suites/query/sql_functions/table_function/explode.groovy @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("explode") { + // vectorized + sql """ set enable_vectorized_engine = true """ + sql """ set enable_array_type = true """ + + qt_explode """ select e1 from (select 1 k1) as t lateral view explode([1,2,3]) tmp1 as e1; """ + qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer([1,2,3]) tmp1 as e1; """ + + // array is null + qt_explode """ select e1 from (select 1 k1) as t lateral view explode(null) tmp1 as e1; """ + qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer(null) tmp1 as e1; """ + + // array is empty + qt_explode """ select e1 from (select 1 k1) as t lateral view explode([]) tmp1 as e1; """ + qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer([]) tmp1 as e1; """ + + // array with null elements + qt_explode """ select e1 from (select 1 k1) as t lateral view explode([null,1,null]) tmp1 as e1; """ + qt_explode_outer """ select e1 from (select 1 k1) as t lateral view explode_outer([null,1,null]) tmp1 as e1; """ +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org