This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new ec6620ae3e [feature-wip](array-type) add function arrays_overlap (#10233) ec6620ae3e is described below commit ec6620ae3e190774dfeb34a5041ff81ab8ba4654 Author: camby <104178...@qq.com> AuthorDate: Thu Jun 30 08:12:29 2022 +0800 [feature-wip](array-type) add function arrays_overlap (#10233) --- be/src/vec/CMakeLists.txt | 2 + be/src/vec/common/aggregation_common.h | 3 - be/src/vec/common/hash_table/hash.h | 3 + .../functions/array/function_array_register.cpp | 2 + .../vec/functions/array/function_array_utils.cpp | 51 +++++ ...n_array_register.cpp => function_array_utils.h} | 26 +-- ...ay_register.cpp => function_arrays_overlap.cpp} | 17 +- .../vec/functions/array/function_arrays_overlap.h | 248 +++++++++++++++++++++ be/src/vec/io/io_helper.h | 1 + be/test/CMakeLists.txt | 1 + .../vec/function/function_arrays_overlap_test.cpp | 138 ++++++++++++ .../array-functions/arrays_overlap.md | 66 ++++++ .../array-functions/arrays_overlap.md | 66 ++++++ gensrc/script/doris_builtins_functions.py | 13 ++ .../array_functions/test_array_functions.out | 5 + .../array_functions/test_array_functions.groovy | 10 +- 16 files changed, 618 insertions(+), 34 deletions(-) diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index b32142ee80..c51f670ab6 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -134,6 +134,8 @@ set(VEC_FILES functions/array/function_array_register.cpp functions/array/function_array_size.cpp functions/array/function_array_aggregation.cpp + functions/array/function_array_utils.cpp + functions/array/function_arrays_overlap.cpp exprs/table_function/vexplode_json_array.cpp functions/math.cpp functions/function_bitmap.cpp diff --git a/be/src/vec/common/aggregation_common.h b/be/src/vec/common/aggregation_common.h index d6a4d30df5..ee3ef5ce5b 100644 --- a/be/src/vec/common/aggregation_common.h +++ b/be/src/vec/common/aggregation_common.h @@ -32,9 +32,6 @@ #include 
"vec/common/string_ref.h" #include "vec/common/uint128.h" -template <> -struct DefaultHash<StringRef> : public StringRefHash {}; - namespace doris::vectorized { using Sizes = std::vector<size_t>; diff --git a/be/src/vec/common/hash_table/hash.h b/be/src/vec/common/hash_table/hash.h index 3bb1d94269..fb47657809 100644 --- a/be/src/vec/common/hash_table/hash.h +++ b/be/src/vec/common/hash_table/hash.h @@ -91,6 +91,9 @@ struct DefaultHash<T, std::enable_if_t<std::is_arithmetic_v<T>>> { size_t operator()(T key) const { return default_hash64<T>(key); } }; +template <> +struct DefaultHash<StringRef> : public StringRefHash {}; + template <typename T> struct HashCRC32; diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp index 833bc9fbd8..cb5a091c91 100644 --- a/be/src/vec/functions/array/function_array_register.cpp +++ b/be/src/vec/functions/array/function_array_register.cpp @@ -26,12 +26,14 @@ void register_function_array_element(SimpleFunctionFactory&); void register_function_array_index(SimpleFunctionFactory&); void register_function_array_size(SimpleFunctionFactory&); void register_function_array_aggregation(SimpleFunctionFactory&); +void register_function_arrays_overlap(SimpleFunctionFactory&); void register_function_array(SimpleFunctionFactory& factory) { register_function_array_element(factory); register_function_array_index(factory); register_function_array_size(factory); register_function_array_aggregation(factory); + register_function_arrays_overlap(factory); } } // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_utils.cpp b/be/src/vec/functions/array/function_array_utils.cpp new file mode 100644 index 0000000000..582bb02c33 --- /dev/null +++ b/be/src/vec/functions/array/function_array_utils.cpp @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/functions/array/function_array_utils.h" + +#include "vec/columns/column_nullable.h" + +namespace doris::vectorized { + +bool extract_column_array_info(const IColumn& src, ColumnArrayExecutionData& data) { + const IColumn* array_col = &src; + // extract array nullable info + if (src.is_nullable()) { + const auto& null_col = reinterpret_cast<const ColumnNullable&>(src); + data.array_nullmap_data = null_col.get_null_map_data().data(); + array_col = null_col.get_nested_column_ptr().get(); + } + + // check and get array column + data.array_col = check_and_get_column<ColumnArray>(array_col); + if (!data.array_col) { + return false; + } + + // extract array offsets and nested column + data.offsets_ptr = &data.array_col->get_offsets(); + data.nested_col = &data.array_col->get_data(); + // extract nested column is nullable + if (data.nested_col->is_nullable()) { + const auto& nested_null_col = reinterpret_cast<const ColumnNullable&>(*data.nested_col); + data.nested_nullmap_data = nested_null_col.get_null_map_data().data(); + data.nested_col = nested_null_col.get_nested_column_ptr().get(); + } + return true; +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_utils.h 
similarity index 55% copy from be/src/vec/functions/array/function_array_register.cpp copy to be/src/vec/functions/array/function_array_utils.h index 833bc9fbd8..e7173489e0 100644 --- a/be/src/vec/functions/array/function_array_register.cpp +++ b/be/src/vec/functions/array/function_array_utils.h @@ -14,24 +14,22 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -// This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/registerFunctionsArray.cpp -// and modified by Doris +#pragma once -#include "vec/functions/simple_function_factory.h" +#include "vec/columns/column_array.h" +#include "vec/data_types/data_type_array.h" namespace doris::vectorized { -void register_function_array_element(SimpleFunctionFactory&); -void register_function_array_index(SimpleFunctionFactory&); -void register_function_array_size(SimpleFunctionFactory&); -void register_function_array_aggregation(SimpleFunctionFactory&); +struct ColumnArrayExecutionData { +public: + const UInt8* array_nullmap_data = nullptr; + const ColumnArray* array_col = nullptr; + const ColumnArray::Offsets* offsets_ptr = nullptr; + const UInt8* nested_nullmap_data = nullptr; + const IColumn* nested_col = nullptr; +}; -void register_function_array(SimpleFunctionFactory& factory) { - register_function_array_element(factory); - register_function_array_index(factory); - register_function_array_size(factory); - register_function_array_aggregation(factory); -} +bool extract_column_array_info(const IColumn& src, ColumnArrayExecutionData& data); } // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_arrays_overlap.cpp similarity index 58% copy from be/src/vec/functions/array/function_array_register.cpp copy to be/src/vec/functions/array/function_arrays_overlap.cpp index 833bc9fbd8..e4e54e9135 100644 --- 
a/be/src/vec/functions/array/function_array_register.cpp +++ b/be/src/vec/functions/array/function_arrays_overlap.cpp @@ -14,24 +14,15 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -// This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/registerFunctionsArray.cpp -// and modified by Doris + +#include "vec/functions/array/function_arrays_overlap.h" #include "vec/functions/simple_function_factory.h" namespace doris::vectorized { -void register_function_array_element(SimpleFunctionFactory&); -void register_function_array_index(SimpleFunctionFactory&); -void register_function_array_size(SimpleFunctionFactory&); -void register_function_array_aggregation(SimpleFunctionFactory&); - -void register_function_array(SimpleFunctionFactory& factory) { - register_function_array_element(factory); - register_function_array_index(factory); - register_function_array_size(factory); - register_function_array_aggregation(factory); +void register_function_arrays_overlap(SimpleFunctionFactory& factory) { + factory.register_function<FunctionArraysOverlap>(); } } // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_arrays_overlap.h b/be/src/vec/functions/array/function_arrays_overlap.h new file mode 100644 index 0000000000..b1e10449fa --- /dev/null +++ b/be/src/vec/functions/array/function_arrays_overlap.h @@ -0,0 +1,248 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include <string_view> + +#include "vec/columns/column_array.h" +#include "vec/columns/column_string.h" +#include "vec/common/hash_table/hash_set.h" +#include "vec/common/string_ref.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_number.h" +#include "vec/functions/array/function_array_utils.h" +#include "vec/functions/function.h" + +namespace doris::vectorized { + +template <typename T> +struct OverlapSetImpl { + using ElementNativeType = typename NativeType<typename T::value_type>::Type; + using Set = HashSetWithStackMemory<ElementNativeType, DefaultHash<ElementNativeType>, 4>; + Set set; + void insert_array(const IColumn* column, size_t start, size_t size) { + const auto& vec = assert_cast<const T&>(*column).get_data(); + for (size_t i = start; i < start + size; ++i) { + set.insert(vec[i]); + } + } + bool find_any(const IColumn* column, size_t start, size_t size) { + const auto& vec = assert_cast<const T&>(*column).get_data(); + for (size_t i = start; i < start + size; ++i) { + if (set.find(vec[i])) { + return true; + } + } + return false; + } +}; + +template <> +struct OverlapSetImpl<ColumnString> { + using Set = HashSetWithStackMemory<StringRef, DefaultHash<StringRef>, 4>; + Set set; + void insert_array(const IColumn* column, size_t start, size_t size) { + for (size_t i = start; i < start + size; ++i) { + set.insert(column->get_data_at(i)); + } + } + bool find_any(const IColumn* column, size_t start, size_t size) { + for (size_t i = start; i < start + size; ++i) { + if 
(set.find(column->get_data_at(i))) { + return true; + } + } + return false; + } +}; + +class FunctionArraysOverlap : public IFunction { +public: + static constexpr auto name = "arrays_overlap"; + static FunctionPtr create() { return std::make_shared<FunctionArraysOverlap>(); } + + /// Get function name. + String get_name() const override { return name; } + + bool use_default_implementation_for_nulls() const override { return false; } + + bool is_variadic() const override { return false; } + + size_t get_number_of_arguments() const override { return 2; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + auto left_data_type = remove_nullable(arguments[0]); + auto right_data_type = remove_nullable(arguments[1]); + DCHECK(is_array(left_data_type)) << arguments[0]->get_name(); + DCHECK(is_array(right_data_type)) << arguments[1]->get_name(); + DCHECK(left_data_type->equals(*right_data_type)) + << "data type " << arguments[0]->get_name() << " not equal with " + << arguments[1]->get_name(); + return make_nullable(std::make_shared<DataTypeUInt8>()); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + auto left_column = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + auto right_column = + block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); + ColumnArrayExecutionData left_exec_data; + ColumnArrayExecutionData right_exec_data; + + Status ret = Status::RuntimeError( + fmt::format("execute failed, unsupported types for function {}({}, {})", get_name(), + block.get_by_position(arguments[0]).type->get_name(), + block.get_by_position(arguments[1]).type->get_name())); + + // extract array column + if (!extract_column_array_info(*left_column, left_exec_data) || + !extract_column_array_info(*right_column, right_exec_data)) { + return ret; + } + + // prepare return column + auto 
dst_nested_col = ColumnVector<UInt8>::create(input_rows_count, 0); + auto dst_null_map = ColumnVector<UInt8>::create(input_rows_count, 0); + UInt8* dst_null_map_data = dst_null_map->get_data().data(); + + // any array is null or any elements in array is null, return null + RETURN_IF_ERROR(_execute_nullable(left_exec_data, dst_null_map_data)); + RETURN_IF_ERROR(_execute_nullable(right_exec_data, dst_null_map_data)); + + // execute overlap check + if (left_exec_data.nested_col->is_column_string()) { + ret = _execute_internal<ColumnString>(left_exec_data, right_exec_data, + dst_null_map_data, + dst_nested_col->get_data().data()); + } else if (left_exec_data.nested_col->is_date_type()) { + ret = _execute_internal<ColumnDate>(left_exec_data, right_exec_data, dst_null_map_data, + dst_nested_col->get_data().data()); + } else if (left_exec_data.nested_col->is_datetime_type()) { + ret = _execute_internal<ColumnDateTime>(left_exec_data, right_exec_data, + dst_null_map_data, + dst_nested_col->get_data().data()); + } else if (left_exec_data.nested_col->is_numeric()) { + if (check_column<ColumnUInt8>(*left_exec_data.nested_col)) { + ret = _execute_internal<ColumnUInt8>(left_exec_data, right_exec_data, + dst_null_map_data, + dst_nested_col->get_data().data()); + } else if (check_column<ColumnInt8>(*left_exec_data.nested_col)) { + ret = _execute_internal<ColumnInt8>(left_exec_data, right_exec_data, + dst_null_map_data, + dst_nested_col->get_data().data()); + } else if (check_column<ColumnInt16>(*left_exec_data.nested_col)) { + ret = _execute_internal<ColumnInt16>(left_exec_data, right_exec_data, + dst_null_map_data, + dst_nested_col->get_data().data()); + } else if (check_column<ColumnInt32>(*left_exec_data.nested_col)) { + ret = _execute_internal<ColumnInt32>(left_exec_data, right_exec_data, + dst_null_map_data, + dst_nested_col->get_data().data()); + } else if (check_column<ColumnInt64>(*left_exec_data.nested_col)) { + ret = _execute_internal<ColumnInt64>(left_exec_data, 
right_exec_data, + dst_null_map_data, + dst_nested_col->get_data().data()); + } else if (check_column<ColumnInt128>(*left_exec_data.nested_col)) { + ret = _execute_internal<ColumnInt128>(left_exec_data, right_exec_data, + dst_null_map_data, + dst_nested_col->get_data().data()); + } else if (check_column<ColumnFloat32>(*left_exec_data.nested_col)) { + ret = _execute_internal<ColumnFloat32>(left_exec_data, right_exec_data, + dst_null_map_data, + dst_nested_col->get_data().data()); + } else if (check_column<ColumnFloat64>(*left_exec_data.nested_col)) { + ret = _execute_internal<ColumnFloat64>(left_exec_data, right_exec_data, + dst_null_map_data, + dst_nested_col->get_data().data()); + } + } else if (left_exec_data.nested_col->is_column_decimal()) { + if (check_column<ColumnDecimal128>(*left_exec_data.nested_col)) { + ret = _execute_internal<ColumnDecimal128>(left_exec_data, right_exec_data, + dst_null_map_data, + dst_nested_col->get_data().data()); + } + } + + if (ret == Status::OK()) { + block.replace_by_position(result, ColumnNullable::create(std::move(dst_nested_col), + std::move(dst_null_map))); + } + + return ret; + } + +private: + Status _execute_nullable(const ColumnArrayExecutionData& data, UInt8* dst_nullmap_data) { + for (ssize_t row = 0; row < data.offsets_ptr->size(); ++row) { + if (dst_nullmap_data[row]) { + continue; + } + + if (data.array_nullmap_data && data.array_nullmap_data[row]) { + dst_nullmap_data[row] = 1; + continue; + } + + // any element inside array is NULL, return NULL + if (data.nested_nullmap_data) { + ssize_t start = (*data.offsets_ptr)[row - 1]; + ssize_t size = (*data.offsets_ptr)[row] - start; + for (ssize_t i = start; i < start + size; ++i) { + if (data.nested_nullmap_data[i]) { + dst_nullmap_data[row] = 1; + break; + } + } + } + } + return Status::OK(); + } + + template <typename T> + Status _execute_internal(const ColumnArrayExecutionData& left_data, + const ColumnArrayExecutionData& right_data, + const UInt8* dst_nullmap_data, 
UInt8* dst_data) { + using ExecutorImpl = OverlapSetImpl<T>; + for (ssize_t row = 0; row < left_data.offsets_ptr->size(); ++row) { + if (dst_nullmap_data[row]) { + continue; + } + + ssize_t left_start = (*left_data.offsets_ptr)[row - 1]; + ssize_t left_size = (*left_data.offsets_ptr)[row] - left_start; + ssize_t right_start = (*right_data.offsets_ptr)[row - 1]; + ssize_t right_size = (*right_data.offsets_ptr)[row] - right_start; + if (left_size == 0 || right_size == 0) { + dst_data[row] = 0; + continue; + } + + ExecutorImpl impl; + if (right_size < left_size) { + impl.insert_array(right_data.nested_col, right_start, right_size); + dst_data[row] = impl.find_any(left_data.nested_col, left_start, left_size); + } else { + impl.insert_array(left_data.nested_col, left_start, left_size); + dst_data[row] = impl.find_any(right_data.nested_col, right_start, right_size); + } + } + return Status::OK(); + } +}; + +} // namespace doris::vectorized diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h index 8425c0ec4d..01c89ed155 100644 --- a/be/src/vec/io/io_helper.h +++ b/be/src/vec/io/io_helper.h @@ -26,6 +26,7 @@ #include "util/string_parser.hpp" #include "vec/common/arena.h" #include "vec/common/exception.h" +#include "vec/common/string_buffer.hpp" #include "vec/common/string_ref.h" #include "vec/common/uint128.h" #include "vec/core/types.h" diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index b245a343d2..879b6f7b24 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -341,6 +341,7 @@ set(VEC_TEST_FILES vec/function/function_array_element_test.cpp vec/function/function_array_index_test.cpp vec/function/function_array_size_test.cpp + vec/function/function_arrays_overlap_test.cpp vec/function/function_bitmap_test.cpp vec/function/function_comparison_test.cpp vec/function/function_hash_test.cpp diff --git a/be/test/vec/function/function_arrays_overlap_test.cpp b/be/test/vec/function/function_arrays_overlap_test.cpp new file mode 100644 
index 0000000000..053205e403 --- /dev/null +++ b/be/test/vec/function/function_arrays_overlap_test.cpp @@ -0,0 +1,138 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest.h> +#include <time.h> + +#include <string> + +#include "function_test_util.h" +#include "runtime/tuple_row.h" +#include "util/url_coding.h" +#include "vec/core/field.h" + +namespace doris::vectorized { + +TEST(function_arrays_overlap_test, arrays_overlap) { + std::string func_name = "arrays_overlap"; + Array empty_arr; + + // arrays_overlap(Array<Int32>, Array<Int32>) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32, TypeIndex::Array, + TypeIndex::Int32}; + + Array vec1 = {Int32(1), Int32(2), Int32(3)}; + Array vec2 = {Int32(3)}; + Array vec3 = {Int32(4), Int32(5)}; + DataSet data_set = {{{vec1, vec2}, UInt8(1)}, + {{vec1, vec3}, UInt8(0)}, + {{Null(), vec1}, Null()}, + {{empty_arr, vec1}, UInt8(0)}}; + + check_function<DataTypeUInt8, true>(func_name, input_types, data_set); + } + + // arrays_overlap(Array<Int128>, Array<Int128>) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int128, TypeIndex::Array, + TypeIndex::Int128}; + + Array vec1 = {Int128(11111111111LL), Int128(22222LL), 
Int128(333LL)}; + Array vec2 = {Int128(11111111111LL)}; + DataSet data_set = { + {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}}; + + check_function<DataTypeUInt8, true>(func_name, input_types, data_set); + } + + // arrays_overlap(Array<Float64>, Array<Float64>) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Float64, TypeIndex::Array, + TypeIndex::Float64}; + + Array vec1 = {double(1.2345), double(2.222), double(3.0)}; + Array vec2 = {double(1.2345)}; + DataSet data_set = { + {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}}; + + check_function<DataTypeUInt8, true>(func_name, input_types, data_set); + } + + // arrays_overlap(Array<Date>, Array<Date>) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, TypeIndex::Array, + TypeIndex::Date}; + + Array vec1 = {str_to_date_time("2022-01-02", false), str_to_date_time("", false), + str_to_date_time("2022-07-08", false)}; + Array vec2 = {str_to_date_time("2022-01-02", false)}; + DataSet data_set = { + {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}}; + + check_function<DataTypeUInt8, true>(func_name, input_types, data_set); + } + + // arrays_overlap(Array<DateTime>, Array<DateTime>) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, TypeIndex::Array, + TypeIndex::DateTime}; + + Array vec1 = {str_to_date_time("2022-01-02 00:00:00"), str_to_date_time(""), + str_to_date_time("2022-07-08 00:00:00")}; + Array vec2 = {str_to_date_time("2022-01-02 00:00:00")}; + Array vec3 = {str_to_date_time("")}; + DataSet data_set = {{{vec1, vec2}, UInt8(1)}, + {{vec1, vec3}, UInt8(1)}, + {{Null(), vec1}, Null()}, + {{empty_arr, vec1}, UInt8(0)}}; + + check_function<DataTypeUInt8, true>(func_name, input_types, data_set); + } + + // arrays_overlap(Array<Decimal128>, Array<Decimal128>) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128, TypeIndex::Array, + 
TypeIndex::Decimal128}; + + Array vec1 = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67), + ut_type::DECIMALFIELD(0.0)}; + Array vec2 = {ut_type::DECIMALFIELD(17014116.67)}; + DataSet data_set = { + {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}}; + + check_function<DataTypeUInt8, true>(func_name, input_types, data_set); + } + + // arrays_overlap(Array<String>, Array<String>) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::Array, + TypeIndex::String}; + + Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)}; + Array vec2 = {Field("abc", 3)}; + Array vec3 = {Field("", 0)}; + DataSet data_set = {{{vec1, vec2}, UInt8(1)}, + {{vec1, vec3}, UInt8(1)}, + {{Null(), vec1}, Null()}, + {{empty_arr, vec1}, UInt8(0)}}; + + check_function<DataTypeUInt8, true>(func_name, input_types, data_set); + } +} + +} // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md b/docs/en/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md new file mode 100644 index 0000000000..5cd3d30e36 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md @@ -0,0 +1,66 @@ +--- +{ + "title": "arrays_overlap", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. 
See the License for the +specific language governing permissions and limitations +under the License. +--> + +## arrays_overlap + +### description + +#### Syntax + +`BOOLEAN arrays_overlap(ARRAY<T> left, ARRAY<T> right)` + +Checks whether the left and right arrays have at least one element in common. Returns one of the following values: + +``` +1 - if the left and right arrays share at least one common element; +0 - if the left and right arrays share no common element; +NULL - when the left or right array is NULL, or when any element inside either array is NULL; +``` + +### notice + +`Only supported in vectorized engine` + +### example + +``` +mysql> set enable_vectorized_engine=true; + +mysql> select c_left,c_right,arrays_overlap(c_left,c_right) from array_test; ++--------------+-----------+-------------------------------------+ +| c_left | c_right | arrays_overlap(`c_left`, `c_right`) | ++--------------+-----------+-------------------------------------+ +| [1, 2, 3] | [3, 4, 5] | 1 | +| [1, 2, 3] | [5, 6] | 0 | +| [1, 2, NULL] | [1] | NULL | +| NULL | [1, 2] | NULL | +| [1, 2, 3] | [1, 2] | 1 | ++--------------+-----------+-------------------------------------+ +``` + +### keywords + +ARRAYS_OVERLAP diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md new file mode 100644 index 0000000000..e78aa98a3c --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md @@ -0,0 +1,66 @@ +--- +{ + "title": "arrays_overlap", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License.
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## arrays_overlap + +### description + +#### Syntax + +`BOOLEAN arrays_overlap(ARRAY<T> left, ARRAY<T> right)` + +判断left和right数组中是否包含公共元素。返回结果如下: + +``` +1 - left和right数组存在公共元素; +0 - left和right数组不存在公共元素; +NULL - left或者right数组为NULL;或者left和right数组中,任意元素为NULL; +``` + +### notice + +`仅支持向量化引擎中使用` + +### example + +``` +mysql> set enable_vectorized_engine=true; + +mysql> select c_left,c_right,arrays_overlap(c_left,c_right) from array_test; ++--------------+-----------+-------------------------------------+ +| c_left | c_right | arrays_overlap(`c_left`, `c_right`) | ++--------------+-----------+-------------------------------------+ +| [1, 2, 3] | [3, 4, 5] | 1 | +| [1, 2, 3] | [5, 6] | 0 | +| [1, 2, NULL] | [1] | NULL | +| NULL | [1, 2] | NULL | +| [1, 2, 3] | [1, 2] | 1 | ++--------------+-----------+-------------------------------------+ +``` + +### keywords + +ARRAYS_OVERLAP diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index fc57c9bda2..2b916ea908 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -132,6 +132,19 @@ visible_functions = [ [['element_at', '%element_extract%'], 'VARCHAR', ['ARRAY_VARCHAR', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], [['element_at', '%element_extract%'], 'STRING', ['ARRAY_STRING', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_TINYINT', 'ARRAY_TINYINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_SMALLINT', 'ARRAY_SMALLINT'], '', '', '', 
'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_INT', 'ARRAY_INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_BIGINT', 'ARRAY_BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_LARGEINT', 'ARRAY_LARGEINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DATETIME', 'ARRAY_DATETIME'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DATE', 'ARRAY_DATE'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_FLOAT', 'ARRAY_FLOAT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DOUBLE', 'ARRAY_DOUBLE'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DECIMALV2', 'ARRAY_DECIMALV2'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_VARCHAR', 'ARRAY_VARCHAR'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['arrays_overlap'], 'BOOLEAN', ['ARRAY_STRING', 'ARRAY_STRING'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['array_contains'], 'BOOLEAN', ['ARRAY_TINYINT', 'TINYINT'], '', '', '', 'vec', ''], [['array_contains'], 'BOOLEAN', ['ARRAY_SMALLINT', 'SMALLINT'], '', '', '', 'vec', ''], [['array_contains'], 'BOOLEAN', ['ARRAY_INT', 'INT'], '', '', '', 'vec', ''], diff --git a/regression-test/data/query/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query/sql_functions/array_functions/test_array_functions.out index 18ed9ec24d..7ba627d097 100644 --- a/regression-test/data/query/sql_functions/array_functions/test_array_functions.out +++ b/regression-test/data/query/sql_functions/array_functions/test_array_functions.out @@ -9,3 +9,8 @@ 2 1 \N 3 0 0 +-- !select -- +1 true +2 false +3 \N + diff --git a/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy 
b/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy index 2897da0ae9..3c73c11f6c 100644 --- a/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy +++ b/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy @@ -27,7 +27,8 @@ suite("test_array_functions", "query") { CREATE TABLE IF NOT EXISTS ${tableName} ( `k1` int(11) NULL COMMENT "", `k2` ARRAY<int(11)> NOT NULL COMMENT "", - `k3` ARRAY<VARCHAR(20)> NULL COMMENT "" + `k3` ARRAY<VARCHAR(20)> NULL COMMENT "", + `k4` ARRAY<int(11)> NULL COMMENT "" ) ENGINE=OLAP DUPLICATE KEY(`k1`) DISTRIBUTED BY HASH(`k1`) BUCKETS 1 @@ -36,10 +37,11 @@ suite("test_array_functions", "query") { "storage_format" = "V2" ) """ - sql """ INSERT INTO ${tableName} VALUES(1, [1, 2, 3], ["a", "b", ""]) """ - sql """ INSERT INTO ${tableName} VALUES(2, [4], NULL) """ - sql """ INSERT INTO ${tableName} VALUES(3, [], []) """ + sql """ INSERT INTO ${tableName} VALUES(1, [1, 2, 3], ["a", "b", ""], [1, 2]) """ + sql """ INSERT INTO ${tableName} VALUES(2, [4], NULL, [5]) """ + sql """ INSERT INTO ${tableName} VALUES(3, [], [], NULL) """ qt_select "SELECT k1, size(k2), size(k3) FROM ${tableName} ORDER BY k1" qt_select "SELECT k1, cardinality(k2), cardinality(k3) FROM ${tableName} ORDER BY k1" + qt_select "SELECT k1, arrays_overlap(k2, k4) FROM ${tableName} ORDER BY k1" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org