This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new c0360f80bb [enhancement](aggregate-function) enhance aggregate funtion collect and add group_array aliases (#15339) c0360f80bb is described below commit c0360f80bb721e04ff7e3f687014cdf9ea33e6ae Author: 奕冷 <82279870+tangsiyang2...@users.noreply.github.com> AuthorDate: Mon Feb 27 14:22:30 2023 +0800 [enhancement](aggregate-function) enhance aggregate funtion collect and add group_array aliases (#15339) Enhance aggregate function `collect_set` and `collect_list` to support optional `max_size` param, which enables to limit the number of elements in result array. --- .../aggregate_function_collect.cpp | 100 ++-- .../aggregate_function_collect.h | 199 ++++--- .../aggregate-functions/collect_list.md | 44 +- .../aggregate-functions/collect_set.md | 44 +- .../aggregate-functions/collect_list.md | 43 +- .../aggregate-functions/collect_set.md | 43 +- .../apache/doris/analysis/FunctionCallExpr.java | 5 + .../java/org/apache/doris/catalog/FunctionSet.java | 26 +- .../aggregate_functions/test_aggregate_collect.out | 73 ++- .../test_aggregate_group_array.out | 27 + .../test_aggregate_collect.groovy | 609 ++++++++++++++++++++- 11 files changed, 1001 insertions(+), 212 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp index 577b8e1c23..d18ae860e5 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp @@ -18,78 +18,78 @@ #include "vec/aggregate_functions/aggregate_function_collect.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" +#include "vec/aggregate_functions/helpers.h" namespace doris::vectorized { -template <typename T> -AggregateFunctionPtr create_agg_function_collect(bool distinct, const DataTypes& argument_types) { +#define FOR_DECIMAL_TYPES(M) \ + M(Decimal32) \ + M(Decimal64) \ + M(Decimal128) \ + M(Decimal128I) + +template <typename T, typename HasLimit> +AggregateFunctionPtr do_create_agg_function_collect(bool distinct, + const DataTypePtr& argument_type) { if (distinct) { return AggregateFunctionPtr( - new AggregateFunctionCollect<AggregateFunctionCollectSetData<T>>(argument_types)); + new AggregateFunctionCollect<AggregateFunctionCollectSetData<T, HasLimit>, + HasLimit>(argument_type)); } else { return AggregateFunctionPtr( - new AggregateFunctionCollect<AggregateFunctionCollectListData<T>>(argument_types)); + new AggregateFunctionCollect<AggregateFunctionCollectListData<T, HasLimit>, + HasLimit>(argument_type)); } } -AggregateFunctionPtr create_aggregate_function_collect(const std::string& name, - const DataTypes& argument_types, - const bool result_is_nullable) { - if (argument_types.size() != 1) { - LOG(WARNING) << fmt::format("Illegal number {} of argument for aggregate function {}", - argument_types.size(), name); - return nullptr; - } - +template <typename HasLimit> +AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string& name, + const DataTypePtr& argument_type) { bool distinct = false; if (name == "collect_set") { distinct = true; } - WhichDataType type(argument_types[0]); - if (type.is_uint8()) { - return create_agg_function_collect<UInt8>(distinct, argument_types); - } else if (type.is_int8()) { - return create_agg_function_collect<Int8>(distinct, argument_types); - } else if (type.is_int16()) { - return create_agg_function_collect<Int16>(distinct, argument_types); - } else if (type.is_int32()) { - return create_agg_function_collect<Int32>(distinct, argument_types); - } else if (type.is_int64()) { - return create_agg_function_collect<Int64>(distinct, argument_types); - } else if (type.is_int128()) { - return create_agg_function_collect<Int128>(distinct, argument_types); - } else if (type.is_float32()) { - return create_agg_function_collect<Float32>(distinct, argument_types); - } else if (type.is_float64()) { - return create_agg_function_collect<Float64>(distinct, argument_types); - } else if (type.is_decimal32()) { - return create_agg_function_collect<Decimal32>(distinct, argument_types); - } else if (type.is_decimal64()) { - return create_agg_function_collect<Decimal64>(distinct, argument_types); - } else if (type.is_decimal128()) { - return create_agg_function_collect<Decimal128>(distinct, argument_types); - } else if (type.is_decimal128i()) { - return create_agg_function_collect<Decimal128I>(distinct, argument_types); - } else if (type.is_date()) { - return create_agg_function_collect<Int64>(distinct, argument_types); - } else if (type.is_date_time()) { - return create_agg_function_collect<Int64>(distinct, argument_types); - } else if (type.is_date_v2()) { - return create_agg_function_collect<UInt32>(distinct, argument_types); - } else if (type.is_date_time_v2()) { - return create_agg_function_collect<UInt64>(distinct, argument_types); - } else if (type.is_string()) { - return create_agg_function_collect<StringRef>(distinct, argument_types); + WhichDataType which(argument_type); +#define DISPATCH(TYPE) \ + if (which.idx == TypeIndex::TYPE) \ + return do_create_agg_function_collect<TYPE, HasLimit>(distinct, argument_type); + FOR_NUMERIC_TYPES(DISPATCH) + FOR_DECIMAL_TYPES(DISPATCH) +#undef DISPATCH + if (which.is_date_or_datetime()) { + return do_create_agg_function_collect<Int64, HasLimit>(distinct, argument_type); + } else if (which.is_date_v2()) { + return do_create_agg_function_collect<UInt32, HasLimit>(distinct, argument_type); + } else if (which.is_date_time_v2()) { + return do_create_agg_function_collect<UInt64, HasLimit>(distinct, argument_type); + } else if (which.is_string()) { + return do_create_agg_function_collect<StringRef, HasLimit>(distinct, argument_type); } LOG(WARNING) << fmt::format("unsupported input type {} for aggregate function {}", - argument_types[0]->get_name(), name); + argument_type->get_name(), name); + return nullptr; +} + +AggregateFunctionPtr create_aggregate_function_collect(const std::string& name, + const DataTypes& argument_types, + const bool result_is_nullable) { + if (argument_types.size() == 1) { + return create_aggregate_function_collect_impl<std::false_type>(name, argument_types[0]); + } + if (argument_types.size() == 2) { + return create_aggregate_function_collect_impl<std::true_type>(name, argument_types[0]); + } + LOG(WARNING) << fmt::format("number of parameters for aggregate function {}, should be 1 or 2", + name); return nullptr; } void register_aggregate_function_collect_list(AggregateFunctionSimpleFactory& factory) { factory.register_function("collect_list", create_aggregate_function_collect); factory.register_function("collect_set", create_aggregate_function_collect); + factory.register_alias("collect_list", "group_array"); + factory.register_alias("collect_set", "group_uniq_array"); } -} // namespace doris::vectorized +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h b/be/src/vec/aggregate_functions/aggregate_function_collect.h index f194fc2639..27d9733bd3 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.h +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h @@ -17,6 +17,8 @@ #pragma once +#include <algorithm> + #include "common/status.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/key_holder_helpers.h" @@ -31,122 +33,180 @@ namespace doris::vectorized { -template <typename T> +template <typename T, typename HasLimit> struct AggregateFunctionCollectSetData { using ElementType = T; using ColVecType = ColumnVectorOrDecimal<ElementType>; using ElementNativeType = typename NativeType<T>::Type; + using SelfType = AggregateFunctionCollectSetData; using Set = HashSetWithStackMemory<ElementNativeType, DefaultHash<ElementNativeType>, 4>; - Set set; + Set data_set; + Int64 max_size = -1; + + size_t size() const { return data_set.size(); } void add(const IColumn& column, size_t row_num) { - const auto& vec = assert_cast<const ColVecType&>(column).get_data(); - set.insert(vec[row_num]); + data_set.insert(assert_cast<const ColVecType&>(column).get_data()[row_num]); + } + + void merge(const SelfType& rhs) { + if constexpr (HasLimit::value) { + for (auto& rhs_elem : rhs.data_set) { + if (size() >= max_size) { + return; + } + data_set.insert(rhs_elem.get_value()); + } + } else { + data_set.merge(rhs.data_set); + } } - void merge(const AggregateFunctionCollectSetData& rhs) { set.merge(rhs.set); } - void write(BufferWritable& buf) const { set.write(buf); } - void read(BufferReadable& buf) { set.read(buf); } - void reset() { set.clear(); } + + void write(BufferWritable& buf) const { data_set.write(buf); } + + void read(BufferReadable& buf) { data_set.read(buf); } + void insert_result_into(IColumn& to) const { auto& vec = assert_cast<ColVecType&>(to).get_data(); - vec.reserve(set.size()); - for (auto item : set) { + vec.reserve(size()); + for (const auto& item : data_set) { vec.push_back(item.key); } } + + void reset() { data_set.clear(); } }; -template <> -struct AggregateFunctionCollectSetData<StringRef> { +template <typename HasLimit> +struct AggregateFunctionCollectSetData<StringRef, HasLimit> { using ElementType = StringRef; using ColVecType = ColumnString; + using SelfType = AggregateFunctionCollectSetData<ElementType, HasLimit>; using Set = HashSetWithSavedHashWithStackMemory<ElementType, DefaultHash<ElementType>, 4>; - Set set; + Set data_set; + Int64 max_size = -1; + + size_t size() const { return data_set.size(); } void add(const IColumn& column, size_t row_num, Arena* arena) { Set::LookupResult it; bool inserted; auto key_holder = get_key_holder<true>(column, row_num, *arena); - set.emplace(key_holder, it, inserted); + data_set.emplace(key_holder, it, inserted); } - void merge(const AggregateFunctionCollectSetData& rhs, Arena* arena) { - Set::LookupResult it; + void merge(const SelfType& rhs, Arena* arena) { bool inserted; - for (const auto& elem : rhs.set) { - set.emplace(ArenaKeyHolder {elem.get_value(), *arena}, it, inserted); + Set::LookupResult it; + for (auto& rhs_elem : rhs.data_set) { + if constexpr (HasLimit::value) { + if (size() >= max_size) { + return; + } + } + assert(arena != nullptr); + data_set.emplace(ArenaKeyHolder {rhs_elem.get_value(), *arena}, it, inserted); } } + void write(BufferWritable& buf) const { - write_var_uint(set.size(), buf); - for (const auto& elem : set) { + write_var_uint(size(), buf); + for (const auto& elem : data_set) { write_string_binary(elem.get_value(), buf); } } - void read(BufferReadable& buf) { - UInt64 rows; - read_var_uint(rows, buf); + void read(BufferReadable& buf) { + UInt64 size; + read_var_uint(size, buf); StringRef ref; - for (size_t i = 0; i < rows; ++i) { + for (size_t i = 0; i < size; ++i) { read_string_binary(ref, buf); - set.insert(ref); + data_set.insert(ref); } } - void reset() { set.clear(); } + void insert_result_into(IColumn& to) const { auto& vec = assert_cast<ColVecType&>(to); - vec.reserve(set.size()); - for (const auto& item : set) { + vec.reserve(size()); + for (const auto& item : data_set) { vec.insert_data(item.key.data, item.key.size); } } + + void reset() { data_set.clear(); } }; -template <typename T> +template <typename T, typename HasLimit> struct AggregateFunctionCollectListData { using ElementType = T; using ColVecType = ColumnVectorOrDecimal<ElementType>; + using SelfType = AggregateFunctionCollectListData<ElementType, HasLimit>; PaddedPODArray<ElementType> data; + Int64 max_size = -1; + + size_t size() const { return data.size(); } void add(const IColumn& column, size_t row_num) { const auto& vec = assert_cast<const ColVecType&>(column).get_data(); data.push_back(vec[row_num]); } - void merge(const AggregateFunctionCollectListData& rhs) { - data.insert(rhs.data.begin(), rhs.data.end()); + + void merge(const SelfType& rhs) { + if constexpr (HasLimit::value) { + for (auto& rhs_elem : rhs.data) { + if (size() >= max_size) { + return; + } + data.push_back(rhs_elem); + } + } else { + data.insert(rhs.data.begin(), rhs.data.end()); + } } + void write(BufferWritable& buf) const { - write_var_uint(data.size(), buf); - buf.write(data.raw_data(), data.size() * sizeof(ElementType)); + write_var_uint(size(), buf); + buf.write(data.raw_data(), size() * sizeof(ElementType)); } + void read(BufferReadable& buf) { UInt64 rows = 0; read_var_uint(rows, buf); data.resize(rows); buf.read(reinterpret_cast<char*>(data.data()), rows * sizeof(ElementType)); } + void reset() { data.clear(); } + void insert_result_into(IColumn& to) const { auto& vec = assert_cast<ColVecType&>(to).get_data(); size_t old_size = vec.size(); - vec.resize(old_size + data.size()); - memcpy(vec.data() + old_size, data.data(), data.size() * sizeof(ElementType)); + vec.resize(old_size + size()); + memcpy(vec.data() + old_size, data.data(), size() * sizeof(ElementType)); } }; -template <> -struct AggregateFunctionCollectListData<StringRef> { +template <typename HasLimit> +struct AggregateFunctionCollectListData<StringRef, HasLimit> { using ElementType = StringRef; using ColVecType = ColumnString; MutableColumnPtr data; + Int64 max_size = -1; AggregateFunctionCollectListData() { data = ColVecType::create(); } + size_t size() const { return data->size(); } + void add(const IColumn& column, size_t row_num) { data->insert_from(column, row_num); } void merge(const AggregateFunctionCollectListData& rhs) { - data->insert_range_from(*rhs.data, 0, rhs.data->size()); + if constexpr (HasLimit::value) { + data->insert_range_from(*rhs.data, 0, + std::min(static_cast<size_t>(max_size - size()), rhs.size())); + } else { + data->insert_range_from(*rhs.data, 0, rhs.size()); + } } void write(BufferWritable& buf) const { @@ -177,23 +237,27 @@ struct AggregateFunctionCollectListData<StringRef> { void insert_result_into(IColumn& to) const { auto& to_str = assert_cast<ColVecType&>(to); - to_str.insert_range_from(*data, 0, data->size()); + to_str.insert_range_from(*data, 0, size()); } }; -template <typename Data> -class AggregateFunctionCollect final - : public IAggregateFunctionDataHelper<Data, AggregateFunctionCollect<Data>> { -public: - static constexpr bool alloc_memory_in_arena = - std::is_same_v<Data, AggregateFunctionCollectSetData<StringRef>>; +template <typename Data, typename HasLimit> +class AggregateFunctionCollect + : public IAggregateFunctionDataHelper<Data, AggregateFunctionCollect<Data, HasLimit>> { + using GenericType = AggregateFunctionCollectSetData<StringRef, HasLimit>; - AggregateFunctionCollect(const DataTypes& argument_types_) - : IAggregateFunctionDataHelper<Data, AggregateFunctionCollect<Data>>(argument_types_), - _argument_type(argument_types_[0]) {} + static constexpr bool ENABLE_ARENA = std::is_same_v<Data, GenericType>; + +public: + AggregateFunctionCollect(const DataTypePtr& argument_type, + UInt64 max_size_ = std::numeric_limits<UInt64>::max()) + : IAggregateFunctionDataHelper<Data, AggregateFunctionCollect<Data, HasLimit>>( + {argument_type}), + return_type(argument_type) {} std::string get_name() const override { - if constexpr (std::is_same_v<AggregateFunctionCollectListData<typename Data::ElementType>, + if constexpr (std::is_same_v<AggregateFunctionCollectListData<typename Data::ElementType, + HasLimit>, Data>) { return "collect_list"; } else { @@ -202,27 +266,38 @@ public: } DataTypePtr get_return_type() const override { - return std::make_shared<DataTypeArray>(make_nullable(_argument_type)); + return std::make_shared<DataTypeArray>(make_nullable(return_type)); } + bool allocates_memory_in_arena() const override { return ENABLE_ARENA; } + void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num, Arena* arena) const override { - assert(!columns[0]->is_null_at(row_num)); - if constexpr (alloc_memory_in_arena) { - this->data(place).add(*columns[0], row_num, arena); + auto& data = this->data(place); + if constexpr (HasLimit::value) { + if (data.max_size == -1) { + data.max_size = + (UInt64) static_cast<const ColumnInt32*>(columns[1])->get_element(row_num); + } + if (data.size() >= data.max_size) { + return; + } + } + if constexpr (ENABLE_ARENA) { + data.add(*columns[0], row_num, arena); } else { - this->data(place).add(*columns[0], row_num); + data.add(*columns[0], row_num); } } - void reset(AggregateDataPtr place) const override { this->data(place).reset(); } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena* arena) const override { - if constexpr (alloc_memory_in_arena) { - this->data(place).merge(this->data(rhs), arena); + auto& data = this->data(place); + auto& rhs_data = this->data(rhs); + if constexpr (ENABLE_ARENA) { + data.merge(rhs_data, arena); } else { - this->data(place).merge(this->data(rhs)); + data.merge(rhs_data); } } @@ -248,10 +323,8 @@ public: to_arr.get_offsets().push_back(to_nested_col.size()); } - bool allocates_memory_in_arena() const override { return alloc_memory_in_arena; } - private: - DataTypePtr _argument_type; + DataTypePtr return_type; }; -} // namespace doris::vectorized +} // namespace doris::vectorized \ No newline at end of file diff --git a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md index eb2a732387..a0ebc957ff 100644 --- a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md +++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md @@ -30,9 +30,8 @@ under the License. `ARRAY<T> collect_list(expr)` -Returns an array consisting of all values in expr within the group. -The order of elements in the array is non-deterministic. NULL values are excluded. - +Returns an array consisting of all values in expr within the group, and ,with the optional `max_size` parameter limits the size of the resulting array to `max_size` elements.The order of elements in the array is non-deterministic. NULL values are excluded. +It has an alias `group_array`. ### notice ``` @@ -44,26 +43,37 @@ Only supported in vectorized engine ``` mysql> set enable_vectorized_engine=true; -mysql> select k1,k2,k3 from collect_test order by k1; +mysql> select k1,k2,k3 from collect_list_test order by k1; +------+------------+-------+ | k1 | k2 | k3 | +------+------------+-------+ -| 1 | 2022-07-05 | hello | -| 2 | 2022-07-04 | NULL | -| 2 | 2022-07-04 | hello | -| 3 | NULL | world | +| 1 | 2023-01-01 | hello | +| 2 | 2023-01-02 | NULL | +| 2 | 2023-01-02 | hello | | 3 | NULL | world | +| 3 | 2023-01-02 | hello | +| 4 | 2023-01-02 | sql | +| 4 | 2023-01-03 | sql | +------+------------+-------+ -mysql> select k1,collect_list(k2),collect_list(k3) from collect_test group by k1 order by k1; -+------+--------------------------+--------------------+ -| k1 | collect_list(`k2`) | collect_list(`k3`) | -+------+--------------------------+--------------------+ -| 1 | [2022-07-05] | [hello] | -| 2 | [2022-07-04, 2022-07-04] | [hello] | -| 3 | NULL | [world, world] | -+------+--------------------------+--------------------+ +mysql> select collect_list(k1),collect_list(k1,2) from collect_list_test; ++-------------------------+--------------------------+ +| collect_list(`k1`) | collect_list(`k1`,3) | ++-------------------------+--------------------------+ +| [1,2,2,3,3,4,4] | [1,2,2] | ++-------------------------+--------------------------+ + +mysql> select k1,collect_list(k2),collect_list(k3,1) from collect_list_test group by k1 order by k1; ++------+-------------------------+--------------------------+ +| k1 | collect_list(`k2`) | collect_list(`k3`,1) | ++------+-------------------------+--------------------------+ +| 1 | [2023-01-01] | [hello] | +| 2 | [2023-01-02,2023-01-02] | [hello] | +| 3 | [2023-01-02] | [world] | +| 4 | [2023-01-02,2023-01-03] | [sql] | ++------+-------------------------+--------------------------+ + ``` ### keywords -COLLECT_LIST,COLLECT_SET,ARRAY +COLLECT_LIST,GROUP_ARRAY,COLLECT_SET,ARRAY diff --git a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md index 06257357c7..42987df4e3 100644 --- a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md +++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md @@ -35,10 +35,9 @@ COLLECT_SET ### description #### Syntax -`ARRAY<T> collect_set(expr)` +`ARRAY<T> collect_set(expr[,max_size])` -Returns an array consisting of all unique values in expr within the group. -The order of elements in the array is non-deterministic. NULL values are excluded. +Creates an array containing distinct elements from `expr`,with the optional `max_size` parameter limits the size of the resulting array to `max_size` elements. It has an alias `group_uniq_array`. ### notice @@ -51,26 +50,37 @@ Only supported in vectorized engine ``` mysql> set enable_vectorized_engine=true; -mysql> select k1,k2,k3 from collect_test order by k1; +mysql> select k1,k2,k3 from collect_set_test order by k1; +------+------------+-------+ | k1 | k2 | k3 | +------+------------+-------+ -| 1 | 2022-07-05 | hello | -| 2 | 2022-07-04 | NULL | -| 2 | 2022-07-04 | hello | -| 3 | NULL | world | +| 1 | 2023-01-01 | hello | +| 2 | 2023-01-01 | NULL | +| 2 | 2023-01-02 | hello | | 3 | NULL | world | +| 3 | 2023-01-02 | hello | +| 4 | 2023-01-02 | doris | +| 4 | 2023-01-03 | sql | +------+------------+-------+ -mysql> select k1,collect_set(k2),collect_set(k3) from collect_test group by k1 order by k1; -+------+-------------------+-------------------+ -| k1 | collect_set(`k2`) | collect_set(`k3`) | -+------+-------------------+-------------------+ -| 1 | [2022-07-05] | [hello] | -| 2 | [2022-07-04] | [hello] | -| 3 | NULL | [world] | -+------+-------------------+-------------------+ +mysql> select collect_set(k1),collect_set(k1,2) from collect_set_test; ++-------------------------+--------------------------+ +| collect_set(`k1`) | collect_set(`k1`,2) | ++-------------------------+--------------------------+ +| [4,3,2,1] | [1,2] | ++----------------------------------------------------+ + +mysql> select k1,collect_set(k2),collect_set(k3,1) from collect_set_test group by k1 order by k1; ++------+-------------------------+--------------------------+ +| k1 | collect_set(`k2`) | collect_set(`k3`,1) | ++------+-------------------------+--------------------------+ +| 1 | [2023-01-01] | [hello] | +| 2 | [2023-01-01,2023-01-02] | [hello] | +| 3 | [2023-01-02] | [world] | +| 4 | [2023-01-02,2023-01-03] | [sql] | ++------+-------------------------+--------------------------+ + ``` ### keywords -COLLECT_SET,COLLECT_LIST,ARRAY +COLLECT_SET,GROUP_UNIQ_ARRAY,COLLECT_LIST,ARRAY \ No newline at end of file diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md index 37ee1010d4..8ef3c01374 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md @@ -28,9 +28,10 @@ under the License. ### description #### Syntax -`ARRAY<T> collect_list(expr)` +`ARRAY<T> collect_list(expr[,max_size])` -返回一个包含 expr 中所有元素(不包括NULL)的数组,数组中元素顺序是不确定的。 +返回一个包含 expr 中所有元素(不包括NULL)的数组,可选参数`max_size`,通过设置该参数能够将结果数组的大小限制为 `max_size` 个元素。 +得到的结果数组中不包含NULL元素,数组中的元素顺序不固定。该函数具有别名`group_array`。 ### notice @@ -44,27 +45,37 @@ under the License. ``` mysql> set enable_vectorized_engine=true; -mysql> select k1,k2,k3 from collect_test order by k1; +mysql> select k1,k2,k3 from collect_list_test order by k1; +------+------------+-------+ | k1 | k2 | k3 | +------+------------+-------+ -| 1 | 2022-07-05 | hello | -| 2 | 2022-07-04 | NULL | -| 2 | 2022-07-04 | hello | -| 3 | NULL | world | +| 1 | 2023-01-01 | hello | +| 2 | 2023-01-02 | NULL | +| 2 | 2023-01-02 | hello | | 3 | NULL | world | +| 3 | 2023-01-02 | hello | +| 4 | 2023-01-02 | sql | +| 4 | 2023-01-03 | sql | +------+------------+-------+ -mysql> select k1,collect_list(k2),collect_list(k3) from collect_test group by k1 order by k1; -+------+--------------------------+--------------------+ -| k1 | collect_list(`k2`) | collect_list(`k3`) | -+------+--------------------------+--------------------+ -| 1 | [2022-07-05] | [hello] | -| 2 | [2022-07-04, 2022-07-04] | [hello] | -| 3 | NULL | [world, world] | -+------+--------------------------+--------------------+ +mysql> select collect_list(k1),collect_list(k1,2) from collect_list_test; ++-------------------------+--------------------------+ +| collect_list(`k1`) | collect_list(`k1`,3) | ++-------------------------+--------------------------+ +| [1,2,2,3,3,4,4] | [1,2,2] | ++-------------------------+--------------------------+ + +mysql> select k1,collect_list(k2),collect_list(k3,1) from collect_list_test group by k1 order by k1; ++------+-------------------------+--------------------------+ +| k1 | collect_list(`k2`) | collect_list(`k3`,1) | ++------+-------------------------+--------------------------+ +| 1 | [2023-01-01] | [hello] | +| 2 | [2023-01-02,2023-01-02] | [hello] | +| 3 | [2023-01-02] | [world] | +| 4 | [2023-01-02,2023-01-03] | [sql] | ++------+-------------------------+--------------------------+ ``` ### keywords -COLLECT_LIST,COLLECT_SET,ARRAY +COLLECT_LIST,GROUP_ARRAY,COLLECT_SET,ARRAY diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md index be9754d356..70aaac611f 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md @@ -35,9 +35,10 @@ COLLECT_SET ### description #### Syntax -`ARRAY<T> collect_set(expr)` +`ARRAY<T> collect_set(expr[,max_size])` -返回一个包含 expr 中所有去重后元素(不包括NULL)的数组,数组中元素顺序是不确定的。 +返回一个对`expr`去重后的数组。可选参数`max_size`,通过设置该参数能够将结果数组的大小限制为 `max_size` 个元素。 +得到的结果数组中不包含NULL元素,数组中的元素顺序不固定。该函数具有别名`group_uniq_array`。 ### notice @@ -50,27 +51,37 @@ COLLECT_SET ``` mysql> set enable_vectorized_engine=true; -mysql> select k1,k2,k3 from collect_test order by k1; +mysql> select k1,k2,k3 from collect_set_test order by k1; +------+------------+-------+ | k1 | k2 | k3 | +------+------------+-------+ -| 1 | 2022-07-05 | hello | -| 2 | 2022-07-04 | NULL | -| 2 | 2022-07-04 | hello | -| 3 | NULL | world | +| 1 | 2023-01-01 | hello | +| 2 | 2023-01-01 | NULL | +| 2 | 2023-01-02 | hello | | 3 | NULL | world | +| 3 | 2023-01-02 | hello | +| 4 | 2023-01-02 | doris | +| 4 | 2023-01-03 | sql | +------+------------+-------+ -mysql> select k1,collect_set(k2),collect_set(k3) from collect_test group by k1 order by k1; -+------+-------------------+-------------------+ -| k1 | collect_set(`k2`) | collect_set(`k3`) | -+------+-------------------+-------------------+ -| 1 | [2022-07-05] | [hello] | -| 2 | [2022-07-04] | [hello] | -| 3 | NULL | [world] | -+------+-------------------+-------------------+ +mysql> select collect_set(k1),collect_set(k1,2) from collect_set_test; ++-------------------------+--------------------------+ +| collect_set(`k1`) | collect_set(`k1`,2) | ++-------------------------+--------------------------+ +| [4,3,2,1] | [1,2] | ++----------------------------------------------------+ + +mysql> select k1,collect_set(k2),collect_set(k3,1) from collect_set_test group by k1 order by k1; ++------+-------------------------+--------------------------+ +| k1 | collect_set(`k2`) | collect_set(`k3`,1) | ++------+-------------------------+--------------------------+ +| 1 | [2023-01-01] | [hello] | +| 2 | [2023-01-01,2023-01-02] | [hello] | +| 3 | [2023-01-02] | [world] | +| 4 | [2023-01-02,2023-01-03] | [sql] | ++------+-------------------------+--------------------------+ ``` ### keywords -COLLECT_SET,COLLECT_LIST,ARRAY +COLLECT_SET,GROUP_UNIQ_ARRAY,COLLECT_LIST,ARRAY diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 75dc5579d4..1459cb9ddc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -1299,6 +1299,11 @@ public class FunctionCallExpr extends Expr { fn.setReturnType(new ArrayType(getChild(0).type)); } + if (fnName.getFunction().equalsIgnoreCase("group_uniq_array") + || fnName.getFunction().equalsIgnoreCase("group_array")) { + fn.setReturnType(new ArrayType(getChild(0).type)); + } + if (fnName.getFunction().equalsIgnoreCase("from_unixtime") || fnName.getFunction().equalsIgnoreCase("date_format")) { // if has only one child, it has default time format: yyyy-MM-dd HH:mm:ss.SSSSSS diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 35c0d40b2a..f344a3c721 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1345,8 +1345,8 @@ public class FunctionSet<T> { } public void addScalarAndVectorizedBuiltin(String fnName, boolean userVisible, - Function.NullableMode nullableMode, Type retType, - boolean varArgs, Type ... args) { + Function.NullableMode nullableMode, Type retType, + boolean varArgs, Type ... args) { ArrayList<Type> argsType = new ArrayList<Type>(); for (Type type : args) { argsType.add(type); @@ -1405,6 +1405,10 @@ public class FunctionSet<T> { public static final String SEQUENCE_COUNT = "sequence_count"; + public static final String GROUP_UNIQ_ARRAY = "group_uniq_array"; + + public static final String GROUP_ARRAY = "group_array"; + // Populate all the aggregate builtins in the catalog. // null symbols indicate the function does not need that step of the evaluation. // An empty symbol indicates a TODO for the BE to implement the function. @@ -2587,6 +2591,10 @@ public class FunctionSet<T> { "", "", "", "", "", true, false, true, true)); addBuiltin(AggregateFunction.createBuiltin(COLLECT_SET, Lists.newArrayList(t), new ArrayType(t), t, "", "", "", "", "", true, false, true, true)); + addBuiltin(AggregateFunction.createBuiltin(COLLECT_LIST, Lists.newArrayList(t, Type.INT), new ArrayType(t), t, + "", "", "", "", "", true, false, true, true)); + addBuiltin(AggregateFunction.createBuiltin(COLLECT_SET, Lists.newArrayList(t, Type.INT), new ArrayType(t), t, + "", "", "", "", "", true, false, true, true)); addBuiltin( AggregateFunction.createBuiltin("topn_array", Lists.newArrayList(t, Type.INT), new ArrayType(t), t, "", "", "", "", "", true, false, true, true)); @@ -2611,8 +2619,20 @@ public class FunctionSet<T> { "", "", "", "", "", true, false, true, true)); addBuiltin(AggregateFunction.createBuiltin(HIST, Lists.newArrayList(t, Type.DOUBLE, Type.INT), Type.VARCHAR, t, "", "", "", "", "", true, false, true, true)); - addBuiltin(AggregateFunction.createBuiltin(HISTOGRAM, Lists.newArrayList(t, Type.DOUBLE, Type.INT), Type.VARCHAR, t, + addBuiltin(AggregateFunction.createBuiltin(HISTOGRAM, Lists.newArrayList(t, Type.DOUBLE, Type.INT), + Type.VARCHAR, t, "", "", "", "", "", true, false, true, true)); + + addBuiltin(AggregateFunction.createBuiltin(GROUP_UNIQ_ARRAY, Lists.newArrayList(t), new ArrayType(t), t, + "", "", "", "", "", true, false, true, true)); + addBuiltin( + AggregateFunction.createBuiltin(GROUP_UNIQ_ARRAY, Lists.newArrayList(t, Type.INT), new ArrayType(t), + t, "", "", "", "", "", true, false, true, true)); + addBuiltin(AggregateFunction.createBuiltin(GROUP_ARRAY, Lists.newArrayList(t), new ArrayType(t), t, + "", "", "", "", "", true, false, true, true)); + addBuiltin( + AggregateFunction.createBuiltin(GROUP_ARRAY, Lists.newArrayList(t, Type.INT), new ArrayType(t), + t, "", "", "", "", "", true, false, true, true)); } // Avg diff --git a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out index f26cf1146b..3c17fae7af 100644 --- a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out +++ b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out @@ -1,20 +1,79 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !select -- -1 ['hello', 'hello'] [2022-07-04, 2022-07-04] [1.23, 1.23] -2 \N \N \N +1 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +1 false 10 20 30 4444444444444 55555555555 0.1 0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01 2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null +1 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +1 false 11 21 33 4444444444444 55555555555 0.1 0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01 2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null +1 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +1 true 11 12 13 1444444444444 1555555555 1.1 1.222 13333.33 d varchar2 string2 2022-12-02 2022-12-02 2022-12-02T22:23:23 2022-12-02T22:23:24.999999 not null +2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +2 false 21 22 23 2444444444444 255555555 2.1 2.222 23333.33 f varchar3 string3 2022-12-03 2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null +2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +2 true 31 32 33 3444444444444 3555555555 3.1 3.222 33333.33 l varchar3 string3 2022-12-03 2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null +2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +2 false 10 20 30 944444444444 9555555555 9.1 9.222 93333.33 p varchar9 string9 2022-12-09 2022-12-09 2022-12-09T22:23:23 2022-12-09T22:23:24.999999 not null -- !select -- -1 ['hello'] [2022-07-04] [1.23] -2 \N \N \N +[0, 1] [21, 11, 31, 10] [21, 12, 20, 22, 32] [30, 13, 23, 33] [2444444444444, 4444444444444, 3444444444444, 1444444444444, 944444444444] [1555555555, 3555555555, 9555555555, 255555555, 55555555555] [0.1, 1.1, 3.1, 9.1, 2.1] [3.222, 2.222, 9.222, 1.222, 0.222] [23333.33, 3333.33, 33333.33, 93333.33, 13333.33] ['c', 'p', 'l', 'd', 'f'] ['varchar3', 'varchar1', 'varchar2', 'varchar9'] ['string9', 'string2', 'string1', 'string3'] [2022-12-03, 2022-12-09, 2022-12-02, 2022-12-01] [2022-12-01, [...] -- !select -- -['hello', 'hello'] ['hello', 'hello', 'hello', 'hello'] +[0, 0, 1, 0, 1, 0] [10, 11, 11, 21, 31, 10] [20, 21, 12, 22, 32, 20] [30, 33, 13, 23, 33, 30] [4444444444444, 4444444444444, 1444444444444, 2444444444444, 3444444444444, 944444444444] [55555555555, 55555555555, 1555555555, 255555555, 3555555555, 9555555555] [0.1, 0.1, 1.1, 2.1, 3.1, 9.1] [0.222, 0.222, 1.222, 2.222, 3.222, 9.222] [3333.33, 3333.33, 13333.33, 23333.33, 33333.33, 93333.33] ['c', 'c', 'd', 'f', 'l', 'p'] ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3', 'varchar9 [...] -- !select -- -['hello'] ['hello'] +1 1 1 1 1 3 1 2 1 1 1 1 1 2 1 1 1 -- !select -- -1 [1, 2, 1, 2] ['hello'] [2022-07-04, 2022-07-04] [1.23] ['hello', 'hello', 'hello', 'hello'] +1 1 1 1 1 3 1 2 1 1 1 1 1 2 1 1 1 + +-- !select -- +1 [0, 1] [21, 11, 31, 10] [21, 12, 20, 22, 32] [30, 13, 23, 33] [2444444444444, 4444444444444, 3444444444444, 1444444444444, 944444444444] [1555555555, 3555555555, 9555555555, 255555555, 55555555555] [0.1, 1.1, 3.1, 9.1, 2.1] [3.222, 2.222, 9.222, 1.222, 0.222] [23333.33, 3333.33, 33333.33, 93333.33, 13333.33] ['c', 'p', 'l', 'd', 'f'] ['varchar3', 'varchar1', 'varchar2', 'varchar9'] ['string9', 'string2', 'string1', 'string3'] [2022-12-03, 2022-12-09, 2022-12-02, 2022-12-01] [2022-12-01 [...] + +-- !select -- +1 1 1 1 1 1 3 1 2 1 1 1 1 1 2 1 1 1 + +-- !select -- +1 [0, 0, 1, 0, 1, 0] [10, 11, 11, 21, 31, 10] [20, 21, 12, 22, 32, 20] [30, 33, 13, 23, 33, 30] [4444444444444, 4444444444444, 1444444444444, 2444444444444, 3444444444444, 944444444444] [55555555555, 55555555555, 1555555555, 255555555, 3555555555, 9555555555] [0.1, 0.1, 1.1, 2.1, 3.1, 9.1] [0.222, 0.222, 1.222, 2.222, 3.222, 9.222] [3333.33, 3333.33, 13333.33, 23333.33, 33333.33, 93333.33] ['c', 'c', 'd', 'f', 'l', 'p'] ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3', 'varcha [...] + +-- !select -- +1 1 1 1 1 1 3 1 2 1 1 1 1 1 2 1 1 1 + +-- !select -- +1 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +1 false 10 20 30 4444444444444 55555555555 0.1 0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01 2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null +1 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +1 false 11 21 33 4444444444444 55555555555 0.1 0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01 2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null +1 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +1 true 11 12 13 1444444444444 1555555555 1.1 1.222 13333.33 d varchar2 string2 2022-12-02 2022-12-02 2022-12-02T22:23:23 2022-12-02T22:23:24.999999 not null +2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +2 false 21 22 23 2444444444444 255555555 2.1 2.222 23333.33 f varchar3 string3 2022-12-03 2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null +2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +2 true 31 32 33 3444444444444 3555555555 3.1 3.222 33333.33 l varchar3 string3 2022-12-03 2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null +2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +2 false 10 20 30 944444444444 9555555555 9.1 9.222 93333.33 p varchar9 string9 2022-12-09 2022-12-09 2022-12-09T22:23:23 2022-12-09T22:23:24.999999 not null + +-- !select -- +[0, 1] [21, 11, 31, 10] [21, 12, 20, 22, 32] [30, 13, 23, 33] [2444444444444, 4444444444444, 3444444444444, 1444444444444, 944444444444] [1555555555, 3555555555, 9555555555, 255555555, 55555555555] [0.1, 1.1, 3.1, 9.1, 2.1] [3.222, 2.222, 9.222, 1.222, 0.222] [23333.33, 3333.33, 33333.33, 93333.33, 13333.33] ['c', 'p', 'l', 'd', 'f'] ['varchar3', 'varchar1', 'varchar2', 'varchar9'] ['string9', 'string2', 'string1', 'string3'] [2022-12-03, 2022-12-09, 2022-12-02, 2022-12-01] [2022-12-01, [...] + +-- !select -- +[0, 0, 1, 0, 1, 0] [10, 11, 11, 21, 31, 10] [20, 21, 12, 22, 32, 20] [30, 33, 13, 23, 33, 30] [4444444444444, 4444444444444, 1444444444444, 2444444444444, 3444444444444, 944444444444] [55555555555, 55555555555, 1555555555, 255555555, 3555555555, 9555555555] [0.1, 0.1, 1.1, 2.1, 3.1, 9.1] [0.222, 0.222, 1.222, 2.222, 3.222, 9.222] [3333.33, 3333.33, 13333.33, 23333.33, 33333.33, 93333.33] ['c', 'c', 'd', 'f', 'l', 'p'] ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3', 'varchar9 [...] + +-- !select -- +1 1 1 1 1 3 1 2 1 1 1 1 1 2 1 1 1 + +-- !select -- +1 1 1 1 1 3 1 2 1 1 1 1 1 2 1 1 1 + +-- !select -- +1 [0, 1] [21, 11, 31, 10] [21, 12, 20, 22, 32] [30, 13, 23, 33] [2444444444444, 4444444444444, 3444444444444, 1444444444444, 944444444444] [1555555555, 3555555555, 9555555555, 255555555, 55555555555] [0.1, 1.1, 3.1, 9.1, 2.1] [3.222, 2.222, 9.222, 1.222, 0.222] [23333.33, 3333.33, 33333.33, 93333.33, 13333.33] ['c', 'p', 'l', 'd', 'f'] ['varchar3', 'varchar1', 'varchar2', 'varchar9'] ['string9', 'string2', 'string1', 'string3'] [2022-12-03, 2022-12-09, 2022-12-02, 2022-12-01] [2022-12-01 [...] + +-- !select -- +1 1 1 1 1 1 3 1 2 1 1 1 1 1 2 1 1 1 + +-- !select -- +1 [0, 0, 1, 0, 1, 0] [10, 11, 11, 21, 31, 10] [20, 21, 12, 22, 32, 20] [30, 33, 13, 23, 33, 30] [4444444444444, 4444444444444, 1444444444444, 2444444444444, 3444444444444, 944444444444] [55555555555, 55555555555, 1555555555, 255555555, 3555555555, 9555555555] [0.1, 0.1, 1.1, 2.1, 3.1, 9.1] [0.222, 0.222, 1.222, 2.222, 3.222, 9.222] [3333.33, 3333.33, 13333.33, 23333.33, 33333.33, 93333.33] ['c', 'c', 'd', 'f', 'l', 'p'] ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3', 'varcha [...] + +-- !select -- +1 1 1 1 1 1 3 1 2 1 1 1 1 1 2 1 1 1 -- !select43 -- [10, 8] diff --git a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_group_array.out b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_group_array.out new file mode 100644 index 0000000000..9addf1e4bf --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_group_array.out @@ -0,0 +1,27 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +1 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +1 false 10 20 30 4444444444444 55555555555 0.1 0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01 2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null +1 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +1 false 11 21 33 4444444444444 55555555555 0.1 0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01 2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null +1 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +1 true 11 12 13 1444444444444 1555555555 1.1 1.222 13333.33 d varchar2 string2 2022-12-02 2022-12-02 2022-12-02T22:23:23 2022-12-02T22:23:24.999999 not null +2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +2 false 21 22 23 2444444444444 255555555 2.1 2.222 23333.33 f varchar3 string3 2022-12-03 2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null +2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +2 true 31 32 33 3444444444444 3555555555 3.1 3.222 33333.33 l varchar3 string3 2022-12-03 2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null +2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N not null +2 false 10 20 30 944444444444 9555555555 9.1 9.222 93333.33 p varchar9 string9 2022-12-09 2022-12-09 2022-12-09T22:23:23 2022-12-09T22:23:24.999999 not null + +-- !select -- +[0, 1] [21, 11, 31, 10] [21, 12, 20, 22, 32] [30, 13, 23, 33] [2444444444444, 4444444444444, 3444444444444, 1444444444444, 944444444444] [1555555555, 3555555555, 9555555555, 255555555, 55555555555] [0.1, 1.1, 3.1, 9.1, 2.1] [3.222, 2.222, 9.222, 1.222, 0.222] [23333.33, 3333.33, 33333.33, 93333.33, 13333.33] ['c', 'p', 'l', 'd', 'f'] ['varchar3', 'varchar1', 'varchar2', 'varchar9'] ['string9', 'string2', 'string1', 'string3'] [2022-12-03, 2022-12-09, 2022-12-02, 2022-12-01] [2022-12-01, [...] + +-- !select -- +[0] [10] [20] [30] [4444444444444] [1555555555, 255555555, 55555555555] [0.1] [1.222, 0.222] [3333.33] ['c'] ['varchar1'] ['string1'] [2022-12-01] [2022-12-01, 2022-12-02] [2022-12-01 22:23:23] [2022-12-01 22:23:24.999999] ['not null'] + +-- !select -- +1 [0, 1] [21, 11, 31, 10] [21, 12, 20, 22, 32] [30, 13, 23, 33] [2444444444444, 4444444444444, 3444444444444, 1444444444444, 944444444444] [1555555555, 3555555555, 9555555555, 255555555, 55555555555] [0.1, 1.1, 3.1, 9.1, 2.1] [3.222, 2.222, 9.222, 1.222, 0.222] [23333.33, 3333.33, 33333.33, 93333.33, 13333.33] ['c', 'p', 'l', 'd', 'f'] ['varchar3', 'varchar1', 'varchar2', 'varchar9'] ['string9', 'string2', 'string1', 'string3'] [2022-12-03, 2022-12-09, 2022-12-02, 2022-12-01] [2022-12-01 [...] + +-- !select -- +1 [0] [10] [20] [30] [4444444444444] [1555555555, 255555555, 55555555555] [0.1] [1.222, 0.222] [3333.33] ['c'] ['varchar1'] ['string1'] [2022-12-01] [2022-12-01, 2022-12-02] [2022-12-01 22:23:23] [2022-12-01 22:23:24.999999] ['not null'] + diff --git a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy index cff93375ab..d09b09797b 100644 --- a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy +++ b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy @@ -16,35 +16,598 @@ // under the License. suite("test_aggregate_collect") { + + sql "set enable_vectorized_engine = true" + def tableName = "collect_test" - def tableCTAS = "collect_test_ctas" + def tableCTAS1 = "collect_set_test_ctas1" + def tableCTAS2 = "collect_set_test_ctas2" + def tableCTAS3 = "collect_list_test_ctas3" + def tableCTAS4 = "collect_list_test_ctas4" + + sql "DROP TABLE IF EXISTS ${tableName}" - sql "DROP TABLE IF EXISTS ${tableCTAS}" - sql """ - CREATE TABLE IF NOT EXISTS ${tableName} ( - c_int INT, - c_string VARCHAR(10), - c_date Date, - c_decimal DECIMAL(10, 2), - c_string_not_null VARCHAR(10) NOT NULL + sql "DROP TABLE IF EXISTS ${tableCTAS1}" + sql "DROP TABLE IF EXISTS ${tableCTAS2}" + sql "DROP TABLE IF EXISTS ${tableCTAS3}" + sql "DROP TABLE IF EXISTS ${tableCTAS4}" + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + c_id INT, + c_bool BOOLEAN, + c_tinyint TINYINT, + c_smallint SMALLINT, + c_int INT, + c_bigint BIGINT, + c_largeint LARGEINT, + c_float FLOAT, + c_double DOUBLE, + c_decimal DECIMAL(9, 2), + c_char CHAR, + c_varchar VARCHAR(10), + c_string STRING, + c_date DATE, + c_datev2 DATEV2, + c_date_time DATETIME, + c_date_timev2 DATETIMEV2(6), + c_string_not_null VARCHAR(10) NOT NULL ) DISTRIBUTED BY HASH(c_int) BUCKETS 1 PROPERTIES ( "replication_num" = "1" - ) + ) + """ + + sql """ + INSERT INTO ${tableName} values + (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (1, false, 10, 20, 30, 4444444444444, 55555555555, 0.1, 0.222, 3333.33, 'c', 'varchar1', 'string1', + '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01 22:23:24.999999', 'not null') + """ + + sql """ + INSERT INTO ${tableName} values + (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (1, false, 11, 21, 33, 4444444444444, 55555555555, 0.1, 0.222, 3333.33, 'c', 'varchar1', 'string1', + '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01 22:23:24.999999', 'not null') """ - sql "INSERT INTO ${tableName} values(1,'hello','2022-07-04',1.23,'hello'), (2,NULL,NULL,NULL,'hello')" - sql "INSERT INTO ${tableName} values(1,'hello','2022-07-04',1.23,'hello'), (2,NULL,NULL,NULL,'hello')" - qt_select "select c_int,collect_list(c_string),collect_list(c_date),collect_list(c_decimal) from ${tableName} group by c_int order by c_int" - qt_select "select c_int,collect_set(c_string),collect_set(c_date),collect_set(c_decimal) from ${tableName} group by c_int order by c_int" + sql """ + INSERT INTO ${tableName} values + (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (1, true, 11, 12, 13, 1444444444444, 1555555555, 1.1, 1.222, 13333.33, 'd', 'varchar2', 'string2', + '2022-12-02', '2022-12-02', '2022-12-02 22:23:23', '2022-12-02 22:23:24.999999', 'not null') + """ - // test without GROUP BY - qt_select "select collect_list(c_string),collect_list(c_string_not_null) from ${tableName}" - qt_select "select collect_set(c_string),collect_set(c_string_not_null) from ${tableName}" + sql """ + INSERT INTO ${tableName} values + (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (2, false, 21, 22, 23, 2444444444444, 255555555, 2.1, 2.222, 23333.33, 'f', 'varchar3', 'string3', + '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03 22:23:24.999999', 'not null') + """ + + sql """ + INSERT INTO ${tableName} values + (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (2, true, 31, 32, 33, 3444444444444, 3555555555, 3.1, 3.222, 33333.33, 'l', 'varchar3', 'string3', + '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03 22:23:24.999999', 'not null') + """ - sql """ CREATE TABLE ${tableCTAS} PROPERTIES("replication_num" = "1") AS SELECT 1,collect_list(c_int),collect_set(c_string),collect_list(c_date),collect_set(c_decimal),collect_list(c_string_not_null) FROM ${tableName} """ - qt_select "SELECT * from ${tableCTAS}" + sql """ + INSERT INTO ${tableName} values + (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (2, false, 10, 20, 30, 944444444444, 9555555555, 9.1, 9.222, 93333.33, 'p', 'varchar9', 'string9', + '2022-12-09', '2022-12-09', '2022-12-09 22:23:23', '2022-12-09 22:23:24.999999', 'not null') + """ + + qt_select """ + SELECT * FROM ${tableName} + """ + + qt_select """ + SELECT + collect_set(c_bool), + collect_set(c_tinyint), + collect_set(c_smallint), + collect_set(c_int), + collect_set(c_bigint), + collect_set(c_largeint), + collect_set(c_float), + collect_set(c_double), + collect_set(c_decimal), + collect_set(c_char), + collect_set(c_varchar), + collect_set(c_string), + collect_set(c_date), + collect_set(c_datev2), + collect_set(c_date_time), + collect_set(c_date_timev2), + collect_set(c_string_not_null) + FROM + ${tableName} + """ + + qt_select """ + SELECT + collect_list(c_bool), + collect_list(c_tinyint), + collect_list(c_smallint), + collect_list(c_int), + collect_list(c_bigint), + collect_list(c_largeint), + collect_list(c_float), + collect_list(c_double), + collect_list(c_decimal), + collect_list(c_char), + collect_list(c_varchar), + collect_list(c_string), + collect_list(c_date), + collect_list(c_datev2), + collect_list(c_date_time), + collect_list(c_date_timev2), + collect_list(c_string_not_null) + FROM + ${tableName} + """ + + qt_select """ + SELECT + size(collect_set(c_bool,1)), + size(collect_set(c_tinyint,1)), + size(collect_set(c_smallint,1)), + size(collect_set(c_int,1)), + size(collect_set(c_bigint,1)), + size(collect_set(c_largeint,3)), + size(collect_set(c_float,1)), + size(collect_set(c_double,2)), + size(collect_set(c_decimal,1)), + size(collect_set(c_char,1)), + size(collect_set(c_varchar,1)), + size(collect_set(c_string,1)), + size(collect_set(c_date,1)), + size(collect_set(c_datev2,2)), + size(collect_set(c_date_time,1)), + size(collect_set(c_date_timev2,1)), + size(collect_set(c_string_not_null,1)) + FROM + ${tableName} + """ + + qt_select """ + SELECT + size(collect_list(c_bool,1)), + size(collect_list(c_tinyint,1)), + size(collect_list(c_smallint,1)), + size(collect_list(c_int,1)), + size(collect_list(c_bigint,1)), + size(collect_list(c_largeint,3)), + size(collect_list(c_float,1)), + size(collect_list(c_double,2)), + size(collect_list(c_decimal,1)), + size(collect_list(c_char,1)), + size(collect_list(c_varchar,1)), + size(collect_list(c_string,1)), + size(collect_list(c_date,1)), + size(collect_list(c_datev2,2)), + size(collect_list(c_date_time,1)), + size(collect_list(c_date_timev2,1)), + size(collect_list(c_string_not_null,1)) + FROM + ${tableName} + """ + + sql """ + CREATE TABLE ${tableCTAS1} PROPERTIES("replication_num" = "1") AS + SELECT + 1, + collect_set(c_bool), + collect_set(c_tinyint), + collect_set(c_smallint), + collect_set(c_int), + collect_set(c_bigint), + collect_set(c_largeint), + collect_set(c_float), + collect_set(c_double), + collect_set(c_decimal), + collect_set(c_char), + collect_set(c_varchar), + collect_set(c_string), + collect_set(c_date), + collect_set(c_datev2), + collect_set(c_date_time), + collect_set(c_date_timev2), + collect_set(c_string_not_null) + FROM + ${tableName} + """ + + + sql """ + CREATE TABLE ${tableCTAS2} PROPERTIES("replication_num" = "1") AS + SELECT + 1, + size(collect_set(c_bool,1)), + size(collect_set(c_tinyint,1)), + size(collect_set(c_smallint,1)), + size(collect_set(c_int,1)), + size(collect_set(c_bigint,1)), + size(collect_set(c_largeint,3)), + size(collect_set(c_float,1)), + size(collect_set(c_double,2)), + size(collect_set(c_decimal,1)), + size(collect_set(c_char,1)), + size(collect_set(c_varchar,1)), + size(collect_set(c_string,1)), + size(collect_set(c_date,1)), + size(collect_set(c_datev2,2)), + size(collect_set(c_date_time,1)), + size(collect_set(c_date_timev2,1)), + size(collect_set(c_string_not_null,1)) + FROM + ${tableName} + """ + + sql """ + CREATE TABLE ${tableCTAS3} PROPERTIES("replication_num" = "1") AS + SELECT + 1, + collect_list(c_bool), + collect_list(c_tinyint), + collect_list(c_smallint), + collect_list(c_int), + collect_list(c_bigint), + collect_list(c_largeint), + collect_list(c_float), + collect_list(c_double), + collect_list(c_decimal), + collect_list(c_char), + collect_list(c_varchar), + collect_list(c_string), + collect_list(c_date), + collect_list(c_datev2), + collect_list(c_date_time), + collect_list(c_date_timev2), + collect_list(c_string_not_null) + FROM + ${tableName} + """ + + sql """ + CREATE TABLE ${tableCTAS4} PROPERTIES("replication_num" = "1") AS + SELECT + 1, + size(collect_list(c_bool,1)), + size(collect_list(c_tinyint,1)), + size(collect_list(c_smallint,1)), + size(collect_list(c_int,1)), + size(collect_list(c_bigint,1)), + size(collect_list(c_largeint,3)), + size(collect_list(c_float,1)), + size(collect_list(c_double,2)), + size(collect_list(c_decimal,1)), + size(collect_list(c_char,1)), + size(collect_list(c_varchar,1)), + size(collect_list(c_string,1)), + size(collect_list(c_date,1)), + size(collect_list(c_datev2,2)), + size(collect_list(c_date_time,1)), + size(collect_list(c_date_timev2,1)), + size(collect_list(c_string_not_null,1)) + FROM + ${tableName} + """ + + qt_select "SELECT * FROM ${tableCTAS1}" + qt_select "SELECT * FROM ${tableCTAS2}" + qt_select "SELECT * FROM ${tableCTAS3}" + qt_select "SELECT * FROM ${tableCTAS4}" + + sql "DROP TABLE IF EXISTS ${tableName}" + sql "DROP TABLE IF EXISTS ${tableCTAS1}" + sql "DROP TABLE IF EXISTS ${tableCTAS2}" + sql "DROP TABLE IF EXISTS ${tableCTAS3}" + sql "DROP TABLE IF EXISTS ${tableCTAS4}" + + def tableName_11 = "group_uniq_array_test" + def tableCTAS1_11 = "group_uniq_array_test_ctas1" + def tableCTAS2_11 = "group_uniq_array_test_ctas2" + def tableCTAS3_11 = "group_array_test_ctas3" + def tableCTAS4_11 = "group_array_test_ctas4" + + sql "DROP TABLE IF EXISTS ${tableName_11}" + sql "DROP TABLE IF EXISTS ${tableCTAS1_11}" + sql "DROP TABLE IF EXISTS ${tableCTAS2_11}" + sql "DROP TABLE IF EXISTS ${tableCTAS3_11}" + sql "DROP TABLE IF EXISTS ${tableCTAS4_11}" + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName_11} ( + c_id INT, + c_bool BOOLEAN, + c_tinyint TINYINT, + c_smallint SMALLINT, + c_int INT, + c_bigint BIGINT, + c_largeint LARGEINT, + c_float FLOAT, + c_double DOUBLE, + c_decimal DECIMAL(9, 2), + c_char CHAR, + c_varchar VARCHAR(10), + c_string STRING, + c_date DATE, + c_datev2 DATEV2, + c_date_time DATETIME, + c_date_timev2 DATETIMEV2(6), + c_string_not_null VARCHAR(10) NOT NULL + ) + DISTRIBUTED BY HASH(c_int) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + sql """ + INSERT INTO ${tableName_11} values + (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (1, false, 10, 20, 30, 4444444444444, 55555555555, 0.1, 0.222, 3333.33, 'c', 'varchar1', 'string1', + '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01 22:23:24.999999', 'not null') + """ + + sql """ + INSERT INTO ${tableName_11} values + (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (1, false, 11, 21, 33, 4444444444444, 55555555555, 0.1, 0.222, 3333.33, 'c', 'varchar1', 'string1', + '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01 22:23:24.999999', 'not null') + """ + + sql """ + INSERT INTO ${tableName_11} values + (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (1, true, 11, 12, 13, 1444444444444, 1555555555, 1.1, 1.222, 13333.33, 'd', 'varchar2', 'string2', + '2022-12-02', '2022-12-02', '2022-12-02 22:23:23', '2022-12-02 22:23:24.999999', 'not null') + """ + + sql """ + INSERT INTO ${tableName_11} values + (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (2, false, 21, 22, 23, 2444444444444, 255555555, 2.1, 2.222, 23333.33, 'f', 'varchar3', 'string3', + '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03 22:23:24.999999', 'not null') + """ + + sql """ + INSERT INTO ${tableName_11} values + (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (2, true, 31, 32, 33, 3444444444444, 3555555555, 3.1, 3.222, 33333.33, 'l', 'varchar3', 'string3', + '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03 22:23:24.999999', 'not null') + """ + + sql """ + INSERT INTO ${tableName_11} values + (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 'not null'), + (2, false, 10, 20, 30, 944444444444, 9555555555, 9.1, 9.222, 93333.33, 'p', 'varchar9', 'string9', + '2022-12-09', '2022-12-09', '2022-12-09 22:23:23', '2022-12-09 22:23:24.999999', 'not null') + """ + + qt_select """ + SELECT * FROM ${tableName_11} + """ + + qt_select """ + SELECT + group_uniq_array(c_bool), + group_uniq_array(c_tinyint), + group_uniq_array(c_smallint), + group_uniq_array(c_int), + group_uniq_array(c_bigint), + group_uniq_array(c_largeint), + group_uniq_array(c_float), + group_uniq_array(c_double), + group_uniq_array(c_decimal), + group_uniq_array(c_char), + group_uniq_array(c_varchar), + group_uniq_array(c_string), + group_uniq_array(c_date), + group_uniq_array(c_datev2), + group_uniq_array(c_date_time), + group_uniq_array(c_date_timev2), + group_uniq_array(c_string_not_null) + FROM + ${tableName_11} + """ + + qt_select """ + SELECT + group_array(c_bool), + group_array(c_tinyint), + group_array(c_smallint), + group_array(c_int), + group_array(c_bigint), + group_array(c_largeint), + group_array(c_float), + group_array(c_double), + group_array(c_decimal), + group_array(c_char), + group_array(c_varchar), + group_array(c_string), + group_array(c_date), + group_array(c_datev2), + group_array(c_date_time), + group_array(c_date_timev2), + group_array(c_string_not_null) + FROM + ${tableName_11} + """ + + qt_select """ + SELECT + size(group_uniq_array(c_bool,1)), + size(group_uniq_array(c_tinyint,1)), + size(group_uniq_array(c_smallint,1)), + size(group_uniq_array(c_int,1)), + size(group_uniq_array(c_bigint,1)), + size(group_uniq_array(c_largeint,3)), + size(group_uniq_array(c_float,1)), + size(group_uniq_array(c_double,2)), + size(group_uniq_array(c_decimal,1)), + size(group_uniq_array(c_char,1)), + size(group_uniq_array(c_varchar,1)), + size(group_uniq_array(c_string,1)), + size(group_uniq_array(c_date,1)), + size(group_uniq_array(c_datev2,2)), + size(group_uniq_array(c_date_time,1)), + size(group_uniq_array(c_date_timev2,1)), + size(group_uniq_array(c_string_not_null,1)) + FROM + ${tableName_11} + """ + + qt_select """ + SELECT + size(group_array(c_bool,1)), + size(group_array(c_tinyint,1)), + size(group_array(c_smallint,1)), + size(group_array(c_int,1)), + size(group_array(c_bigint,1)), + size(group_array(c_largeint,3)), + size(group_array(c_float,1)), + size(group_array(c_double,2)), + size(group_array(c_decimal,1)), + size(group_array(c_char,1)), + size(group_array(c_varchar,1)), + size(group_array(c_string,1)), + size(group_array(c_date,1)), + size(group_array(c_datev2,2)), + size(group_array(c_date_time,1)), + size(group_array(c_date_timev2,1)), + size(group_array(c_string_not_null,1)) + FROM + ${tableName_11} + """ + + sql """ + CREATE TABLE ${tableCTAS1_11} PROPERTIES("replication_num" = "1") AS + SELECT + 1, + group_uniq_array(c_bool), + group_uniq_array(c_tinyint), + group_uniq_array(c_smallint), + group_uniq_array(c_int), + group_uniq_array(c_bigint), + group_uniq_array(c_largeint), + group_uniq_array(c_float), + group_uniq_array(c_double), + group_uniq_array(c_decimal), + group_uniq_array(c_char), + group_uniq_array(c_varchar), + group_uniq_array(c_string), + group_uniq_array(c_date), + group_uniq_array(c_datev2), + group_uniq_array(c_date_time), + group_uniq_array(c_date_timev2), + group_uniq_array(c_string_not_null) + FROM + ${tableName_11} + """ + + sql """ + CREATE TABLE ${tableCTAS2_11} PROPERTIES("replication_num" = "1") AS + SELECT + 1, + size(group_uniq_array(c_bool,1)), + size(group_uniq_array(c_tinyint,1)), + size(group_uniq_array(c_smallint,1)), + size(group_uniq_array(c_int,1)), + size(group_uniq_array(c_bigint,1)), + size(group_uniq_array(c_largeint,3)), + size(group_uniq_array(c_float,1)), + size(group_uniq_array(c_double,2)), + size(group_uniq_array(c_decimal,1)), + size(group_uniq_array(c_char,1)), + size(group_uniq_array(c_varchar,1)), + size(group_uniq_array(c_string,1)), + size(group_uniq_array(c_date,1)), + size(group_uniq_array(c_datev2,2)), + size(group_uniq_array(c_date_time,1)), + size(group_uniq_array(c_date_timev2,1)), + size(group_uniq_array(c_string_not_null,1)) + FROM + ${tableName_11} + """ + + sql """ + CREATE TABLE ${tableCTAS3_11} PROPERTIES("replication_num" = "1") AS + SELECT + 1, + group_array(c_bool), + group_array(c_tinyint), + group_array(c_smallint), + group_array(c_int), + group_array(c_bigint), + group_array(c_largeint), + group_array(c_float), + group_array(c_double), + group_array(c_decimal), + group_array(c_char), + group_array(c_varchar), + group_array(c_string), + group_array(c_date), + group_array(c_datev2), + group_array(c_date_time), + group_array(c_date_timev2), + group_array(c_string_not_null) + FROM + ${tableName_11} + """ + + sql """ + CREATE TABLE ${tableCTAS4_11} PROPERTIES("replication_num" = "1") AS + SELECT + 1, + size(group_array(c_bool,1)), + size(group_array(c_tinyint,1)), + size(group_array(c_smallint,1)), + size(group_array(c_int,1)), + size(group_array(c_bigint,1)), + size(group_array(c_largeint,3)), + size(group_array(c_float,1)), + size(group_array(c_double,2)), + size(group_array(c_decimal,1)), + size(group_array(c_char,1)), + size(group_array(c_varchar,1)), + size(group_array(c_string,1)), + size(group_array(c_date,1)), + size(group_array(c_datev2,2)), + size(group_array(c_date_time,1)), + size(group_array(c_date_timev2,1)), + size(group_array(c_string_not_null,1)) + FROM + ${tableName_11} + """ + + qt_select "SELECT * FROM ${tableCTAS1_11}" + qt_select "SELECT * FROM ${tableCTAS2_11}" + qt_select "SELECT * FROM ${tableCTAS3_11}" + qt_select "SELECT * FROM ${tableCTAS4_11}" + + sql "DROP TABLE IF EXISTS ${tableName_11}" + sql "DROP TABLE IF EXISTS ${tableCTAS1_11}" + sql "DROP TABLE IF EXISTS ${tableCTAS2_11}" + sql "DROP TABLE IF EXISTS ${tableCTAS3_11}" + sql "DROP TABLE IF EXISTS ${tableCTAS4_11}" // topn_array def tableName_12 = "topn_array" @@ -65,8 +628,8 @@ suite("test_aggregate_collect") { sql "INSERT INTO ${tableName_12} values(1,10,'2022-11-1',6.8754576), (2,8,'2022-11-3',0.576), (2,10,'2022-11-2',1.234) ,(3,10,'2022-11-2',0.576) ,(5,29,'2022-11-2',6.8754576) ,(6,8,'2022-11-1',6.8754576)" qt_select43 "select topn_array(level,2) from ${tableName_12}" - qt_select44 "select topn_array(level,2,100) from ${tableName_12}" - qt_select45 "select topn_array(dt,2,100) from ${tableName_12}" - qt_select46 "select topn_array(num,2,100) from ${tableName_12}" - sql "DROP TABLE IF EXISTS ${tableName_12}" + qt_select44 "select topn_array(level,2,100) from ${tableName_12}" + qt_select45 "select topn_array(dt,2,100) from ${tableName_12}" + qt_select46 "select topn_array(num,2,100) from ${tableName_12}" + sql "DROP TABLE IF EXISTS ${tableName_12}" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org