This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c0360f80bb [enhancement](aggregate-function) enhance aggregate funtion 
collect and add group_array aliases (#15339)
c0360f80bb is described below

commit c0360f80bb721e04ff7e3f687014cdf9ea33e6ae
Author: 奕冷 <82279870+tangsiyang2...@users.noreply.github.com>
AuthorDate: Mon Feb 27 14:22:30 2023 +0800

    [enhancement](aggregate-function) enhance aggregate funtion collect and add 
group_array aliases (#15339)
    
    Enhance aggregate function `collect_set` and `collect_list` to support 
optional `max_size` param,
    which enables to limit the number of elements in result array.
---
 .../aggregate_function_collect.cpp                 | 100 ++--
 .../aggregate_function_collect.h                   | 199 ++++---
 .../aggregate-functions/collect_list.md            |  44 +-
 .../aggregate-functions/collect_set.md             |  44 +-
 .../aggregate-functions/collect_list.md            |  43 +-
 .../aggregate-functions/collect_set.md             |  43 +-
 .../apache/doris/analysis/FunctionCallExpr.java    |   5 +
 .../java/org/apache/doris/catalog/FunctionSet.java |  26 +-
 .../aggregate_functions/test_aggregate_collect.out |  73 ++-
 .../test_aggregate_group_array.out                 |  27 +
 .../test_aggregate_collect.groovy                  | 609 ++++++++++++++++++++-
 11 files changed, 1001 insertions(+), 212 deletions(-)

diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp 
b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
index 577b8e1c23..d18ae860e5 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
@@ -18,78 +18,78 @@
 #include "vec/aggregate_functions/aggregate_function_collect.h"
 
 #include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/helpers.h"
 
 namespace doris::vectorized {
 
-template <typename T>
-AggregateFunctionPtr create_agg_function_collect(bool distinct, const 
DataTypes& argument_types) {
+#define FOR_DECIMAL_TYPES(M) \
+    M(Decimal32)             \
+    M(Decimal64)             \
+    M(Decimal128)            \
+    M(Decimal128I)
+
+template <typename T, typename HasLimit>
+AggregateFunctionPtr do_create_agg_function_collect(bool distinct,
+                                                    const DataTypePtr& 
argument_type) {
     if (distinct) {
         return AggregateFunctionPtr(
-                new 
AggregateFunctionCollect<AggregateFunctionCollectSetData<T>>(argument_types));
+                new 
AggregateFunctionCollect<AggregateFunctionCollectSetData<T, HasLimit>,
+                                             HasLimit>(argument_type));
     } else {
         return AggregateFunctionPtr(
-                new 
AggregateFunctionCollect<AggregateFunctionCollectListData<T>>(argument_types));
+                new 
AggregateFunctionCollect<AggregateFunctionCollectListData<T, HasLimit>,
+                                             HasLimit>(argument_type));
     }
 }
 
-AggregateFunctionPtr create_aggregate_function_collect(const std::string& name,
-                                                       const DataTypes& 
argument_types,
-                                                       const bool 
result_is_nullable) {
-    if (argument_types.size() != 1) {
-        LOG(WARNING) << fmt::format("Illegal number {} of argument for 
aggregate function {}",
-                                    argument_types.size(), name);
-        return nullptr;
-    }
-
+template <typename HasLimit>
+AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string& 
name,
+                                                            const DataTypePtr& 
argument_type) {
     bool distinct = false;
     if (name == "collect_set") {
         distinct = true;
     }
 
-    WhichDataType type(argument_types[0]);
-    if (type.is_uint8()) {
-        return create_agg_function_collect<UInt8>(distinct, argument_types);
-    } else if (type.is_int8()) {
-        return create_agg_function_collect<Int8>(distinct, argument_types);
-    } else if (type.is_int16()) {
-        return create_agg_function_collect<Int16>(distinct, argument_types);
-    } else if (type.is_int32()) {
-        return create_agg_function_collect<Int32>(distinct, argument_types);
-    } else if (type.is_int64()) {
-        return create_agg_function_collect<Int64>(distinct, argument_types);
-    } else if (type.is_int128()) {
-        return create_agg_function_collect<Int128>(distinct, argument_types);
-    } else if (type.is_float32()) {
-        return create_agg_function_collect<Float32>(distinct, argument_types);
-    } else if (type.is_float64()) {
-        return create_agg_function_collect<Float64>(distinct, argument_types);
-    } else if (type.is_decimal32()) {
-        return create_agg_function_collect<Decimal32>(distinct, 
argument_types);
-    } else if (type.is_decimal64()) {
-        return create_agg_function_collect<Decimal64>(distinct, 
argument_types);
-    } else if (type.is_decimal128()) {
-        return create_agg_function_collect<Decimal128>(distinct, 
argument_types);
-    } else if (type.is_decimal128i()) {
-        return create_agg_function_collect<Decimal128I>(distinct, 
argument_types);
-    } else if (type.is_date()) {
-        return create_agg_function_collect<Int64>(distinct, argument_types);
-    } else if (type.is_date_time()) {
-        return create_agg_function_collect<Int64>(distinct, argument_types);
-    } else if (type.is_date_v2()) {
-        return create_agg_function_collect<UInt32>(distinct, argument_types);
-    } else if (type.is_date_time_v2()) {
-        return create_agg_function_collect<UInt64>(distinct, argument_types);
-    } else if (type.is_string()) {
-        return create_agg_function_collect<StringRef>(distinct, 
argument_types);
+    WhichDataType which(argument_type);
+#define DISPATCH(TYPE)                \
+    if (which.idx == TypeIndex::TYPE) \
+        return do_create_agg_function_collect<TYPE, HasLimit>(distinct, 
argument_type);
+    FOR_NUMERIC_TYPES(DISPATCH)
+    FOR_DECIMAL_TYPES(DISPATCH)
+#undef DISPATCH
+    if (which.is_date_or_datetime()) {
+        return do_create_agg_function_collect<Int64, HasLimit>(distinct, 
argument_type);
+    } else if (which.is_date_v2()) {
+        return do_create_agg_function_collect<UInt32, HasLimit>(distinct, 
argument_type);
+    } else if (which.is_date_time_v2()) {
+        return do_create_agg_function_collect<UInt64, HasLimit>(distinct, 
argument_type);
+    } else if (which.is_string()) {
+        return do_create_agg_function_collect<StringRef, HasLimit>(distinct, 
argument_type);
     }
 
     LOG(WARNING) << fmt::format("unsupported input type {} for aggregate 
function {}",
-                                argument_types[0]->get_name(), name);
+                                argument_type->get_name(), name);
+    return nullptr;
+}
+
+AggregateFunctionPtr create_aggregate_function_collect(const std::string& name,
+                                                       const DataTypes& 
argument_types,
+                                                       const bool 
result_is_nullable) {
+    if (argument_types.size() == 1) {
+        return create_aggregate_function_collect_impl<std::false_type>(name, 
argument_types[0]);
+    }
+    if (argument_types.size() == 2) {
+        return create_aggregate_function_collect_impl<std::true_type>(name, 
argument_types[0]);
+    }
+    LOG(WARNING) << fmt::format("number of parameters for aggregate function 
{}, should be 1 or 2",
+                                name);
     return nullptr;
 }
 
 void register_aggregate_function_collect_list(AggregateFunctionSimpleFactory& 
factory) {
     factory.register_function("collect_list", 
create_aggregate_function_collect);
     factory.register_function("collect_set", 
create_aggregate_function_collect);
+    factory.register_alias("collect_list", "group_array");
+    factory.register_alias("collect_set", "group_uniq_array");
 }
-} // namespace doris::vectorized
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h 
b/be/src/vec/aggregate_functions/aggregate_function_collect.h
index f194fc2639..27d9733bd3 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h
@@ -17,6 +17,8 @@
 
 #pragma once
 
+#include <algorithm>
+
 #include "common/status.h"
 #include "vec/aggregate_functions/aggregate_function.h"
 #include "vec/aggregate_functions/key_holder_helpers.h"
@@ -31,122 +33,180 @@
 
 namespace doris::vectorized {
 
-template <typename T>
+template <typename T, typename HasLimit>
 struct AggregateFunctionCollectSetData {
     using ElementType = T;
     using ColVecType = ColumnVectorOrDecimal<ElementType>;
     using ElementNativeType = typename NativeType<T>::Type;
+    using SelfType = AggregateFunctionCollectSetData;
     using Set = HashSetWithStackMemory<ElementNativeType, 
DefaultHash<ElementNativeType>, 4>;
-    Set set;
+    Set data_set;
+    Int64 max_size = -1;
+
+    size_t size() const { return data_set.size(); }
 
     void add(const IColumn& column, size_t row_num) {
-        const auto& vec = assert_cast<const ColVecType&>(column).get_data();
-        set.insert(vec[row_num]);
+        data_set.insert(assert_cast<const 
ColVecType&>(column).get_data()[row_num]);
+    }
+
+    void merge(const SelfType& rhs) {
+        if constexpr (HasLimit::value) {
+            for (auto& rhs_elem : rhs.data_set) {
+                if (size() >= max_size) {
+                    return;
+                }
+                data_set.insert(rhs_elem.get_value());
+            }
+        } else {
+            data_set.merge(rhs.data_set);
+        }
     }
-    void merge(const AggregateFunctionCollectSetData& rhs) { 
set.merge(rhs.set); }
-    void write(BufferWritable& buf) const { set.write(buf); }
-    void read(BufferReadable& buf) { set.read(buf); }
-    void reset() { set.clear(); }
+
+    void write(BufferWritable& buf) const { data_set.write(buf); }
+
+    void read(BufferReadable& buf) { data_set.read(buf); }
+
     void insert_result_into(IColumn& to) const {
         auto& vec = assert_cast<ColVecType&>(to).get_data();
-        vec.reserve(set.size());
-        for (auto item : set) {
+        vec.reserve(size());
+        for (const auto& item : data_set) {
             vec.push_back(item.key);
         }
     }
+
+    void reset() { data_set.clear(); }
 };
 
-template <>
-struct AggregateFunctionCollectSetData<StringRef> {
+template <typename HasLimit>
+struct AggregateFunctionCollectSetData<StringRef, HasLimit> {
     using ElementType = StringRef;
     using ColVecType = ColumnString;
+    using SelfType = AggregateFunctionCollectSetData<ElementType, HasLimit>;
     using Set = HashSetWithSavedHashWithStackMemory<ElementType, 
DefaultHash<ElementType>, 4>;
-    Set set;
+    Set data_set;
+    Int64 max_size = -1;
+
+    size_t size() const { return data_set.size(); }
 
     void add(const IColumn& column, size_t row_num, Arena* arena) {
         Set::LookupResult it;
         bool inserted;
         auto key_holder = get_key_holder<true>(column, row_num, *arena);
-        set.emplace(key_holder, it, inserted);
+        data_set.emplace(key_holder, it, inserted);
     }
 
-    void merge(const AggregateFunctionCollectSetData& rhs, Arena* arena) {
-        Set::LookupResult it;
+    void merge(const SelfType& rhs, Arena* arena) {
         bool inserted;
-        for (const auto& elem : rhs.set) {
-            set.emplace(ArenaKeyHolder {elem.get_value(), *arena}, it, 
inserted);
+        Set::LookupResult it;
+        for (auto& rhs_elem : rhs.data_set) {
+            if constexpr (HasLimit::value) {
+                if (size() >= max_size) {
+                    return;
+                }
+            }
+            assert(arena != nullptr);
+            data_set.emplace(ArenaKeyHolder {rhs_elem.get_value(), *arena}, 
it, inserted);
         }
     }
+
     void write(BufferWritable& buf) const {
-        write_var_uint(set.size(), buf);
-        for (const auto& elem : set) {
+        write_var_uint(size(), buf);
+        for (const auto& elem : data_set) {
             write_string_binary(elem.get_value(), buf);
         }
     }
-    void read(BufferReadable& buf) {
-        UInt64 rows;
-        read_var_uint(rows, buf);
 
+    void read(BufferReadable& buf) {
+        UInt64 size;
+        read_var_uint(size, buf);
         StringRef ref;
-        for (size_t i = 0; i < rows; ++i) {
+        for (size_t i = 0; i < size; ++i) {
             read_string_binary(ref, buf);
-            set.insert(ref);
+            data_set.insert(ref);
         }
     }
-    void reset() { set.clear(); }
+
     void insert_result_into(IColumn& to) const {
         auto& vec = assert_cast<ColVecType&>(to);
-        vec.reserve(set.size());
-        for (const auto& item : set) {
+        vec.reserve(size());
+        for (const auto& item : data_set) {
             vec.insert_data(item.key.data, item.key.size);
         }
     }
+
+    void reset() { data_set.clear(); }
 };
 
-template <typename T>
+template <typename T, typename HasLimit>
 struct AggregateFunctionCollectListData {
     using ElementType = T;
     using ColVecType = ColumnVectorOrDecimal<ElementType>;
+    using SelfType = AggregateFunctionCollectListData<ElementType, HasLimit>;
     PaddedPODArray<ElementType> data;
+    Int64 max_size = -1;
+
+    size_t size() const { return data.size(); }
 
     void add(const IColumn& column, size_t row_num) {
         const auto& vec = assert_cast<const ColVecType&>(column).get_data();
         data.push_back(vec[row_num]);
     }
-    void merge(const AggregateFunctionCollectListData& rhs) {
-        data.insert(rhs.data.begin(), rhs.data.end());
+
+    void merge(const SelfType& rhs) {
+        if constexpr (HasLimit::value) {
+            for (auto& rhs_elem : rhs.data) {
+                if (size() >= max_size) {
+                    return;
+                }
+                data.push_back(rhs_elem);
+            }
+        } else {
+            data.insert(rhs.data.begin(), rhs.data.end());
+        }
     }
+
     void write(BufferWritable& buf) const {
-        write_var_uint(data.size(), buf);
-        buf.write(data.raw_data(), data.size() * sizeof(ElementType));
+        write_var_uint(size(), buf);
+        buf.write(data.raw_data(), size() * sizeof(ElementType));
     }
+
     void read(BufferReadable& buf) {
         UInt64 rows = 0;
         read_var_uint(rows, buf);
         data.resize(rows);
         buf.read(reinterpret_cast<char*>(data.data()), rows * 
sizeof(ElementType));
     }
+
     void reset() { data.clear(); }
+
     void insert_result_into(IColumn& to) const {
         auto& vec = assert_cast<ColVecType&>(to).get_data();
         size_t old_size = vec.size();
-        vec.resize(old_size + data.size());
-        memcpy(vec.data() + old_size, data.data(), data.size() * 
sizeof(ElementType));
+        vec.resize(old_size + size());
+        memcpy(vec.data() + old_size, data.data(), size() * 
sizeof(ElementType));
     }
 };
 
-template <>
-struct AggregateFunctionCollectListData<StringRef> {
+template <typename HasLimit>
+struct AggregateFunctionCollectListData<StringRef, HasLimit> {
     using ElementType = StringRef;
     using ColVecType = ColumnString;
     MutableColumnPtr data;
+    Int64 max_size = -1;
 
     AggregateFunctionCollectListData() { data = ColVecType::create(); }
 
+    size_t size() const { return data->size(); }
+
     void add(const IColumn& column, size_t row_num) { 
data->insert_from(column, row_num); }
 
     void merge(const AggregateFunctionCollectListData& rhs) {
-        data->insert_range_from(*rhs.data, 0, rhs.data->size());
+        if constexpr (HasLimit::value) {
+            data->insert_range_from(*rhs.data, 0,
+                                    std::min(static_cast<size_t>(max_size - 
size()), rhs.size()));
+        } else {
+            data->insert_range_from(*rhs.data, 0, rhs.size());
+        }
     }
 
     void write(BufferWritable& buf) const {
@@ -177,23 +237,27 @@ struct AggregateFunctionCollectListData<StringRef> {
 
     void insert_result_into(IColumn& to) const {
         auto& to_str = assert_cast<ColVecType&>(to);
-        to_str.insert_range_from(*data, 0, data->size());
+        to_str.insert_range_from(*data, 0, size());
     }
 };
 
-template <typename Data>
-class AggregateFunctionCollect final
-        : public IAggregateFunctionDataHelper<Data, 
AggregateFunctionCollect<Data>> {
-public:
-    static constexpr bool alloc_memory_in_arena =
-            std::is_same_v<Data, AggregateFunctionCollectSetData<StringRef>>;
+template <typename Data, typename HasLimit>
+class AggregateFunctionCollect
+        : public IAggregateFunctionDataHelper<Data, 
AggregateFunctionCollect<Data, HasLimit>> {
+    using GenericType = AggregateFunctionCollectSetData<StringRef, HasLimit>;
 
-    AggregateFunctionCollect(const DataTypes& argument_types_)
-            : IAggregateFunctionDataHelper<Data, 
AggregateFunctionCollect<Data>>(argument_types_),
-              _argument_type(argument_types_[0]) {}
+    static constexpr bool ENABLE_ARENA = std::is_same_v<Data, GenericType>;
+
+public:
+    AggregateFunctionCollect(const DataTypePtr& argument_type,
+                             UInt64 max_size_ = 
std::numeric_limits<UInt64>::max())
+            : IAggregateFunctionDataHelper<Data, 
AggregateFunctionCollect<Data, HasLimit>>(
+                      {argument_type}),
+              return_type(argument_type) {}
 
     std::string get_name() const override {
-        if constexpr (std::is_same_v<AggregateFunctionCollectListData<typename 
Data::ElementType>,
+        if constexpr (std::is_same_v<AggregateFunctionCollectListData<typename 
Data::ElementType,
+                                                                      
HasLimit>,
                                      Data>) {
             return "collect_list";
         } else {
@@ -202,27 +266,38 @@ public:
     }
 
     DataTypePtr get_return_type() const override {
-        return std::make_shared<DataTypeArray>(make_nullable(_argument_type));
+        return std::make_shared<DataTypeArray>(make_nullable(return_type));
     }
 
+    bool allocates_memory_in_arena() const override { return ENABLE_ARENA; }
+
     void add(AggregateDataPtr __restrict place, const IColumn** columns, 
size_t row_num,
              Arena* arena) const override {
-        assert(!columns[0]->is_null_at(row_num));
-        if constexpr (alloc_memory_in_arena) {
-            this->data(place).add(*columns[0], row_num, arena);
+        auto& data = this->data(place);
+        if constexpr (HasLimit::value) {
+            if (data.max_size == -1) {
+                data.max_size =
+                        (UInt64) static_cast<const 
ColumnInt32*>(columns[1])->get_element(row_num);
+            }
+            if (data.size() >= data.max_size) {
+                return;
+            }
+        }
+        if constexpr (ENABLE_ARENA) {
+            data.add(*columns[0], row_num, arena);
         } else {
-            this->data(place).add(*columns[0], row_num);
+            data.add(*columns[0], row_num);
         }
     }
 
-    void reset(AggregateDataPtr place) const override { 
this->data(place).reset(); }
-
     void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
                Arena* arena) const override {
-        if constexpr (alloc_memory_in_arena) {
-            this->data(place).merge(this->data(rhs), arena);
+        auto& data = this->data(place);
+        auto& rhs_data = this->data(rhs);
+        if constexpr (ENABLE_ARENA) {
+            data.merge(rhs_data, arena);
         } else {
-            this->data(place).merge(this->data(rhs));
+            data.merge(rhs_data);
         }
     }
 
@@ -248,10 +323,8 @@ public:
         to_arr.get_offsets().push_back(to_nested_col.size());
     }
 
-    bool allocates_memory_in_arena() const override { return 
alloc_memory_in_arena; }
-
 private:
-    DataTypePtr _argument_type;
+    DataTypePtr return_type;
 };
 
-} // namespace doris::vectorized
+} // namespace doris::vectorized
\ No newline at end of file
diff --git 
a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md 
b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md
index eb2a732387..a0ebc957ff 100644
--- a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md
+++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md
@@ -30,9 +30,8 @@ under the License.
 
 `ARRAY<T> collect_list(expr)`
 
-Returns an array consisting of all values in expr within the group.
-The order of elements in the array is non-deterministic. NULL values are 
excluded.
-
+Returns an array consisting of all values in expr within the group, and ,with 
the optional `max_size` parameter limits the size of the resulting array to 
`max_size` elements.The order of elements in the array is non-deterministic. 
NULL values are excluded.
+It has an alias `group_array`.
 ### notice
 
 ```
@@ -44,26 +43,37 @@ Only supported in vectorized engine
 ```
 mysql> set enable_vectorized_engine=true;
 
-mysql> select k1,k2,k3 from collect_test order by k1;
+mysql> select k1,k2,k3 from collect_list_test order by k1;
 +------+------------+-------+
 | k1   | k2         | k3    |
 +------+------------+-------+
-|    1 | 2022-07-05 | hello |
-|    2 | 2022-07-04 | NULL  |
-|    2 | 2022-07-04 | hello |
-|    3 | NULL       | world |
+|    1 | 2023-01-01 | hello |
+|    2 | 2023-01-02 | NULL  |
+|    2 | 2023-01-02 | hello |
 |    3 | NULL       | world |
+|    3 | 2023-01-02 | hello |
+|    4 | 2023-01-02 | sql   |
+|    4 | 2023-01-03 | sql   |
 +------+------------+-------+
 
-mysql> select k1,collect_list(k2),collect_list(k3) from collect_test group by 
k1 order by k1;
-+------+--------------------------+--------------------+
-| k1   | collect_list(`k2`)       | collect_list(`k3`) |
-+------+--------------------------+--------------------+
-|    1 | [2022-07-05]             | [hello]            |
-|    2 | [2022-07-04, 2022-07-04] | [hello]            |
-|    3 | NULL                     | [world, world]     |
-+------+--------------------------+--------------------+
+mysql> select collect_list(k1),collect_list(k1,2) from collect_list_test;
++-------------------------+--------------------------+
+| collect_list(`k1`)      | collect_list(`k1`,3)     |
++-------------------------+--------------------------+
+| [1,2,2,3,3,4,4]         | [1,2,2]                  |
++-------------------------+--------------------------+
+
+mysql> select k1,collect_list(k2),collect_list(k3,1) from collect_list_test 
group by k1 order by k1;
++------+-------------------------+--------------------------+
+| k1   | collect_list(`k2`)      | collect_list(`k3`,1)     |
++------+-------------------------+--------------------------+
+|    1 | [2023-01-01]            | [hello]                  |
+|    2 | [2023-01-02,2023-01-02] | [hello]                  |
+|    3 | [2023-01-02]            | [world]                  |
+|    4 | [2023-01-02,2023-01-03] | [sql]                    |
++------+-------------------------+--------------------------+
+
 ```
 
 ### keywords
-COLLECT_LIST,COLLECT_SET,ARRAY
+COLLECT_LIST,GROUP_ARRAY,COLLECT_SET,ARRAY
diff --git 
a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md 
b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md
index 06257357c7..42987df4e3 100644
--- a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md
+++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md
@@ -35,10 +35,9 @@ COLLECT_SET
 ### description
 #### Syntax
 
-`ARRAY<T> collect_set(expr)`
+`ARRAY<T> collect_set(expr[,max_size])`
 
-Returns an array consisting of all unique values in expr within the group.
-The order of elements in the array is non-deterministic. NULL values are 
excluded.
+Creates an array containing distinct elements from `expr`,with the optional 
`max_size` parameter limits the size of the resulting array to `max_size` 
elements. It has an alias `group_uniq_array`.
 
 ### notice
 
@@ -51,26 +50,37 @@ Only supported in vectorized engine
 ```
 mysql> set enable_vectorized_engine=true;
 
-mysql> select k1,k2,k3 from collect_test order by k1;
+mysql> select k1,k2,k3 from collect_set_test order by k1;
 +------+------------+-------+
 | k1   | k2         | k3    |
 +------+------------+-------+
-|    1 | 2022-07-05 | hello |
-|    2 | 2022-07-04 | NULL  |
-|    2 | 2022-07-04 | hello |
-|    3 | NULL       | world |
+|    1 | 2023-01-01 | hello |
+|    2 | 2023-01-01 | NULL  |
+|    2 | 2023-01-02 | hello |
 |    3 | NULL       | world |
+|    3 | 2023-01-02 | hello |
+|    4 | 2023-01-02 | doris |
+|    4 | 2023-01-03 | sql   |
 +------+------------+-------+
 
-mysql> select k1,collect_set(k2),collect_set(k3) from collect_test group by k1 
order by k1;
-+------+-------------------+-------------------+
-| k1   | collect_set(`k2`) | collect_set(`k3`) |
-+------+-------------------+-------------------+
-|    1 | [2022-07-05]      | [hello]           |
-|    2 | [2022-07-04]      | [hello]           |
-|    3 | NULL              | [world]           |
-+------+-------------------+-------------------+
+mysql> select collect_set(k1),collect_set(k1,2) from collect_set_test;
++-------------------------+--------------------------+
+| collect_set(`k1`)       | collect_set(`k1`,2)      |
++-------------------------+--------------------------+
+| [4,3,2,1]               | [1,2]                    |
++----------------------------------------------------+
+
+mysql> select k1,collect_set(k2),collect_set(k3,1) from collect_set_test group 
by k1 order by k1;
++------+-------------------------+--------------------------+
+| k1   | collect_set(`k2`)       | collect_set(`k3`,1)      |
++------+-------------------------+--------------------------+
+|    1 | [2023-01-01]            | [hello]                  |
+|    2 | [2023-01-01,2023-01-02] | [hello]                  |
+|    3 | [2023-01-02]            | [world]                  |
+|    4 | [2023-01-02,2023-01-03] | [sql]                    |
++------+-------------------------+--------------------------+
+
 ```
 
 ### keywords
-COLLECT_SET,COLLECT_LIST,ARRAY
+COLLECT_SET,GROUP_UNIQ_ARRAY,COLLECT_LIST,ARRAY
\ No newline at end of file
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md
index 37ee1010d4..8ef3c01374 100644
--- 
a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md
+++ 
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_list.md
@@ -28,9 +28,10 @@ under the License.
 ### description
 #### Syntax
 
-`ARRAY<T> collect_list(expr)`
+`ARRAY<T> collect_list(expr[,max_size])`
 
-返回一个包含 expr 中所有元素(不包括NULL)的数组,数组中元素顺序是不确定的。
+返回一个包含 expr 中所有元素(不包括NULL)的数组,可选参数`max_size`,通过设置该参数能够将结果数组的大小限制为 `max_size` 
个元素。
+得到的结果数组中不包含NULL元素,数组中的元素顺序不固定。该函数具有别名`group_array`。
 
 
 ### notice
@@ -44,27 +45,37 @@ under the License.
 ```
 mysql> set enable_vectorized_engine=true;
 
-mysql> select k1,k2,k3 from collect_test order by k1;
+mysql> select k1,k2,k3 from collect_list_test order by k1;
 +------+------------+-------+
 | k1   | k2         | k3    |
 +------+------------+-------+
-|    1 | 2022-07-05 | hello |
-|    2 | 2022-07-04 | NULL  |
-|    2 | 2022-07-04 | hello |
-|    3 | NULL       | world |
+|    1 | 2023-01-01 | hello |
+|    2 | 2023-01-02 | NULL  |
+|    2 | 2023-01-02 | hello |
 |    3 | NULL       | world |
+|    3 | 2023-01-02 | hello |
+|    4 | 2023-01-02 | sql   |
+|    4 | 2023-01-03 | sql   |
 +------+------------+-------+
 
-mysql> select k1,collect_list(k2),collect_list(k3) from collect_test group by 
k1 order by k1;
-+------+--------------------------+--------------------+
-| k1   | collect_list(`k2`)       | collect_list(`k3`) |
-+------+--------------------------+--------------------+
-|    1 | [2022-07-05]             | [hello]            |
-|    2 | [2022-07-04, 2022-07-04] | [hello]            |
-|    3 | NULL                     | [world, world]     |
-+------+--------------------------+--------------------+
+mysql> select collect_list(k1),collect_list(k1,2) from collect_list_test;
++-------------------------+--------------------------+
+| collect_list(`k1`)      | collect_list(`k1`,3)     |
++-------------------------+--------------------------+
+| [1,2,2,3,3,4,4]         | [1,2,2]                  |
++-------------------------+--------------------------+
+
+mysql> select k1,collect_list(k2),collect_list(k3,1) from collect_list_test 
group by k1 order by k1;
++------+-------------------------+--------------------------+
+| k1   | collect_list(`k2`)      | collect_list(`k3`,1)     |
++------+-------------------------+--------------------------+
+|    1 | [2023-01-01]            | [hello]                  |
+|    2 | [2023-01-02,2023-01-02] | [hello]                  |
+|    3 | [2023-01-02]            | [world]                  |
+|    4 | [2023-01-02,2023-01-03] | [sql]                    |
++------+-------------------------+--------------------------+
 
 ```
 
 ### keywords
-COLLECT_LIST,COLLECT_SET,ARRAY
+COLLECT_LIST,GROUP_ARRAY,COLLECT_SET,ARRAY
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md
index be9754d356..70aaac611f 100644
--- 
a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md
+++ 
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/collect_set.md
@@ -35,9 +35,10 @@ COLLECT_SET
 ### description
 #### Syntax
 
-`ARRAY<T> collect_set(expr)`
+`ARRAY<T> collect_set(expr[,max_size])`
 
-返回一个包含 expr 中所有去重后元素(不包括NULL)的数组,数组中元素顺序是不确定的。
+返回一个对`expr`去重后的数组。可选参数`max_size`,通过设置该参数能够将结果数组的大小限制为 `max_size` 个元素。
+得到的结果数组中不包含NULL元素,数组中的元素顺序不固定。该函数具有别名`group_uniq_array`。
 
 ### notice
 
@@ -50,27 +51,37 @@ COLLECT_SET
 ```
 mysql> set enable_vectorized_engine=true;
 
-mysql> select k1,k2,k3 from collect_test order by k1;
+mysql> select k1,k2,k3 from collect_set_test order by k1;
 +------+------------+-------+
 | k1   | k2         | k3    |
 +------+------------+-------+
-|    1 | 2022-07-05 | hello |
-|    2 | 2022-07-04 | NULL  |
-|    2 | 2022-07-04 | hello |
-|    3 | NULL       | world |
+|    1 | 2023-01-01 | hello |
+|    2 | 2023-01-01 | NULL  |
+|    2 | 2023-01-02 | hello |
 |    3 | NULL       | world |
+|    3 | 2023-01-02 | hello |
+|    4 | 2023-01-02 | doris |
+|    4 | 2023-01-03 | sql   |
 +------+------------+-------+
 
-mysql> select k1,collect_set(k2),collect_set(k3) from collect_test group by k1 
order by k1;
-+------+-------------------+-------------------+
-| k1   | collect_set(`k2`) | collect_set(`k3`) |
-+------+-------------------+-------------------+
-|    1 | [2022-07-05]      | [hello]           |
-|    2 | [2022-07-04]      | [hello]           |
-|    3 | NULL              | [world]           |
-+------+-------------------+-------------------+
+mysql> select collect_set(k1),collect_set(k1,2) from collect_set_test;
++-------------------------+--------------------------+
+| collect_set(`k1`)       | collect_set(`k1`,2)      |
++-------------------------+--------------------------+
+| [4,3,2,1]               | [1,2]                    |
++----------------------------------------------------+
+
+mysql> select k1,collect_set(k2),collect_set(k3,1) from collect_set_test group 
by k1 order by k1;
++------+-------------------------+--------------------------+
+| k1   | collect_set(`k2`)       | collect_set(`k3`,1)      |
++------+-------------------------+--------------------------+
+|    1 | [2023-01-01]            | [hello]                  |
+|    2 | [2023-01-01,2023-01-02] | [hello]                  |
+|    3 | [2023-01-02]            | [world]                  |
+|    4 | [2023-01-02,2023-01-03] | [sql]                    |
++------+-------------------------+--------------------------+
 
 ```
 
 ### keywords
-COLLECT_SET,COLLECT_LIST,ARRAY
+COLLECT_SET,GROUP_UNIQ_ARRAY,COLLECT_LIST,ARRAY
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index 75dc5579d4..1459cb9ddc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -1299,6 +1299,11 @@ public class FunctionCallExpr extends Expr {
             fn.setReturnType(new ArrayType(getChild(0).type));
         }
 
+        if (fnName.getFunction().equalsIgnoreCase("group_uniq_array")
+                || fnName.getFunction().equalsIgnoreCase("group_array")) {
+            fn.setReturnType(new ArrayType(getChild(0).type));
+        }
+
         if (fnName.getFunction().equalsIgnoreCase("from_unixtime")
                 || fnName.getFunction().equalsIgnoreCase("date_format")) {
             // if has only one child, it has default time format: yyyy-MM-dd 
HH:mm:ss.SSSSSS
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 35c0d40b2a..f344a3c721 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -1345,8 +1345,8 @@ public class FunctionSet<T> {
     }
 
     public void addScalarAndVectorizedBuiltin(String fnName, boolean 
userVisible,
-                                              Function.NullableMode 
nullableMode, Type retType,
-                                              boolean varArgs, Type ... args) {
+            Function.NullableMode nullableMode, Type retType,
+            boolean varArgs, Type ... args) {
         ArrayList<Type> argsType = new ArrayList<Type>();
         for (Type type : args) {
             argsType.add(type);
@@ -1405,6 +1405,10 @@ public class FunctionSet<T> {
 
     public static final String SEQUENCE_COUNT = "sequence_count";
 
+    public static final String GROUP_UNIQ_ARRAY = "group_uniq_array";
+
+    public static final String GROUP_ARRAY = "group_array";
+
     // Populate all the aggregate builtins in the catalog.
     // null symbols indicate the function does not need that step of the 
evaluation.
     // An empty symbol indicates a TODO for the BE to implement the function.
@@ -2587,6 +2591,10 @@ public class FunctionSet<T> {
                     "", "", "", "", "", true, false, true, true));
             addBuiltin(AggregateFunction.createBuiltin(COLLECT_SET, 
Lists.newArrayList(t), new ArrayType(t), t,
                     "", "", "", "", "", true, false, true, true));
+            addBuiltin(AggregateFunction.createBuiltin(COLLECT_LIST, 
Lists.newArrayList(t, Type.INT), new ArrayType(t), t,
+                    "", "", "", "", "", true, false, true, true));
+            addBuiltin(AggregateFunction.createBuiltin(COLLECT_SET, 
Lists.newArrayList(t, Type.INT), new ArrayType(t), t,
+                    "", "", "", "", "", true, false, true, true));
             addBuiltin(
                     AggregateFunction.createBuiltin("topn_array", 
Lists.newArrayList(t, Type.INT), new ArrayType(t), t,
                             "", "", "", "", "", true, false, true, true));
@@ -2611,8 +2619,20 @@ public class FunctionSet<T> {
                     "", "", "", "", "", true, false, true, true));
             addBuiltin(AggregateFunction.createBuiltin(HIST, 
Lists.newArrayList(t, Type.DOUBLE, Type.INT), Type.VARCHAR, t,
                                     "", "", "", "", "", true, false, true, 
true));
-            addBuiltin(AggregateFunction.createBuiltin(HISTOGRAM, 
Lists.newArrayList(t, Type.DOUBLE, Type.INT), Type.VARCHAR, t,
+            addBuiltin(AggregateFunction.createBuiltin(HISTOGRAM, 
Lists.newArrayList(t, Type.DOUBLE, Type.INT),
+                    Type.VARCHAR, t,
                     "", "", "", "", "", true, false, true, true));
+
+            addBuiltin(AggregateFunction.createBuiltin(GROUP_UNIQ_ARRAY, 
Lists.newArrayList(t), new ArrayType(t), t,
+                    "", "", "", "", "", true, false, true, true));
+            addBuiltin(
+                    AggregateFunction.createBuiltin(GROUP_UNIQ_ARRAY, 
Lists.newArrayList(t, Type.INT), new ArrayType(t),
+                            t, "", "", "", "", "", true, false, true, true));
+            addBuiltin(AggregateFunction.createBuiltin(GROUP_ARRAY, 
Lists.newArrayList(t), new ArrayType(t), t,
+                    "", "", "", "", "", true, false, true, true));
+            addBuiltin(
+                    AggregateFunction.createBuiltin(GROUP_ARRAY, 
Lists.newArrayList(t, Type.INT), new ArrayType(t),
+                            t, "", "", "", "", "", true, false, true, true));
         }
 
         // Avg
diff --git 
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
 
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
index f26cf1146b..3c17fae7af 100644
--- 
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
+++ 
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
@@ -1,20 +1,79 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !select --
-1      ['hello', 'hello']      [2022-07-04, 2022-07-04]        [1.23, 1.23]
-2      \N      \N      \N
+1      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+1      false   10      20      30      4444444444444   55555555555     0.1     
0.222   3333.33 c       varchar1        string1 2022-12-01      2022-12-01      
2022-12-01T22:23:23     2022-12-01T22:23:24.999999      not null
+1      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+1      false   11      21      33      4444444444444   55555555555     0.1     
0.222   3333.33 c       varchar1        string1 2022-12-01      2022-12-01      
2022-12-01T22:23:23     2022-12-01T22:23:24.999999      not null
+1      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+1      true    11      12      13      1444444444444   1555555555      1.1     
1.222   13333.33        d       varchar2        string2 2022-12-02      
2022-12-02      2022-12-02T22:23:23     2022-12-02T22:23:24.999999      not null
+2      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+2      false   21      22      23      2444444444444   255555555       2.1     
2.222   23333.33        f       varchar3        string3 2022-12-03      
2022-12-03      2022-12-03T22:23:23     2022-12-03T22:23:24.999999      not null
+2      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+2      true    31      32      33      3444444444444   3555555555      3.1     
3.222   33333.33        l       varchar3        string3 2022-12-03      
2022-12-03      2022-12-03T22:23:23     2022-12-03T22:23:24.999999      not null
+2      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+2      false   10      20      30      944444444444    9555555555      9.1     
9.222   93333.33        p       varchar9        string9 2022-12-09      
2022-12-09      2022-12-09T22:23:23     2022-12-09T22:23:24.999999      not null
 
 -- !select --
-1      ['hello']       [2022-07-04]    [1.23]
-2      \N      \N      \N
+[0, 1] [21, 11, 31, 10]        [21, 12, 20, 22, 32]    [30, 13, 23, 33]        
[2444444444444, 4444444444444, 3444444444444, 1444444444444, 944444444444]      
[1555555555, 3555555555, 9555555555, 255555555, 55555555555]    [0.1, 1.1, 3.1, 
9.1, 2.1]       [3.222, 2.222, 9.222, 1.222, 0.222]     [23333.33, 3333.33, 
33333.33, 93333.33, 13333.33]       ['c', 'p', 'l', 'd', 'f']       
['varchar3', 'varchar1', 'varchar2', 'varchar9']        ['string9', 'string2', 
'string1', 'string3']    [2022-12-03, 2022-12-09, 2022-12-02, 2022-12-01]       
 [2022-12-01,  [...]
 
 -- !select --
-['hello', 'hello']     ['hello', 'hello', 'hello', 'hello']
+[0, 0, 1, 0, 1, 0]     [10, 11, 11, 21, 31, 10]        [20, 21, 12, 22, 32, 
20]        [30, 33, 13, 23, 33, 30]        [4444444444444, 4444444444444, 
1444444444444, 2444444444444, 3444444444444, 944444444444]       [55555555555, 
55555555555, 1555555555, 255555555, 3555555555, 9555555555]       [0.1, 0.1, 
1.1, 2.1, 3.1, 9.1]  [0.222, 0.222, 1.222, 2.222, 3.222, 9.222]      [3333.33, 
3333.33, 13333.33, 23333.33, 33333.33, 93333.33]      ['c', 'c', 'd', 'f', 'l', 
'p']  ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3', 'varchar9 
[...]
 
 -- !select --
-['hello']      ['hello']
+1      1       1       1       1       3       1       2       1       1       
1       1       1       2       1       1       1
 
 -- !select --
-1      [1, 2, 1, 2]    ['hello']       [2022-07-04, 2022-07-04]        [1.23]  
['hello', 'hello', 'hello', 'hello']
+1      1       1       1       1       3       1       2       1       1       
1       1       1       2       1       1       1
+
+-- !select --
+1      [0, 1]  [21, 11, 31, 10]        [21, 12, 20, 22, 32]    [30, 13, 23, 
33]        [2444444444444, 4444444444444, 3444444444444, 1444444444444, 
944444444444]      [1555555555, 3555555555, 9555555555, 255555555, 55555555555] 
   [0.1, 1.1, 3.1, 9.1, 2.1]       [3.222, 2.222, 9.222, 1.222, 0.222]     
[23333.33, 3333.33, 33333.33, 93333.33, 13333.33]       ['c', 'p', 'l', 'd', 
'f']       ['varchar3', 'varchar1', 'varchar2', 'varchar9']        ['string9', 
'string2', 'string1', 'string3']    [2022-12-03, 2022-12-09, 2022-12-02, 
2022-12-01]        [2022-12-01 [...]
+
+-- !select --
+1      1       1       1       1       1       3       1       2       1       
1       1       1       1       2       1       1       1
+
+-- !select --
+1      [0, 0, 1, 0, 1, 0]      [10, 11, 11, 21, 31, 10]        [20, 21, 12, 
22, 32, 20]        [30, 33, 13, 23, 33, 30]        [4444444444444, 
4444444444444, 1444444444444, 2444444444444, 3444444444444, 944444444444]       
[55555555555, 55555555555, 1555555555, 255555555, 3555555555, 9555555555]       
[0.1, 0.1, 1.1, 2.1, 3.1, 9.1]  [0.222, 0.222, 1.222, 2.222, 3.222, 9.222]      
[3333.33, 3333.33, 13333.33, 23333.33, 33333.33, 93333.33]      ['c', 'c', 'd', 
'f', 'l', 'p']  ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3', 
'varcha [...]
+
+-- !select --
+1      1       1       1       1       1       3       1       2       1       
1       1       1       1       2       1       1       1
+
+-- !select --
+1      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+1      false   10      20      30      4444444444444   55555555555     0.1     
0.222   3333.33 c       varchar1        string1 2022-12-01      2022-12-01      
2022-12-01T22:23:23     2022-12-01T22:23:24.999999      not null
+1      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+1      false   11      21      33      4444444444444   55555555555     0.1     
0.222   3333.33 c       varchar1        string1 2022-12-01      2022-12-01      
2022-12-01T22:23:23     2022-12-01T22:23:24.999999      not null
+1      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+1      true    11      12      13      1444444444444   1555555555      1.1     
1.222   13333.33        d       varchar2        string2 2022-12-02      
2022-12-02      2022-12-02T22:23:23     2022-12-02T22:23:24.999999      not null
+2      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+2      false   21      22      23      2444444444444   255555555       2.1     
2.222   23333.33        f       varchar3        string3 2022-12-03      
2022-12-03      2022-12-03T22:23:23     2022-12-03T22:23:24.999999      not null
+2      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+2      true    31      32      33      3444444444444   3555555555      3.1     
3.222   33333.33        l       varchar3        string3 2022-12-03      
2022-12-03      2022-12-03T22:23:23     2022-12-03T22:23:24.999999      not null
+2      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+2      false   10      20      30      944444444444    9555555555      9.1     
9.222   93333.33        p       varchar9        string9 2022-12-09      
2022-12-09      2022-12-09T22:23:23     2022-12-09T22:23:24.999999      not null
+
+-- !select --
+[0, 1] [21, 11, 31, 10]        [21, 12, 20, 22, 32]    [30, 13, 23, 33]        
[2444444444444, 4444444444444, 3444444444444, 1444444444444, 944444444444]      
[1555555555, 3555555555, 9555555555, 255555555, 55555555555]    [0.1, 1.1, 3.1, 
9.1, 2.1]       [3.222, 2.222, 9.222, 1.222, 0.222]     [23333.33, 3333.33, 
33333.33, 93333.33, 13333.33]       ['c', 'p', 'l', 'd', 'f']       
['varchar3', 'varchar1', 'varchar2', 'varchar9']        ['string9', 'string2', 
'string1', 'string3']    [2022-12-03, 2022-12-09, 2022-12-02, 2022-12-01]       
 [2022-12-01,  [...]
+
+-- !select --
+[0, 0, 1, 0, 1, 0]     [10, 11, 11, 21, 31, 10]        [20, 21, 12, 22, 32, 
20]        [30, 33, 13, 23, 33, 30]        [4444444444444, 4444444444444, 
1444444444444, 2444444444444, 3444444444444, 944444444444]       [55555555555, 
55555555555, 1555555555, 255555555, 3555555555, 9555555555]       [0.1, 0.1, 
1.1, 2.1, 3.1, 9.1]  [0.222, 0.222, 1.222, 2.222, 3.222, 9.222]      [3333.33, 
3333.33, 13333.33, 23333.33, 33333.33, 93333.33]      ['c', 'c', 'd', 'f', 'l', 
'p']  ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3', 'varchar9 
[...]
+
+-- !select --
+1      1       1       1       1       3       1       2       1       1       
1       1       1       2       1       1       1
+
+-- !select --
+1      1       1       1       1       3       1       2       1       1       
1       1       1       2       1       1       1
+
+-- !select --
+1      [0, 1]  [21, 11, 31, 10]        [21, 12, 20, 22, 32]    [30, 13, 23, 
33]        [2444444444444, 4444444444444, 3444444444444, 1444444444444, 
944444444444]      [1555555555, 3555555555, 9555555555, 255555555, 55555555555] 
   [0.1, 1.1, 3.1, 9.1, 2.1]       [3.222, 2.222, 9.222, 1.222, 0.222]     
[23333.33, 3333.33, 33333.33, 93333.33, 13333.33]       ['c', 'p', 'l', 'd', 
'f']       ['varchar3', 'varchar1', 'varchar2', 'varchar9']        ['string9', 
'string2', 'string1', 'string3']    [2022-12-03, 2022-12-09, 2022-12-02, 
2022-12-01]        [2022-12-01 [...]
+
+-- !select --
+1      1       1       1       1       1       3       1       2       1       
1       1       1       1       2       1       1       1
+
+-- !select --
+1      [0, 0, 1, 0, 1, 0]      [10, 11, 11, 21, 31, 10]        [20, 21, 12, 
22, 32, 20]        [30, 33, 13, 23, 33, 30]        [4444444444444, 
4444444444444, 1444444444444, 2444444444444, 3444444444444, 944444444444]       
[55555555555, 55555555555, 1555555555, 255555555, 3555555555, 9555555555]       
[0.1, 0.1, 1.1, 2.1, 3.1, 9.1]  [0.222, 0.222, 1.222, 2.222, 3.222, 9.222]      
[3333.33, 3333.33, 13333.33, 23333.33, 33333.33, 93333.33]      ['c', 'c', 'd', 
'f', 'l', 'p']  ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3', 
'varcha [...]
+
+-- !select --
+1      1       1       1       1       1       3       1       2       1       
1       1       1       1       2       1       1       1
 
 -- !select43 --
 [10, 8]
diff --git 
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_group_array.out
 
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_group_array.out
new file mode 100644
index 0000000000..9addf1e4bf
--- /dev/null
+++ 
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_group_array.out
@@ -0,0 +1,27 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+1      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+1      false   10      20      30      4444444444444   55555555555     0.1     
0.222   3333.33 c       varchar1        string1 2022-12-01      2022-12-01      
2022-12-01T22:23:23     2022-12-01T22:23:24.999999      not null
+1      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+1      false   11      21      33      4444444444444   55555555555     0.1     
0.222   3333.33 c       varchar1        string1 2022-12-01      2022-12-01      
2022-12-01T22:23:23     2022-12-01T22:23:24.999999      not null
+1      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+1      true    11      12      13      1444444444444   1555555555      1.1     
1.222   13333.33        d       varchar2        string2 2022-12-02      
2022-12-02      2022-12-02T22:23:23     2022-12-02T22:23:24.999999      not null
+2      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+2      false   21      22      23      2444444444444   255555555       2.1     
2.222   23333.33        f       varchar3        string3 2022-12-03      
2022-12-03      2022-12-03T22:23:23     2022-12-03T22:23:24.999999      not null
+2      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+2      true    31      32      33      3444444444444   3555555555      3.1     
3.222   33333.33        l       varchar3        string3 2022-12-03      
2022-12-03      2022-12-03T22:23:23     2022-12-03T22:23:24.999999      not null
+2      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N      \N      \N      \N      \N      \N      \N      not null
+2      false   10      20      30      944444444444    9555555555      9.1     
9.222   93333.33        p       varchar9        string9 2022-12-09      
2022-12-09      2022-12-09T22:23:23     2022-12-09T22:23:24.999999      not null
+
+-- !select --
+[0, 1] [21, 11, 31, 10]        [21, 12, 20, 22, 32]    [30, 13, 23, 33]        
[2444444444444, 4444444444444, 3444444444444, 1444444444444, 944444444444]      
[1555555555, 3555555555, 9555555555, 255555555, 55555555555]    [0.1, 1.1, 3.1, 
9.1, 2.1]       [3.222, 2.222, 9.222, 1.222, 0.222]     [23333.33, 3333.33, 
33333.33, 93333.33, 13333.33]       ['c', 'p', 'l', 'd', 'f']       
['varchar3', 'varchar1', 'varchar2', 'varchar9']        ['string9', 'string2', 
'string1', 'string3']    [2022-12-03, 2022-12-09, 2022-12-02, 2022-12-01]       
 [2022-12-01,  [...]
+
+-- !select --
+[0]    [10]    [20]    [30]    [4444444444444] [1555555555, 255555555, 
55555555555]    [0.1]   [1.222, 0.222]  [3333.33]       ['c']   ['varchar1']    
['string1']     [2022-12-01]    [2022-12-01, 2022-12-02]        [2022-12-01 
22:23:23]   [2022-12-01 22:23:24.999999]    ['not null']
+
+-- !select --
+1      [0, 1]  [21, 11, 31, 10]        [21, 12, 20, 22, 32]    [30, 13, 23, 
33]        [2444444444444, 4444444444444, 3444444444444, 1444444444444, 
944444444444]      [1555555555, 3555555555, 9555555555, 255555555, 55555555555] 
   [0.1, 1.1, 3.1, 9.1, 2.1]       [3.222, 2.222, 9.222, 1.222, 0.222]     
[23333.33, 3333.33, 33333.33, 93333.33, 13333.33]       ['c', 'p', 'l', 'd', 
'f']       ['varchar3', 'varchar1', 'varchar2', 'varchar9']        ['string9', 
'string2', 'string1', 'string3']    [2022-12-03, 2022-12-09, 2022-12-02, 
2022-12-01]        [2022-12-01 [...]
+
+-- !select --
+1      [0]     [10]    [20]    [30]    [4444444444444] [1555555555, 255555555, 
55555555555]    [0.1]   [1.222, 0.222]  [3333.33]       ['c']   ['varchar1']    
['string1']     [2022-12-01]    [2022-12-01, 2022-12-02]        [2022-12-01 
22:23:23]   [2022-12-01 22:23:24.999999]    ['not null']
+
diff --git 
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
 
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
index cff93375ab..d09b09797b 100644
--- 
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
@@ -16,35 +16,598 @@
 // under the License.
 
 suite("test_aggregate_collect") {
+
+    sql "set enable_vectorized_engine = true"
+
     def tableName = "collect_test"
-    def tableCTAS = "collect_test_ctas"
+    def tableCTAS1 = "collect_set_test_ctas1"
+    def tableCTAS2 = "collect_set_test_ctas2"
+    def tableCTAS3 = "collect_list_test_ctas3"
+    def tableCTAS4 = "collect_list_test_ctas4"
+
+
     sql "DROP TABLE IF EXISTS ${tableName}"
-    sql "DROP TABLE IF EXISTS ${tableCTAS}"
-    sql """
-           CREATE TABLE IF NOT EXISTS ${tableName} (
-               c_int INT,
-               c_string VARCHAR(10),
-          c_date Date,
-          c_decimal DECIMAL(10, 2),
-          c_string_not_null VARCHAR(10) NOT NULL
+    sql "DROP TABLE IF EXISTS ${tableCTAS1}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS2}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS3}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS4}"
+
+    sql """
+        CREATE TABLE IF NOT EXISTS ${tableName} (
+               c_id INT,
+            c_bool BOOLEAN,
+            c_tinyint TINYINT,
+            c_smallint SMALLINT,
+            c_int INT,
+            c_bigint BIGINT,
+            c_largeint LARGEINT,
+            c_float FLOAT,
+            c_double DOUBLE,
+            c_decimal DECIMAL(9, 2),
+            c_char CHAR,
+            c_varchar VARCHAR(10),
+            c_string STRING,
+            c_date DATE,
+            c_datev2 DATEV2,
+            c_date_time DATETIME,
+            c_date_timev2 DATETIMEV2(6),
+            c_string_not_null VARCHAR(10) NOT NULL
            )
            DISTRIBUTED BY HASH(c_int) BUCKETS 1
            PROPERTIES (
              "replication_num" = "1"
-           ) 
+           )
+    """
+
+    sql """
+        INSERT INTO ${tableName} values
+            (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (1, false, 10, 20, 30, 4444444444444, 55555555555, 0.1, 0.222, 
3333.33, 'c', 'varchar1', 'string1',
+            '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01 
22:23:24.999999', 'not null')
+    """
+
+    sql """
+        INSERT INTO ${tableName} values
+            (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (1, false, 11, 21, 33, 4444444444444, 55555555555, 0.1, 0.222, 
3333.33, 'c', 'varchar1', 'string1',
+            '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01 
22:23:24.999999', 'not null')
     """
-    sql "INSERT INTO ${tableName} values(1,'hello','2022-07-04',1.23,'hello'), 
(2,NULL,NULL,NULL,'hello')"
-    sql "INSERT INTO ${tableName} values(1,'hello','2022-07-04',1.23,'hello'), 
(2,NULL,NULL,NULL,'hello')"
 
-    qt_select "select 
c_int,collect_list(c_string),collect_list(c_date),collect_list(c_decimal) from 
${tableName} group by c_int order by c_int"
-    qt_select "select 
c_int,collect_set(c_string),collect_set(c_date),collect_set(c_decimal) from 
${tableName} group by c_int order by c_int"
+    sql """
+        INSERT INTO ${tableName} values
+            (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (1, true, 11, 12, 13, 1444444444444, 1555555555, 1.1, 1.222, 
13333.33, 'd', 'varchar2', 'string2',
+            '2022-12-02', '2022-12-02', '2022-12-02 22:23:23', '2022-12-02 
22:23:24.999999', 'not null')
+    """
 
-    // test without GROUP BY
-    qt_select "select collect_list(c_string),collect_list(c_string_not_null) 
from ${tableName}"
-    qt_select "select collect_set(c_string),collect_set(c_string_not_null) 
from ${tableName}"
+    sql """
+        INSERT INTO ${tableName} values
+            (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (2, false, 21, 22, 23, 2444444444444, 255555555, 2.1, 2.222, 
23333.33, 'f', 'varchar3', 'string3',
+            '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03 
22:23:24.999999', 'not null')
+    """
+
+    sql """
+        INSERT INTO ${tableName} values
+            (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (2, true, 31, 32, 33, 3444444444444, 3555555555, 3.1, 3.222, 
33333.33, 'l', 'varchar3', 'string3',
+            '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03 
22:23:24.999999', 'not null')
+    """
 
-    sql """ CREATE TABLE ${tableCTAS} PROPERTIES("replication_num" = "1") AS 
SELECT 
1,collect_list(c_int),collect_set(c_string),collect_list(c_date),collect_set(c_decimal),collect_list(c_string_not_null)
 FROM ${tableName} """
-    qt_select "SELECT * from ${tableCTAS}"
+    sql """
+        INSERT INTO ${tableName} values
+            (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (2, false, 10, 20, 30, 944444444444, 9555555555, 9.1, 9.222, 
93333.33, 'p', 'varchar9', 'string9',
+            '2022-12-09', '2022-12-09', '2022-12-09 22:23:23', '2022-12-09 
22:23:24.999999', 'not null')
+    """
+
+    qt_select """
+        SELECT * FROM ${tableName}
+    """
+
+    qt_select """
+        SELECT
+            collect_set(c_bool),
+            collect_set(c_tinyint),
+            collect_set(c_smallint),
+            collect_set(c_int),
+            collect_set(c_bigint),
+            collect_set(c_largeint),
+            collect_set(c_float),
+            collect_set(c_double),
+            collect_set(c_decimal),
+            collect_set(c_char),
+            collect_set(c_varchar),
+            collect_set(c_string),
+            collect_set(c_date),
+            collect_set(c_datev2),
+            collect_set(c_date_time),
+            collect_set(c_date_timev2),
+            collect_set(c_string_not_null)
+        FROM
+            ${tableName}
+    """
+
+    qt_select """
+        SELECT
+            collect_list(c_bool),
+            collect_list(c_tinyint),
+            collect_list(c_smallint),
+            collect_list(c_int),
+            collect_list(c_bigint),
+            collect_list(c_largeint),
+            collect_list(c_float),
+            collect_list(c_double),
+            collect_list(c_decimal),
+            collect_list(c_char),
+            collect_list(c_varchar),
+            collect_list(c_string),
+            collect_list(c_date),
+            collect_list(c_datev2),
+            collect_list(c_date_time),
+            collect_list(c_date_timev2),
+            collect_list(c_string_not_null)
+        FROM
+            ${tableName}
+    """
+
+    qt_select """
+        SELECT
+            size(collect_set(c_bool,1)),
+            size(collect_set(c_tinyint,1)),
+            size(collect_set(c_smallint,1)),
+            size(collect_set(c_int,1)),
+            size(collect_set(c_bigint,1)),
+            size(collect_set(c_largeint,3)),
+            size(collect_set(c_float,1)),
+            size(collect_set(c_double,2)),
+            size(collect_set(c_decimal,1)),
+            size(collect_set(c_char,1)),
+            size(collect_set(c_varchar,1)),
+            size(collect_set(c_string,1)),
+            size(collect_set(c_date,1)),
+            size(collect_set(c_datev2,2)),
+            size(collect_set(c_date_time,1)),
+            size(collect_set(c_date_timev2,1)),
+            size(collect_set(c_string_not_null,1))
+        FROM
+            ${tableName}
+    """
+
+    qt_select """
+        SELECT
+            size(collect_list(c_bool,1)),
+            size(collect_list(c_tinyint,1)),
+            size(collect_list(c_smallint,1)),
+            size(collect_list(c_int,1)),
+            size(collect_list(c_bigint,1)),
+            size(collect_list(c_largeint,3)),
+            size(collect_list(c_float,1)),
+            size(collect_list(c_double,2)),
+            size(collect_list(c_decimal,1)),
+            size(collect_list(c_char,1)),
+            size(collect_list(c_varchar,1)),
+            size(collect_list(c_string,1)),
+            size(collect_list(c_date,1)),
+            size(collect_list(c_datev2,2)),
+            size(collect_list(c_date_time,1)),
+            size(collect_list(c_date_timev2,1)),
+            size(collect_list(c_string_not_null,1))
+        FROM
+            ${tableName}
+    """
+
+    sql """
+        CREATE TABLE ${tableCTAS1} PROPERTIES("replication_num" = "1") AS
+        SELECT
+           1,
+            collect_set(c_bool),
+            collect_set(c_tinyint),
+            collect_set(c_smallint),
+            collect_set(c_int),
+            collect_set(c_bigint),
+            collect_set(c_largeint),
+            collect_set(c_float),
+            collect_set(c_double),
+            collect_set(c_decimal),
+            collect_set(c_char),
+            collect_set(c_varchar),
+            collect_set(c_string),
+            collect_set(c_date),
+            collect_set(c_datev2),
+            collect_set(c_date_time),
+            collect_set(c_date_timev2),
+            collect_set(c_string_not_null)
+        FROM
+            ${tableName}
+    """
+
+
+    sql """
+        CREATE TABLE ${tableCTAS2} PROPERTIES("replication_num" = "1") AS
+        SELECT
+           1,
+            size(collect_set(c_bool,1)),
+            size(collect_set(c_tinyint,1)),
+            size(collect_set(c_smallint,1)),
+            size(collect_set(c_int,1)),
+            size(collect_set(c_bigint,1)),
+            size(collect_set(c_largeint,3)),
+            size(collect_set(c_float,1)),
+            size(collect_set(c_double,2)),
+            size(collect_set(c_decimal,1)),
+            size(collect_set(c_char,1)),
+            size(collect_set(c_varchar,1)),
+            size(collect_set(c_string,1)),
+            size(collect_set(c_date,1)),
+            size(collect_set(c_datev2,2)),
+            size(collect_set(c_date_time,1)),
+            size(collect_set(c_date_timev2,1)),
+            size(collect_set(c_string_not_null,1))
+        FROM
+            ${tableName}
+    """
+
+    sql """
+        CREATE TABLE ${tableCTAS3} PROPERTIES("replication_num" = "1") AS
+        SELECT
+           1,
+            collect_list(c_bool),
+            collect_list(c_tinyint),
+            collect_list(c_smallint),
+            collect_list(c_int),
+            collect_list(c_bigint),
+            collect_list(c_largeint),
+            collect_list(c_float),
+            collect_list(c_double),
+            collect_list(c_decimal),
+            collect_list(c_char),
+            collect_list(c_varchar),
+            collect_list(c_string),
+            collect_list(c_date),
+            collect_list(c_datev2),
+            collect_list(c_date_time),
+            collect_list(c_date_timev2),
+            collect_list(c_string_not_null)
+        FROM
+            ${tableName}
+    """
+
+    sql """
+        CREATE TABLE ${tableCTAS4} PROPERTIES("replication_num" = "1") AS
+        SELECT
+           1,
+            size(collect_list(c_bool,1)),
+            size(collect_list(c_tinyint,1)),
+            size(collect_list(c_smallint,1)),
+            size(collect_list(c_int,1)),
+            size(collect_list(c_bigint,1)),
+            size(collect_list(c_largeint,3)),
+            size(collect_list(c_float,1)),
+            size(collect_list(c_double,2)),
+            size(collect_list(c_decimal,1)),
+            size(collect_list(c_char,1)),
+            size(collect_list(c_varchar,1)),
+            size(collect_list(c_string,1)),
+            size(collect_list(c_date,1)),
+            size(collect_list(c_datev2,2)),
+            size(collect_list(c_date_time,1)),
+            size(collect_list(c_date_timev2,1)),
+            size(collect_list(c_string_not_null,1))
+        FROM
+            ${tableName}
+    """
+
+    qt_select "SELECT * FROM ${tableCTAS1}"
+    qt_select "SELECT * FROM ${tableCTAS2}"
+    qt_select "SELECT * FROM ${tableCTAS3}"
+    qt_select "SELECT * FROM ${tableCTAS4}"
+
+    sql "DROP TABLE IF EXISTS ${tableName}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS1}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS2}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS3}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS4}"
+
+    def tableName_11 = "group_uniq_array_test"
+    def tableCTAS1_11 = "group_uniq_array_test_ctas1"
+    def tableCTAS2_11 = "group_uniq_array_test_ctas2"
+    def tableCTAS3_11 = "group_array_test_ctas3"
+    def tableCTAS4_11 = "group_array_test_ctas4"
+
+    sql "DROP TABLE IF EXISTS ${tableName_11}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS1_11}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS2_11}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS3_11}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS4_11}"
+
+    sql """
+        CREATE TABLE IF NOT EXISTS ${tableName_11} (
+               c_id INT,
+            c_bool BOOLEAN,
+            c_tinyint TINYINT,
+            c_smallint SMALLINT,
+            c_int INT,
+            c_bigint BIGINT,
+            c_largeint LARGEINT,
+            c_float FLOAT,
+            c_double DOUBLE,
+            c_decimal DECIMAL(9, 2),
+            c_char CHAR,
+            c_varchar VARCHAR(10),
+            c_string STRING,
+            c_date DATE,
+            c_datev2 DATEV2,
+            c_date_time DATETIME,
+            c_date_timev2 DATETIMEV2(6),
+            c_string_not_null VARCHAR(10) NOT NULL
+           )
+           DISTRIBUTED BY HASH(c_int) BUCKETS 1
+           PROPERTIES (
+             "replication_num" = "1"
+           )
+    """
+
+    sql """
+        INSERT INTO ${tableName_11} values
+            (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (1, false, 10, 20, 30, 4444444444444, 55555555555, 0.1, 0.222, 
3333.33, 'c', 'varchar1', 'string1',
+            '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01 
22:23:24.999999', 'not null')
+    """
+
+    sql """
+        INSERT INTO ${tableName_11} values
+            (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (1, false, 11, 21, 33, 4444444444444, 55555555555, 0.1, 0.222, 
3333.33, 'c', 'varchar1', 'string1',
+            '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01 
22:23:24.999999', 'not null')
+    """
+
+    sql """
+        INSERT INTO ${tableName_11} values
+            (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (1, true, 11, 12, 13, 1444444444444, 1555555555, 1.1, 1.222, 
13333.33, 'd', 'varchar2', 'string2',
+            '2022-12-02', '2022-12-02', '2022-12-02 22:23:23', '2022-12-02 
22:23:24.999999', 'not null')
+    """
+
+    sql """
+        INSERT INTO ${tableName_11} values
+            (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (2, false, 21, 22, 23, 2444444444444, 255555555, 2.1, 2.222, 
23333.33, 'f', 'varchar3', 'string3',
+            '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03 
22:23:24.999999', 'not null')
+    """
+
+    sql """
+        INSERT INTO ${tableName_11} values
+            (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (2, true, 31, 32, 33, 3444444444444, 3555555555, 3.1, 3.222, 
33333.33, 'l', 'varchar3', 'string3',
+            '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03 
22:23:24.999999', 'not null')
+    """
+
+    sql """
+        INSERT INTO ${tableName_11} values
+            (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL,
+            NULL, NULL, NULL, NULL, 'not null'),
+            (2, false, 10, 20, 30, 944444444444, 9555555555, 9.1, 9.222, 
93333.33, 'p', 'varchar9', 'string9',
+            '2022-12-09', '2022-12-09', '2022-12-09 22:23:23', '2022-12-09 
22:23:24.999999', 'not null')
+    """
+
+    qt_select """
+        SELECT * FROM ${tableName_11}
+    """
+
+    qt_select """
+        SELECT
+            group_uniq_array(c_bool),
+            group_uniq_array(c_tinyint),
+            group_uniq_array(c_smallint),
+            group_uniq_array(c_int),
+            group_uniq_array(c_bigint),
+            group_uniq_array(c_largeint),
+            group_uniq_array(c_float),
+            group_uniq_array(c_double),
+            group_uniq_array(c_decimal),
+            group_uniq_array(c_char),
+            group_uniq_array(c_varchar),
+            group_uniq_array(c_string),
+            group_uniq_array(c_date),
+            group_uniq_array(c_datev2),
+            group_uniq_array(c_date_time),
+            group_uniq_array(c_date_timev2),
+            group_uniq_array(c_string_not_null)
+        FROM
+            ${tableName_11}
+    """
+
+    qt_select """
+        SELECT
+            group_array(c_bool),
+            group_array(c_tinyint),
+            group_array(c_smallint),
+            group_array(c_int),
+            group_array(c_bigint),
+            group_array(c_largeint),
+            group_array(c_float),
+            group_array(c_double),
+            group_array(c_decimal),
+            group_array(c_char),
+            group_array(c_varchar),
+            group_array(c_string),
+            group_array(c_date),
+            group_array(c_datev2),
+            group_array(c_date_time),
+            group_array(c_date_timev2),
+            group_array(c_string_not_null)
+        FROM
+            ${tableName_11}
+    """
+
+    qt_select """
+        SELECT
+            size(group_uniq_array(c_bool,1)),
+            size(group_uniq_array(c_tinyint,1)),
+            size(group_uniq_array(c_smallint,1)),
+            size(group_uniq_array(c_int,1)),
+            size(group_uniq_array(c_bigint,1)),
+            size(group_uniq_array(c_largeint,3)),
+            size(group_uniq_array(c_float,1)),
+            size(group_uniq_array(c_double,2)),
+            size(group_uniq_array(c_decimal,1)),
+            size(group_uniq_array(c_char,1)),
+            size(group_uniq_array(c_varchar,1)),
+            size(group_uniq_array(c_string,1)),
+            size(group_uniq_array(c_date,1)),
+            size(group_uniq_array(c_datev2,2)),
+            size(group_uniq_array(c_date_time,1)),
+            size(group_uniq_array(c_date_timev2,1)),
+            size(group_uniq_array(c_string_not_null,1))
+        FROM
+            ${tableName_11}
+    """
+
+    qt_select """
+        SELECT
+            size(group_array(c_bool,1)),
+            size(group_array(c_tinyint,1)),
+            size(group_array(c_smallint,1)),
+            size(group_array(c_int,1)),
+            size(group_array(c_bigint,1)),
+            size(group_array(c_largeint,3)),
+            size(group_array(c_float,1)),
+            size(group_array(c_double,2)),
+            size(group_array(c_decimal,1)),
+            size(group_array(c_char,1)),
+            size(group_array(c_varchar,1)),
+            size(group_array(c_string,1)),
+            size(group_array(c_date,1)),
+            size(group_array(c_datev2,2)),
+            size(group_array(c_date_time,1)),
+            size(group_array(c_date_timev2,1)),
+            size(group_array(c_string_not_null,1))
+        FROM
+            ${tableName_11}
+    """
+
+    sql """
+        CREATE TABLE ${tableCTAS1_11} PROPERTIES("replication_num" = "1") AS
+        SELECT
+           1,
+            group_uniq_array(c_bool),
+            group_uniq_array(c_tinyint),
+            group_uniq_array(c_smallint),
+            group_uniq_array(c_int),
+            group_uniq_array(c_bigint),
+            group_uniq_array(c_largeint),
+            group_uniq_array(c_float),
+            group_uniq_array(c_double),
+            group_uniq_array(c_decimal),
+            group_uniq_array(c_char),
+            group_uniq_array(c_varchar),
+            group_uniq_array(c_string),
+            group_uniq_array(c_date),
+            group_uniq_array(c_datev2),
+            group_uniq_array(c_date_time),
+            group_uniq_array(c_date_timev2),
+            group_uniq_array(c_string_not_null)
+        FROM
+            ${tableName_11}
+    """
+
+    sql """
+        CREATE TABLE ${tableCTAS2_11} PROPERTIES("replication_num" = "1") AS
+        SELECT
+           1,
+            size(group_uniq_array(c_bool,1)),
+            size(group_uniq_array(c_tinyint,1)),
+            size(group_uniq_array(c_smallint,1)),
+            size(group_uniq_array(c_int,1)),
+            size(group_uniq_array(c_bigint,1)),
+            size(group_uniq_array(c_largeint,3)),
+            size(group_uniq_array(c_float,1)),
+            size(group_uniq_array(c_double,2)),
+            size(group_uniq_array(c_decimal,1)),
+            size(group_uniq_array(c_char,1)),
+            size(group_uniq_array(c_varchar,1)),
+            size(group_uniq_array(c_string,1)),
+            size(group_uniq_array(c_date,1)),
+            size(group_uniq_array(c_datev2,2)),
+            size(group_uniq_array(c_date_time,1)),
+            size(group_uniq_array(c_date_timev2,1)),
+            size(group_uniq_array(c_string_not_null,1))
+        FROM
+            ${tableName_11}
+    """
+
+    sql """
+        CREATE TABLE ${tableCTAS3_11} PROPERTIES("replication_num" = "1") AS
+        SELECT
+           1,
+            group_array(c_bool),
+            group_array(c_tinyint),
+            group_array(c_smallint),
+            group_array(c_int),
+            group_array(c_bigint),
+            group_array(c_largeint),
+            group_array(c_float),
+            group_array(c_double),
+            group_array(c_decimal),
+            group_array(c_char),
+            group_array(c_varchar),
+            group_array(c_string),
+            group_array(c_date),
+            group_array(c_datev2),
+            group_array(c_date_time),
+            group_array(c_date_timev2),
+            group_array(c_string_not_null)
+        FROM
+            ${tableName_11}
+    """
+
+    sql """
+        CREATE TABLE ${tableCTAS4_11} PROPERTIES("replication_num" = "1") AS
+        SELECT
+           1,
+            size(group_array(c_bool,1)),
+            size(group_array(c_tinyint,1)),
+            size(group_array(c_smallint,1)),
+            size(group_array(c_int,1)),
+            size(group_array(c_bigint,1)),
+            size(group_array(c_largeint,3)),
+            size(group_array(c_float,1)),
+            size(group_array(c_double,2)),
+            size(group_array(c_decimal,1)),
+            size(group_array(c_char,1)),
+            size(group_array(c_varchar,1)),
+            size(group_array(c_string,1)),
+            size(group_array(c_date,1)),
+            size(group_array(c_datev2,2)),
+            size(group_array(c_date_time,1)),
+            size(group_array(c_date_timev2,1)),
+            size(group_array(c_string_not_null,1))
+        FROM
+            ${tableName_11}
+    """
+
+    qt_select "SELECT * FROM ${tableCTAS1_11}"
+    qt_select "SELECT * FROM ${tableCTAS2_11}"
+    qt_select "SELECT * FROM ${tableCTAS3_11}"
+    qt_select "SELECT * FROM ${tableCTAS4_11}"
+
+    sql "DROP TABLE IF EXISTS ${tableName_11}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS1_11}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS2_11}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS3_11}"
+    sql "DROP TABLE IF EXISTS ${tableCTAS4_11}"
 
     // topn_array
     def tableName_12 = "topn_array"
@@ -65,8 +628,8 @@ suite("test_aggregate_collect") {
     sql "INSERT INTO ${tableName_12} values(1,10,'2022-11-1',6.8754576), 
(2,8,'2022-11-3',0.576), (2,10,'2022-11-2',1.234) ,(3,10,'2022-11-2',0.576) 
,(5,29,'2022-11-2',6.8754576) ,(6,8,'2022-11-1',6.8754576)"
 
     qt_select43 "select topn_array(level,2) from ${tableName_12}"
-    qt_select44 "select topn_array(level,2,100) from ${tableName_12}" 
-    qt_select45 "select topn_array(dt,2,100) from ${tableName_12}"  
-    qt_select46 "select topn_array(num,2,100) from ${tableName_12}"  
-    sql "DROP TABLE IF EXISTS ${tableName_12}"    
+    qt_select44 "select topn_array(level,2,100) from ${tableName_12}"
+    qt_select45 "select topn_array(dt,2,100) from ${tableName_12}"
+    qt_select46 "select topn_array(num,2,100) from ${tableName_12}"
+    sql "DROP TABLE IF EXISTS ${tableName_12}"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to