[GitHub] [incubator-doris] cambyzju commented on a diff in pull request #9056: [feature-wip](array-type) Add array aggregation functions

GitBox Fri, 22 Apr 2022 02:36:25 -0700


cambyzju commented on code in PR #9056:
URL: https://github.com/apache/incubator-doris/pull/9056#discussion_r855986107



##########
be/src/vec/functions/array/function_array_aggregation.cpp:
##########
@@ -0,0 +1,312 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/arrayAggregation.cpp
+// and modified by Doris
+
+#include <condition_variable>
+#include <type_traits>
+
+#include "vec/common/arithmetic_overflow.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/array/function_array_mapped.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris {
+namespace vectorized {
+
+enum class AggregationStatus {
+    MISMATCHED_TYPE = 0,
+    OK,
+    MATH_OVERFLOW,
+    SCALE_OUT_OF_BOUNDS,
+};
+
+enum class AggregateOperation { MIN, MAX, SUM, AVERAGE, PRODUCT };
+
+/**
+ * During array aggregation we derive result type from operation.
+ * For array min or array max we use array element as result type.
+ * For array average and array product we use Float64.
+ * For array sum we keep Int128 for big integers, use Decimal128 for decimal numbers,
+ * Float64 for floating point numbers, and Int64 for all other integer types.
+ */
+
+template <typename ArrayElement, AggregateOperation operation>
+struct ArrayAggregateResultImpl;
+
+template <typename ArrayElement>
+struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::MIN> {
+    using Result = ArrayElement;
+};
+
+template <typename ArrayElement>
+struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::MAX> {
+    using Result = ArrayElement;
+};
+
+template <typename ArrayElement>
+struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::AVERAGE> {
+    using Result = Float64;
+};
+
+template <typename ArrayElement>
+struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::PRODUCT> {
+    using Result = Float64;
+};
+
+template <typename ArrayElement>
+struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::SUM> {
+    using Result = std::conditional_t<
+            IsDecimalNumber<ArrayElement>, Decimal128,
+            std::conditional_t<
+                    IsFloatNumber<ArrayElement>, Float64,
+                    std::conditional_t<std::is_same_v<ArrayElement, Int128>, 
Int128, Int64>>>;
+};
+
+template <typename ArrayElement, AggregateOperation operation>
+using ArrayAggregateResult = typename ArrayAggregateResultImpl<ArrayElement, 
operation>::Result;
+
+template <AggregateOperation aggregate_operation>
+struct ArrayAggregateImpl {
+    using column_type = ColumnArray;
+    using data_type = DataTypeArray;
+
+    static DataTypePtr get_return_type(const DataTypePtr& expression_return,
+                                       const DataTypePtr& /*array_element*/) {
+        DataTypePtr result;
+
+        auto call = [&expression_return, &result](const auto& types) {
+            using Types = std::decay_t<decltype(types)>;
+            using DataType = typename Types::LeftType;
+
+            if constexpr (!IsDataTypeDecimalOrNumber<DataType>) {
+                return false;
+            } else if constexpr (aggregate_operation == 
AggregateOperation::AVERAGE ||
+                                 aggregate_operation == 
AggregateOperation::PRODUCT ||
+                                 IsDataTypeNumber<DataType>) {
+                using NumberReturnType =
+                        ArrayAggregateResult<typename DataType::FieldType, 
aggregate_operation>;
+                result = std::make_shared<DataTypeNumber<NumberReturnType>>();
+                return true;
+            } else if constexpr (IsDataTypeDecimal<DataType>) {
+                using DecimalReturnType =
+                        ArrayAggregateResult<typename DataType::FieldType, 
aggregate_operation>;
+                UInt32 scale = get_decimal_scale(*expression_return);
+                result = std::make_shared<DataTypeDecimal<DecimalReturnType>>(
+                        DataTypeDecimal<DecimalReturnType>::max_precision(), 
scale);
+                return true;
+            }
+            return false;
+        };
+        if 
(!call_on_index_and_data_type<void>(expression_return->get_type_id(), call)) {
+            LOG(WARNING) << "array aggregation function cannot be performed on 
type "
+                         << expression_return->get_name();
+        }
+        return result;
+    }
+
+    static Status execute(Block& block, size_t result, const ColumnArray& 
array, ColumnPtr mapped) {
+        const IColumn::Offsets& offsets = array.get_offsets();
+        ColumnPtr res;
+        AggregationStatus status;
+        if (static_cast<bool>(status = execute_type<Int8>(res, mapped, 
offsets)) ||
+            static_cast<bool>(status = execute_type<Int16>(res, mapped, 
offsets)) ||
+            static_cast<bool>(status = execute_type<Int32>(res, mapped, 
offsets)) ||
+            static_cast<bool>(status = execute_type<Int64>(res, mapped, 
offsets)) ||
+            static_cast<bool>(status = execute_type<Int128>(res, mapped, 
offsets)) ||
+            static_cast<bool>(status = execute_type<Float32>(res, mapped, 
offsets)) ||
+            static_cast<bool>(status = execute_type<Float64>(res, mapped, 
offsets)) ||
+            static_cast<bool>(status = execute_type<Decimal32>(res, mapped, 
offsets)) ||
+            static_cast<bool>(status = execute_type<Decimal64>(res, mapped, 
offsets)) ||

Review Comment:
   Doris only supports Decimal128.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[GitHub] [incubator-doris] cambyzju commented on a diff in pull request #9056: [feature-wip](array-type) Add array aggregation functions

Reply via email to