github-actions[bot] commented on code in PR #33464: URL: https://github.com/apache/doris/pull/33464#discussion_r1558948199
########## be/src/vec/functions/function_truncate.h: ########## @@ -0,0 +1,245 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cstddef> +#include <functional> +#include <type_traits> +#include <utility> + +#include "common/exception.h" +#include "common/status.h" +#include "olap/olap_common.h" +#include "round.h" +#include "vec/columns/column.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_decimal.h" +#include "vec/columns/column_vector.h" +#include "vec/common/assert_cast.h" +#include "vec/core/call_on_type_index.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_decimal.h" +#include "vec/data_types/data_type_number.h" + +namespace doris::vectorized { + +struct TruncateFloatOneArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeFloat64>()}; } +}; + +struct TruncateFloatTwoArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeFloat64>(), std::make_shared<DataTypeInt32>()}; + } +}; + +struct TruncateDecimalOneArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + // All Decimal types are named Decimal, and real scale will be passed as type argument for execute function + // So we can just register Decimal32 here + return {std::make_shared<DataTypeDecimal<Decimal32>>(9, 0)}; + } +}; + +struct TruncateDecimalTwoArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeDecimal<Decimal32>>(9, 0), + std::make_shared<DataTypeInt32>()}; + } +}; + +template <typename Impl> +class FunctionTruncate : public FunctionRounding<Impl, RoundingMode::Trunc, TieBreakingMode::Auto> { +public: + static FunctionPtr create() { return std::make_shared<FunctionTruncate>(); } + + ColumnNumbers get_arguments_that_are_always_constant() const override { return {}; } + // SELECT number, truncate(123.345, 1) FROM number("numbers"="10") + // should NOT behave like two column arguments, so we can not use const column default implementation + bool use_default_implementation_for_constants() const override { return false; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, Review Comment: warning: function 'execute_impl' exceeds recommended size/complexity thresholds [readability-function-size] ```cpp Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, ^ ``` <details> <summary>Additional context</summary> **be/src/vec/functions/function_truncate.h:79:** 161 lines including whitespace and comments (threshold 80) ```cpp Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, ^ ``` </details> ########## be/src/vec/functions/function_truncate.h: ########## @@ -0,0 +1,245 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cstddef> +#include <functional> +#include <type_traits> +#include <utility> + +#include "common/exception.h" +#include "common/status.h" +#include "olap/olap_common.h" +#include "round.h" +#include "vec/columns/column.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_decimal.h" +#include "vec/columns/column_vector.h" +#include "vec/common/assert_cast.h" +#include "vec/core/call_on_type_index.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_decimal.h" +#include "vec/data_types/data_type_number.h" + +namespace doris::vectorized { + +struct TruncateFloatOneArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeFloat64>()}; } +}; + +struct TruncateFloatTwoArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeFloat64>(), std::make_shared<DataTypeInt32>()}; + } +}; + +struct TruncateDecimalOneArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + // All Decimal types are named Decimal, and real scale will be passed as type argument for execute function + // So we can just register Decimal32 here + return {std::make_shared<DataTypeDecimal<Decimal32>>(9, 0)}; + } +}; + +struct TruncateDecimalTwoArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeDecimal<Decimal32>>(9, 0), + std::make_shared<DataTypeInt32>()}; + } +}; + +template <typename Impl> +class FunctionTruncate : public FunctionRounding<Impl, RoundingMode::Trunc, TieBreakingMode::Auto> { +public: + static FunctionPtr create() { return std::make_shared<FunctionTruncate>(); } + + ColumnNumbers get_arguments_that_are_always_constant() const override { return {}; } + // SELECT number, truncate(123.345, 1) FROM number("numbers"="10") + // should NOT behave like two column arguments, so we can not use const column default implementation + bool use_default_implementation_for_constants() const override { return false; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { + const ColumnWithTypeAndName& column_general = block.get_by_position(arguments[0]); + ColumnPtr res; + + // potential argument types: + // 0. truncate(ColumnConst, ColumnConst) + // 1. truncate(Column), truncate(Column, ColumnConst) + // 2. truncate(Column, Column) + // 3. truncate(ColumnConst, Column) + + if (arguments.size() == 2 && is_column_const(*block.get_by_position(arguments[0]).column) && + is_column_const(*block.get_by_position(arguments[1]).column)) { + // truncate(ColumnConst, ColumnConst) + auto col_general = + assert_cast<const ColumnConst&>(*column_general.column).get_data_column_ptr(); + Int16 scale_arg = 0; + RETURN_IF_ERROR(FunctionTruncate<Impl>::get_scale_arg( + block.get_by_position(arguments[1]), &scale_arg)); + + auto call = [&](const auto& types) -> bool { + using Types = std::decay_t<decltype(types)>; + using DataType = typename Types::LeftType; + + if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { + using FieldType = typename DataType::FieldType; + res = Dispatcher<FieldType, RoundingMode::Trunc, + TieBreakingMode::Auto>::apply_vec_const(col_general, + scale_arg); + return true; + } + + return false; + }; + +#if !defined(__SSE4_1__) && !defined(__aarch64__) + /// In case of "nearbyint" function is used, we should ensure the expected rounding mode for the Banker's rounding. + /// Actually it is by default. But we will set it just in case. + + if constexpr (rounding_mode == RoundingMode::Round) { + if (0 != fesetround(FE_TONEAREST)) { + return Status::InvalidArgument("Cannot set floating point rounding mode"); + } + } +#endif + + if (!call_on_index_and_data_type<void>(column_general.type->get_type_id(), call)) { + return Status::InvalidArgument("Invalid argument type {} for function {}", + column_general.type->get_name(), "truncate"); + } + // Important, make sure the result column has the same size as the input column + res = ColumnConst::create(std::move(res), input_rows_count); + } else if (arguments.size() == 1 || + (arguments.size() == 2 && + is_column_const(*block.get_by_position(arguments[1]).column))) { + // truncate(Column) or truncate(Column, ColumnConst) + Int16 scale_arg = 0; + if (arguments.size() == 2) { + RETURN_IF_ERROR(FunctionTruncate<Impl>::get_scale_arg( + block.get_by_position(arguments[1]), &scale_arg)); + } + + auto call = [&](const auto& types) -> bool { + using Types = std::decay_t<decltype(types)>; + using DataType = typename Types::LeftType; + + if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { + using FieldType = typename DataType::FieldType; + res = Dispatcher<FieldType, RoundingMode::Trunc, TieBreakingMode::Auto>:: + apply_vec_const(column_general.column.get(), scale_arg); + return true; + } + + return false; + }; +#if !defined(__SSE4_1__) && !defined(__aarch64__) + /// In case of "nearbyint" function is used, we should ensure the expected rounding mode for the Banker's rounding. + /// Actually it is by default. But we will set it just in case. + + if constexpr (rounding_mode == RoundingMode::Round) { + if (0 != fesetround(FE_TONEAREST)) { + return Status::InvalidArgument("Cannot set floating point rounding mode"); + } + } +#endif + + if (!call_on_index_and_data_type<void>(column_general.type->get_type_id(), call)) { + return Status::InvalidArgument("Invalid argument type {} for function {}", + column_general.type->get_name(), "truncate"); + } + + } else if (is_column_const(*block.get_by_position(arguments[0]).column)) { + // truncate(ColumnConst, Column) + const ColumnWithTypeAndName& column_scale = block.get_by_position(arguments[1]); + const ColumnConst& const_col_general = + assert_cast<const ColumnConst&>(*column_general.column); + + auto call = [&](const auto& types) -> bool { + using Types = std::decay_t<decltype(types)>; + using DataType = typename Types::LeftType; + + if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { + using FieldType = typename DataType::FieldType; + res = Dispatcher<FieldType, RoundingMode::Trunc, TieBreakingMode::Auto>:: + apply_const_vec(&const_col_general, column_scale.column.get()); + return true; Review Comment: warning: redundant boolean literal in conditional return statement [readability-simplify-boolean-expr] be/src/vec/functions/function_truncate.h:180: ```diff - if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { - using FieldType = typename DataType::FieldType; - res = Dispatcher<FieldType, RoundingMode::Trunc, TieBreakingMode::Auto>:: - apply_const_vec(&const_col_general, column_scale.column.get()); - return true; - } - - return false; + return IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>; ``` ########## be/src/vec/functions/function_uuid.cpp: ########## @@ -0,0 +1,213 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cctype> +#include <cstddef> +#include <cstring> +#include <memory> +#include <utility> + +#include "common/status.h" +#include "vec/aggregate_functions/aggregate_function.h" +#include "vec/columns/column.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_vector.h" +#include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" +#include "vec/core/block.h" +#include "vec/core/column_numbers.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" +#include "vec/functions/function.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris { +class FunctionContext; +} // namespace doris + +namespace doris::vectorized { +constexpr static std::array<int, 5> SPLIT_POS = {8, 13, 18, 23, 36}; // 8-4-4-4-12 +constexpr static char DELIMITER = '-'; + +class FunctionUuidtoInt : public IFunction { +public: + static constexpr auto name = "uuid_to_int"; + + static FunctionPtr create() { return std::make_shared<FunctionUuidtoInt>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { Review Comment: warning: method 'get_return_type_impl' can be made static [readability-convert-member-functions-to-static] ```suggestion static DataTypePtr get_return_type_impl(const DataTypes& arguments) override { ``` ########## be/src/vec/functions/round.h: ########## @@ -446,6 +479,179 @@ struct Dispatcher { return nullptr; } } + + // NOTE: This function is only tested for truncate + // DO NOT USE THIS METHOD FOR OTHER ROUNDING BASED FUNCTION UNTIL YOU KNOW EXACTLY WHAT YOU ARE DOING !!! + static ColumnPtr apply_vec_vec(const IColumn* col_general, const IColumn* col_scale) { + if constexpr (rounding_mode != RoundingMode::Trunc) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, + "Using column as scale is only supported for function truncate"); + } + + const ColumnInt32& col_scale_i32 = assert_cast<const ColumnInt32&>(*col_scale); + const size_t input_row_count = col_scale_i32.size(); + for (size_t i = 0; i < input_row_count; ++i) { + const Int32 scale_arg = col_scale_i32.get_data()[i]; + if (scale_arg > std::numeric_limits<Int16>::max() || + scale_arg < std::numeric_limits<Int16>::min()) { + throw doris::Exception(ErrorCode::OUT_OF_BOUND, + "Scale argument for function is out of bound: {}", + scale_arg); + } + } + + if constexpr (IsNumber<T>) { + const auto* col = assert_cast<const ColumnVector<T>*>(col_general); + auto col_res = ColumnVector<T>::create(); + typename ColumnVector<T>::Container& vec_res = col_res->get_data(); + vec_res.resize(input_row_count); + + for (size_t i = 0; i < input_row_count; ++i) { + const Int32 scale_arg = col_scale_i32.get_data()[i]; + if (scale_arg == 0) { + size_t scale = 1; + FunctionRoundingImpl<ScaleMode::Zero>::apply(col->get_data()[i], scale, + vec_res[i]); + } else if (scale_arg > 0) { + size_t scale = int_exp10(scale_arg); + FunctionRoundingImpl<ScaleMode::Positive>::apply(col->get_data()[i], scale, + vec_res[i]); + } else { + size_t scale = int_exp10(-scale_arg); + FunctionRoundingImpl<ScaleMode::Negative>::apply(col->get_data()[i], scale, + vec_res[i]); + } + } + return col_res; + } else if constexpr (IsDecimalNumber<T>) { + const auto* decimal_col = assert_cast<const ColumnDecimal<T>*>(col_general); + + // For truncate, ALWAYS use SAME scale with source Decimal column + const Int32 input_scale = decimal_col->get_scale(); + auto col_res = ColumnDecimal<T>::create(input_row_count, input_scale); + + for (size_t i = 0; i < input_row_count; ++i) { + DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply( + decimal_col->get_element(i).value, input_scale, + col_res->get_element(i).value, col_scale_i32.get_data()[i]); + } + + for (size_t i = 0; i < input_row_count; ++i) { + // For truncate(ColumnDecimal, ColumnInt32), we should always have same scale with source Decimal column + // So we need this check to make sure the result have correct digits count + // + // Case 0: scale_arg <= -(integer part digits count) + // do nothing, because result is 0 + // Case 1: scale_arg <= 0 && scale_arg > -(integer part digits count) + // decimal parts has been erased, so add them back by multiply 10^(scale_arg) + // Case 2: scale_arg > 0 && scale_arg < decimal part digits count + // decimal part now has scale_arg digits, so multiply 10^(input_scale - scal_arg) + // Case 3: scale_arg >= input_scale + // do nothing + const Int32 scale_arg = col_scale_i32.get_data()[i]; + if (scale_arg <= 0) { + col_res->get_element(i).value *= int_exp10(input_scale); + } else if (scale_arg > 0 && scale_arg < input_scale) { + col_res->get_element(i).value *= int_exp10(input_scale - scale_arg); + } + } + + return col_res; + } else { + LOG(FATAL) << "__builtin_unreachable"; + __builtin_unreachable(); + return nullptr; + } + } + + // NOTE: This function is only tested for truncate + // DO NOT USE THIS METHOD FOR OTHER ROUNDING BASED FUNCTION UNTIL YOU KNOW EXACTLY WHAT YOU ARE DOING !!! only test for truncate + static ColumnPtr apply_const_vec(const ColumnConst* const_col_general, Review Comment: warning: function 'apply_const_vec' exceeds recommended size/complexity thresholds [readability-function-size] ```cpp static ColumnPtr apply_const_vec(const ColumnConst* const_col_general, ^ ``` <details> <summary>Additional context</summary> **be/src/vec/functions/round.h:568:** 84 lines including whitespace and comments (threshold 80) ```cpp static ColumnPtr apply_const_vec(const ColumnConst* const_col_general, ^ ``` </details> ########## be/src/vec/functions/function_truncate.h: ########## @@ -0,0 +1,245 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cstddef> +#include <functional> +#include <type_traits> +#include <utility> + +#include "common/exception.h" +#include "common/status.h" +#include "olap/olap_common.h" +#include "round.h" +#include "vec/columns/column.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_decimal.h" +#include "vec/columns/column_vector.h" +#include "vec/common/assert_cast.h" +#include "vec/core/call_on_type_index.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_decimal.h" +#include "vec/data_types/data_type_number.h" + +namespace doris::vectorized { + +struct TruncateFloatOneArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeFloat64>()}; } +}; + +struct TruncateFloatTwoArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeFloat64>(), std::make_shared<DataTypeInt32>()}; + } +}; + +struct TruncateDecimalOneArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + // All Decimal types are named Decimal, and real scale will be passed as type argument for execute function + // So we can just register Decimal32 here + return {std::make_shared<DataTypeDecimal<Decimal32>>(9, 0)}; + } +}; + +struct TruncateDecimalTwoArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeDecimal<Decimal32>>(9, 0), + std::make_shared<DataTypeInt32>()}; + } +}; + +template <typename Impl> +class FunctionTruncate : public FunctionRounding<Impl, RoundingMode::Trunc, TieBreakingMode::Auto> { +public: + static FunctionPtr create() { return std::make_shared<FunctionTruncate>(); } + + ColumnNumbers get_arguments_that_are_always_constant() const override { return {}; } + // SELECT number, truncate(123.345, 1) FROM number("numbers"="10") + // should NOT behave like two column arguments, so we can not use const column default implementation + bool use_default_implementation_for_constants() const override { return false; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { + const ColumnWithTypeAndName& column_general = block.get_by_position(arguments[0]); + ColumnPtr res; + + // potential argument types: + // 0. truncate(ColumnConst, ColumnConst) + // 1. truncate(Column), truncate(Column, ColumnConst) + // 2. truncate(Column, Column) + // 3. truncate(ColumnConst, Column) + + if (arguments.size() == 2 && is_column_const(*block.get_by_position(arguments[0]).column) && + is_column_const(*block.get_by_position(arguments[1]).column)) { + // truncate(ColumnConst, ColumnConst) + auto col_general = + assert_cast<const ColumnConst&>(*column_general.column).get_data_column_ptr(); + Int16 scale_arg = 0; + RETURN_IF_ERROR(FunctionTruncate<Impl>::get_scale_arg( + block.get_by_position(arguments[1]), &scale_arg)); + + auto call = [&](const auto& types) -> bool { + using Types = std::decay_t<decltype(types)>; + using DataType = typename Types::LeftType; + + if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { + using FieldType = typename DataType::FieldType; + res = Dispatcher<FieldType, RoundingMode::Trunc, + TieBreakingMode::Auto>::apply_vec_const(col_general, + scale_arg); + return true; Review Comment: warning: redundant boolean literal in conditional return statement [readability-simplify-boolean-expr] be/src/vec/functions/function_truncate.h:103: ```diff - if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { - using FieldType = typename DataType::FieldType; - res = Dispatcher<FieldType, RoundingMode::Trunc, - TieBreakingMode::Auto>::apply_vec_const(col_general, - scale_arg); - return true; - } - - return false; + return IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>; ``` ########## be/src/vec/functions/function_truncate.h: ########## @@ -0,0 +1,245 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cstddef> +#include <functional> +#include <type_traits> +#include <utility> + +#include "common/exception.h" +#include "common/status.h" +#include "olap/olap_common.h" +#include "round.h" +#include "vec/columns/column.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_decimal.h" +#include "vec/columns/column_vector.h" +#include "vec/common/assert_cast.h" +#include "vec/core/call_on_type_index.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_decimal.h" +#include "vec/data_types/data_type_number.h" + +namespace doris::vectorized { + +struct TruncateFloatOneArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeFloat64>()}; } +}; + +struct TruncateFloatTwoArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeFloat64>(), std::make_shared<DataTypeInt32>()}; + } +}; + +struct TruncateDecimalOneArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + // All Decimal types are named Decimal, and real scale will be passed as type argument for execute function + // So we can just register Decimal32 here + return {std::make_shared<DataTypeDecimal<Decimal32>>(9, 0)}; + } +}; + +struct TruncateDecimalTwoArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeDecimal<Decimal32>>(9, 0), + std::make_shared<DataTypeInt32>()}; + } +}; + +template <typename Impl> +class FunctionTruncate : public FunctionRounding<Impl, RoundingMode::Trunc, TieBreakingMode::Auto> { +public: + static FunctionPtr create() { return std::make_shared<FunctionTruncate>(); } + + ColumnNumbers get_arguments_that_are_always_constant() const override { return {}; } + // SELECT number, truncate(123.345, 1) FROM number("numbers"="10") + // should NOT behave like two column arguments, so we can not use const column default implementation + bool use_default_implementation_for_constants() const override { return false; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { + const ColumnWithTypeAndName& column_general = block.get_by_position(arguments[0]); + ColumnPtr res; + + // potential argument types: + // 0. truncate(ColumnConst, ColumnConst) + // 1. truncate(Column), truncate(Column, ColumnConst) + // 2. truncate(Column, Column) + // 3. truncate(ColumnConst, Column) + + if (arguments.size() == 2 && is_column_const(*block.get_by_position(arguments[0]).column) && + is_column_const(*block.get_by_position(arguments[1]).column)) { + // truncate(ColumnConst, ColumnConst) + auto col_general = + assert_cast<const ColumnConst&>(*column_general.column).get_data_column_ptr(); + Int16 scale_arg = 0; + RETURN_IF_ERROR(FunctionTruncate<Impl>::get_scale_arg( + block.get_by_position(arguments[1]), &scale_arg)); + + auto call = [&](const auto& types) -> bool { + using Types = std::decay_t<decltype(types)>; + using DataType = typename Types::LeftType; + + if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { + using FieldType = typename DataType::FieldType; + res = Dispatcher<FieldType, RoundingMode::Trunc, + TieBreakingMode::Auto>::apply_vec_const(col_general, + scale_arg); + return true; + } + + return false; + }; + +#if !defined(__SSE4_1__) && !defined(__aarch64__) + /// In case of "nearbyint" function is used, we should ensure the expected rounding mode for the Banker's rounding. + /// Actually it is by default. But we will set it just in case. + + if constexpr (rounding_mode == RoundingMode::Round) { + if (0 != fesetround(FE_TONEAREST)) { + return Status::InvalidArgument("Cannot set floating point rounding mode"); + } + } +#endif + + if (!call_on_index_and_data_type<void>(column_general.type->get_type_id(), call)) { + return Status::InvalidArgument("Invalid argument type {} for function {}", + column_general.type->get_name(), "truncate"); + } + // Important, make sure the result column has the same size as the input column + res = ColumnConst::create(std::move(res), input_rows_count); + } else if (arguments.size() == 1 || + (arguments.size() == 2 && + is_column_const(*block.get_by_position(arguments[1]).column))) { + // truncate(Column) or truncate(Column, ColumnConst) + Int16 scale_arg = 0; + if (arguments.size() == 2) { + RETURN_IF_ERROR(FunctionTruncate<Impl>::get_scale_arg( + block.get_by_position(arguments[1]), &scale_arg)); + } + + auto call = [&](const auto& types) -> bool { + using Types = std::decay_t<decltype(types)>; + using DataType = typename Types::LeftType; + + if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { + using FieldType = typename DataType::FieldType; + res = Dispatcher<FieldType, RoundingMode::Trunc, TieBreakingMode::Auto>:: + apply_vec_const(column_general.column.get(), scale_arg); + return true; Review Comment: warning: redundant boolean literal in conditional return statement [readability-simplify-boolean-expr] be/src/vec/functions/function_truncate.h:145: ```diff - if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { - using FieldType = typename DataType::FieldType; - res = Dispatcher<FieldType, RoundingMode::Trunc, TieBreakingMode::Auto>:: - apply_vec_const(column_general.column.get(), scale_arg); - return true; - } - - return false; + return IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>; ``` ########## be/src/vec/functions/function_uuid.cpp: ########## @@ -0,0 +1,213 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cctype> +#include <cstddef> +#include <cstring> +#include <memory> +#include <utility> + +#include "common/status.h" +#include "vec/aggregate_functions/aggregate_function.h" +#include "vec/columns/column.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_vector.h" +#include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" +#include "vec/core/block.h" +#include "vec/core/column_numbers.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" +#include "vec/functions/function.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris { +class FunctionContext; +} // namespace doris + +namespace doris::vectorized { +constexpr static std::array<int, 5> SPLIT_POS = {8, 13, 18, 23, 36}; // 8-4-4-4-12 +constexpr static char DELIMITER = '-'; + +class FunctionUuidtoInt : public IFunction { +public: + static constexpr auto name = "uuid_to_int"; + + static FunctionPtr create() { return std::make_shared<FunctionUuidtoInt>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeInt128>()); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { Review Comment: warning: method 'execute_impl' can be made static [readability-convert-member-functions-to-static] ```suggestion static Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { ``` ########## be/test/vec/function/function_truncate_decimal_test.cpp: ########## @@ -0,0 +1,370 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest-message.h> Review Comment: warning: 'gtest/gtest-message.h' file not found [clang-diagnostic-error] ```cpp #include <gtest/gtest-message.h> ^ ``` ########## be/src/vec/functions/function_uuid.cpp: ########## @@ -0,0 +1,213 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cctype> +#include <cstddef> +#include <cstring> +#include <memory> +#include <utility> + +#include "common/status.h" +#include "vec/aggregate_functions/aggregate_function.h" +#include "vec/columns/column.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_vector.h" +#include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" +#include "vec/core/block.h" +#include "vec/core/column_numbers.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" +#include "vec/functions/function.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris { +class FunctionContext; +} // namespace doris + +namespace doris::vectorized { +constexpr static std::array<int, 5> SPLIT_POS = {8, 13, 18, 23, 36}; // 8-4-4-4-12 +constexpr static char DELIMITER = '-'; + +class FunctionUuidtoInt : public IFunction { +public: + static constexpr auto name = "uuid_to_int"; + + static FunctionPtr create() { return std::make_shared<FunctionUuidtoInt>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeInt128>()); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { + const auto& arg_column = + assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column); + + auto result_column = ColumnInt128::create(input_rows_count); + auto& result_data = result_column->get_data(); + auto null_column = ColumnUInt8::create(input_rows_count); + auto& null_map = null_column->get_data(); + + for (int row = 0; row < input_rows_count; row++) { + auto str = arg_column.get_data_at(row); + const auto* data = str.data; + Int128* result_cell = &result_data[row]; + *result_cell = 0; + null_map[row] = false; + + if (str.size == 36) { + if (data[SPLIT_POS[0]] != DELIMITER || data[SPLIT_POS[1]] != DELIMITER || + data[SPLIT_POS[2]] != DELIMITER || data[SPLIT_POS[3]] != DELIMITER) { + null_map[row] = true; + continue; + } + char new_data[32]; + memset(new_data, 0, sizeof(new_data)); + // ignore '-' + memcpy(new_data, data, 8); + memcpy(new_data + 8, data + SPLIT_POS[0] + 1, 4); + memcpy(new_data + 12, data + SPLIT_POS[1] + 1, 4); + memcpy(new_data + 16, data + SPLIT_POS[2] + 1, 4); + memcpy(new_data + 20, data + SPLIT_POS[3] + 1, 12); + + if (!serialize(new_data, (char*)result_cell, 32)) { + null_map[row] = true; + continue; + } + } else if (str.size == 32) { + if (!serialize(data, (char*)result_cell, 32)) { + null_map[row] = true; + continue; + } + } else { + null_map[row] = true; + continue; + } + } + + block.replace_by_position( + result, ColumnNullable::create(std::move(result_column), std::move(null_column))); + return Status::OK(); + } + + // use char* to write dst is the only legal way by 'restrict aliasing rule' + static bool serialize(const char* __restrict src, char* __restrict dst, size_t length) { + char target; // 8bit, contains 2 char input + auto translate = [&target](const char ch) { + if (isdigit(ch)) { + target += ch - '0'; + } else if (ch >= 'a' && ch <= 'f') { + target += ch - 'a' + 10; + } else if (ch >= 'A' && ch <= 'F') { + target += ch - 'A' + 10; + } else { + return false; + } + return true; + }; + + bool ok = true; + for (size_t i = 0; i < length; i += 2, src++, dst++) { + target = 0; + if (!translate(*src)) { + ok = false; // dont break for auto-simd + } + + src++; + target <<= 4; + if (!translate(*src)) { + ok = false; + } + *dst = target; + } + + return ok; + } +}; + +class FunctionInttoUuid : public IFunction { +public: + static constexpr auto name = "int_to_uuid"; + + static FunctionPtr create() { return std::make_shared<FunctionInttoUuid>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { Review Comment: warning: method 'get_return_type_impl' can be made static [readability-convert-member-functions-to-static] ```suggestion static DataTypePtr get_return_type_impl(const DataTypes& arguments) override { ``` ########## be/src/vec/functions/function_truncate.h: ########## @@ -0,0 +1,245 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cstddef> +#include <functional> +#include <type_traits> +#include <utility> + +#include "common/exception.h" +#include "common/status.h" +#include "olap/olap_common.h" +#include "round.h" +#include "vec/columns/column.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_decimal.h" +#include "vec/columns/column_vector.h" +#include "vec/common/assert_cast.h" +#include "vec/core/call_on_type_index.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_decimal.h" +#include "vec/data_types/data_type_number.h" + +namespace doris::vectorized { + +struct TruncateFloatOneArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeFloat64>()}; } +}; + +struct TruncateFloatTwoArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeFloat64>(), std::make_shared<DataTypeInt32>()}; + } +}; + +struct TruncateDecimalOneArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + // All Decimal types are named Decimal, and real scale will be passed as type argument for execute function + // So we can just register Decimal32 here + return {std::make_shared<DataTypeDecimal<Decimal32>>(9, 0)}; + } +}; + +struct TruncateDecimalTwoArgImpl { + static constexpr auto name = "truncate"; + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeDecimal<Decimal32>>(9, 0), + std::make_shared<DataTypeInt32>()}; + } +}; + +template <typename Impl> +class FunctionTruncate : public FunctionRounding<Impl, RoundingMode::Trunc, TieBreakingMode::Auto> { +public: + static FunctionPtr create() { return std::make_shared<FunctionTruncate>(); } + + ColumnNumbers get_arguments_that_are_always_constant() const override { return {}; } + // SELECT number, truncate(123.345, 1) FROM number("numbers"="10") + // should NOT behave like two column arguments, so we can not use const column default implementation + bool use_default_implementation_for_constants() const override { return false; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { + const ColumnWithTypeAndName& column_general = block.get_by_position(arguments[0]); + ColumnPtr res; + + // potential argument types: + // 0. truncate(ColumnConst, ColumnConst) + // 1. truncate(Column), truncate(Column, ColumnConst) + // 2. truncate(Column, Column) + // 3. truncate(ColumnConst, Column) + + if (arguments.size() == 2 && is_column_const(*block.get_by_position(arguments[0]).column) && + is_column_const(*block.get_by_position(arguments[1]).column)) { + // truncate(ColumnConst, ColumnConst) + auto col_general = + assert_cast<const ColumnConst&>(*column_general.column).get_data_column_ptr(); + Int16 scale_arg = 0; + RETURN_IF_ERROR(FunctionTruncate<Impl>::get_scale_arg( + block.get_by_position(arguments[1]), &scale_arg)); + + auto call = [&](const auto& types) -> bool { + using Types = std::decay_t<decltype(types)>; + using DataType = typename Types::LeftType; + + if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { + using FieldType = typename DataType::FieldType; + res = Dispatcher<FieldType, RoundingMode::Trunc, + TieBreakingMode::Auto>::apply_vec_const(col_general, + scale_arg); + return true; + } + + return false; + }; + +#if !defined(__SSE4_1__) && !defined(__aarch64__) + /// In case of "nearbyint" function is used, we should ensure the expected rounding mode for the Banker's rounding. + /// Actually it is by default. But we will set it just in case. + + if constexpr (rounding_mode == RoundingMode::Round) { + if (0 != fesetround(FE_TONEAREST)) { + return Status::InvalidArgument("Cannot set floating point rounding mode"); + } + } +#endif + + if (!call_on_index_and_data_type<void>(column_general.type->get_type_id(), call)) { + return Status::InvalidArgument("Invalid argument type {} for function {}", + column_general.type->get_name(), "truncate"); + } + // Important, make sure the result column has the same size as the input column + res = ColumnConst::create(std::move(res), input_rows_count); + } else if (arguments.size() == 1 || + (arguments.size() == 2 && + is_column_const(*block.get_by_position(arguments[1]).column))) { + // truncate(Column) or truncate(Column, ColumnConst) + Int16 scale_arg = 0; + if (arguments.size() == 2) { + RETURN_IF_ERROR(FunctionTruncate<Impl>::get_scale_arg( + block.get_by_position(arguments[1]), &scale_arg)); + } + + auto call = [&](const auto& types) -> bool { + using Types = std::decay_t<decltype(types)>; + using DataType = typename Types::LeftType; + + if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { + using FieldType = typename DataType::FieldType; + res = Dispatcher<FieldType, RoundingMode::Trunc, TieBreakingMode::Auto>:: + apply_vec_const(column_general.column.get(), scale_arg); + return true; + } + + return false; + }; +#if !defined(__SSE4_1__) && !defined(__aarch64__) + /// In case of "nearbyint" function is used, we should ensure the expected rounding mode for the Banker's rounding. + /// Actually it is by default. But we will set it just in case. + + if constexpr (rounding_mode == RoundingMode::Round) { + if (0 != fesetround(FE_TONEAREST)) { + return Status::InvalidArgument("Cannot set floating point rounding mode"); + } + } +#endif + + if (!call_on_index_and_data_type<void>(column_general.type->get_type_id(), call)) { + return Status::InvalidArgument("Invalid argument type {} for function {}", + column_general.type->get_name(), "truncate"); + } + + } else if (is_column_const(*block.get_by_position(arguments[0]).column)) { + // truncate(ColumnConst, Column) + const ColumnWithTypeAndName& column_scale = block.get_by_position(arguments[1]); + const ColumnConst& const_col_general = + assert_cast<const ColumnConst&>(*column_general.column); + + auto call = [&](const auto& types) -> bool { + using Types = std::decay_t<decltype(types)>; + using DataType = typename Types::LeftType; + + if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { + using FieldType = typename DataType::FieldType; + res = Dispatcher<FieldType, RoundingMode::Trunc, TieBreakingMode::Auto>:: + apply_const_vec(&const_col_general, column_scale.column.get()); + return true; + } + + return false; + }; + +#if !defined(__SSE4_1__) && !defined(__aarch64__) + /// In case of "nearbyint" function is used, we should ensure the expected rounding mode for the Banker's rounding. + /// Actually it is by default. But we will set it just in case. + + if constexpr (rounding_mode == RoundingMode::Round) { + if (0 != fesetround(FE_TONEAREST)) { + return Status::InvalidArgument("Cannot set floating point rounding mode"); + } + } +#endif + + if (!call_on_index_and_data_type<void>(column_general.type->get_type_id(), call)) { + return Status::InvalidArgument("Invalid argument type {} for function {}", + column_general.type->get_name(), "truncate"); + } + } else { + // truncate(Column, Column) + const ColumnWithTypeAndName& column_scale = block.get_by_position(arguments[1]); + + auto call = [&](const auto& types) -> bool { + using Types = std::decay_t<decltype(types)>; + using DataType = typename Types::LeftType; + + if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { + using FieldType = typename DataType::FieldType; + res = Dispatcher<FieldType, RoundingMode::Trunc, TieBreakingMode::Auto>:: + apply_vec_vec(column_general.column.get(), column_scale.column.get()); + return true; Review Comment: warning: redundant boolean literal in conditional return statement [readability-simplify-boolean-expr] be/src/vec/functions/function_truncate.h:213: ```diff - if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) { - using FieldType = typename DataType::FieldType; - res = Dispatcher<FieldType, RoundingMode::Trunc, TieBreakingMode::Auto>:: - apply_vec_vec(column_general.column.get(), column_scale.column.get()); - return true; - } - return false; + return IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>; ``` ########## be/src/vec/functions/function_uuid.cpp: ########## @@ -0,0 +1,213 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cctype> +#include <cstddef> +#include <cstring> +#include <memory> +#include <utility> + +#include "common/status.h" +#include "vec/aggregate_functions/aggregate_function.h" +#include "vec/columns/column.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_vector.h" +#include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" +#include "vec/core/block.h" +#include "vec/core/column_numbers.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" +#include "vec/functions/function.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris { +class FunctionContext; +} // namespace doris + +namespace doris::vectorized { +constexpr static std::array<int, 5> SPLIT_POS = {8, 13, 18, 23, 36}; // 8-4-4-4-12 +constexpr static char DELIMITER = '-'; + +class FunctionUuidtoInt : public IFunction { +public: + static constexpr auto name = "uuid_to_int"; + + static FunctionPtr create() { return std::make_shared<FunctionUuidtoInt>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeInt128>()); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { + const auto& arg_column = + assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column); + + auto result_column = ColumnInt128::create(input_rows_count); + auto& result_data = result_column->get_data(); + auto null_column = ColumnUInt8::create(input_rows_count); + auto& null_map = null_column->get_data(); + + for (int row = 0; row < input_rows_count; row++) { + auto str = arg_column.get_data_at(row); + const auto* data = str.data; + Int128* result_cell = &result_data[row]; + *result_cell = 0; + null_map[row] = false; + + if (str.size == 36) { + if (data[SPLIT_POS[0]] != DELIMITER || data[SPLIT_POS[1]] != DELIMITER || + data[SPLIT_POS[2]] != DELIMITER || data[SPLIT_POS[3]] != DELIMITER) { + null_map[row] = true; + continue; + } + char new_data[32]; + memset(new_data, 0, sizeof(new_data)); + // ignore '-' + memcpy(new_data, data, 8); + memcpy(new_data + 8, data + SPLIT_POS[0] + 1, 4); + memcpy(new_data + 12, data + SPLIT_POS[1] + 1, 4); + memcpy(new_data + 16, data + SPLIT_POS[2] + 1, 4); + memcpy(new_data + 20, data + SPLIT_POS[3] + 1, 12); + + if (!serialize(new_data, (char*)result_cell, 32)) { + null_map[row] = true; + continue; + } + } else if (str.size == 32) { + if (!serialize(data, (char*)result_cell, 32)) { + null_map[row] = true; + continue; + } + } else { + null_map[row] = true; + continue; + } + } + + block.replace_by_position( + result, ColumnNullable::create(std::move(result_column), std::move(null_column))); + return Status::OK(); + } + + // use char* to write dst is the only legal way by 'restrict aliasing rule' + static bool serialize(const char* __restrict src, char* __restrict dst, size_t length) { + char target; // 8bit, contains 2 char input + auto translate = [&target](const char ch) { + if (isdigit(ch)) { + target += ch - '0'; + } else if (ch >= 'a' && ch <= 'f') { + target += ch - 'a' + 10; + } else if (ch >= 'A' && ch <= 'F') { + target += ch - 'A' + 10; + } else { + return false; + } + return true; + }; + + bool ok = true; + for (size_t i = 0; i < length; i += 2, src++, dst++) { + target = 0; + if (!translate(*src)) { + ok = false; // dont break for auto-simd + } + + src++; + target <<= 4; + if (!translate(*src)) { + ok = false; + } + *dst = target; + } + + return ok; + } +}; + +class FunctionInttoUuid : public IFunction { +public: + static constexpr auto name = "int_to_uuid"; + + static FunctionPtr create() { return std::make_shared<FunctionInttoUuid>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared<DataTypeString>(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { Review Comment: warning: method 'execute_impl' can be made static [readability-convert-member-functions-to-static] ```suggestion static Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org