github-actions[bot] commented on code in PR #36289:
URL: https://github.com/apache/doris/pull/36289#discussion_r1639151156


##########
be/src/util/bit_util.h:
##########
@@ -98,6 +98,25 @@ class BitUtil {
         return (v << n) >> n;
     }
 
+    template <typename T>
+    static std::string IntToByteBuffer(T input) {
+        std::string buffer;
+        T value = input;
+        for (int i = 0; i < sizeof(value); ++i) {
+            // Applies a mask for a byte range on the input.
+            char value_to_save = value & 0XFF;
+            buffer.push_back(value_to_save);
+            // Remove the just processed part from the input so that we can 
exit early if there
+            // is nothing left to process.
+            value >>= 8;
+            if (value == 0 && value_to_save >= 0) break;

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
               if (value == 0 && value_to_save >= 0) {
                   break;
               }
   ```
   



##########
be/src/util/bit_util.h:
##########
@@ -98,6 +98,25 @@
         return (v << n) >> n;
     }
 
+    template <typename T>
+    static std::string IntToByteBuffer(T input) {
+        std::string buffer;
+        T value = input;
+        for (int i = 0; i < sizeof(value); ++i) {
+            // Applies a mask for a byte range on the input.
+            char value_to_save = value & 0XFF;
+            buffer.push_back(value_to_save);
+            // Remove the just processed part from the input so that we can 
exit early if there
+            // is nothing left to process.
+            value >>= 8;
+            if (value == 0 && value_to_save >= 0) break;
+            if (value == -1 && value_to_save < 0) break;

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
               if (value == -1 && value_to_save < 0) {
                   break;
               }
   ```
   



##########
be/src/vec/sink/writer/iceberg/partition_transformers.h:
##########
@@ -43,38 +43,1231 @@
             const doris::iceberg::PartitionField& field, const TypeDescriptor& 
source_type);
 };
 
+class PartitionColumnTransformUtils {
+public:
+    static DateV2Value<DateV2ValueType>& epoch_date() {
+        static DateV2Value<DateV2ValueType> epoch_date;
+        static bool initialized = false;
+        if (!initialized) {
+            epoch_date.from_date_str("1970-01-01 00:00:00", 19);
+            initialized = true;
+        }
+        return epoch_date;
+    }
+
+    static DateV2Value<DateTimeV2ValueType>& epoch_datetime() {
+        static DateV2Value<DateTimeV2ValueType> epoch_datetime;
+        static bool initialized = false;
+        if (!initialized) {
+            epoch_datetime.from_date_str("1970-01-01 00:00:00", 19);
+            initialized = true;
+        }
+        return epoch_datetime;
+    }
+
+    static std::string human_year(int year_ordinal) {
+        auto year = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + std::chrono::years(year_ordinal))))
+                            .year();
+        return std::to_string(static_cast<int>(year));
+    }
+
+    static std::string human_month(int month_ordinal) {
+        auto ymd = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + 
std::chrono::months(month_ordinal))));
+        return fmt::format("{:04d}-{:02d}", static_cast<int>(ymd.year()), 
static_cast<unsigned>(ymd.month()));
+    }
+
+    static std::string human_day(int day_ordinal) {
+        auto ymd = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + std::chrono::days(day_ordinal))));
+        return fmt::format("{:04d}-{:02d}-{:02d}", 
static_cast<int>(ymd.year()), static_cast<unsigned>(ymd.month()), 
+                            static_cast<unsigned>(ymd.day()));
+    }
+
+
+    static std::string human_hour(int hour_ordinal) {
+        int day_value = hour_ordinal / 24;
+        int housr_value = hour_ordinal % 24;
+        auto ymd = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + std::chrono::days(day_value))));
+        return fmt::format("{:04d}-{:02d}-{:02d}-{:02d}", 
static_cast<int>(ymd.year()), static_cast<unsigned>(ymd.month()), 
+                           static_cast<unsigned>(ymd.day()), housr_value);
+    }
+
+
+private:
+    static const std::chrono::time_point<std::chrono::system_clock> EPOCH;
+    PartitionColumnTransformUtils() {}
+};
+
 class PartitionColumnTransform {
 public:
     PartitionColumnTransform() = default;
 
     virtual ~PartitionColumnTransform() = default;
 
-    virtual bool preserves_non_null() const { return false; }
-
-    virtual bool monotonic() const { return true; }
-
-    virtual bool temporal() const { return false; }
+    virtual std::string name() const;
 
     virtual const TypeDescriptor& get_result_type() const = 0;
 
-    virtual bool is_void() const { return false; }
-
-    virtual ColumnWithTypeAndName apply(Block& block, int idx) = 0;
+    virtual ColumnWithTypeAndName apply(Block& block, int column_pos) = 0;
 
     virtual std::string to_human_string(const TypeDescriptor& type, const 
std::any& value) const;
+
+    virtual std::string get_partition_value(const TypeDescriptor& type, const 
std::any& value) const;
 };
 
 class IdentityPartitionColumnTransform : public PartitionColumnTransform {
 public:
     IdentityPartitionColumnTransform(const TypeDescriptor& source_type)
             : _source_type(source_type) {}
 
-    virtual const TypeDescriptor& get_result_type() const { return 
_source_type; }
+    std::string name() const override { return "Identity"; }
+
+    const TypeDescriptor& get_result_type() const override { return 
_source_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = 
block.get_by_position(column_pos);
+        return {column_with_type_and_name.column, 
column_with_type_and_name.type,
+                column_with_type_and_name.name};
+    }
+
+private:
+    TypeDescriptor _source_type;
+};
+
+class StringTruncatePartitionColumnTransform : public PartitionColumnTransform 
{
+public:
+    StringTruncatePartitionColumnTransform(const TypeDescriptor& source_type, 
int width)
+            : _source_type(source_type), _width(width) {}
+
+    std::string name() const override { return "StringTruncate"; }
+
+    const TypeDescriptor& get_result_type() const override { return 
_source_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        auto int_type = std::make_shared<DataTypeInt32>();
+        size_t num_columns_without_result = block.columns();
+        const ColumnWithTypeAndName& column_with_type_and_name = 
block.get_by_position(column_pos);
+
+        ColumnPtr string_column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    
check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            string_column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            string_column_ptr = column_with_type_and_name.column;
+            is_nullable = false;
+        }
+        block.replace_by_position(column_pos, std::move(string_column_ptr));
+        block.insert(
+                {int_type->create_column_const(block.rows(), to_field(1)), 
int_type, "const 1"});
+        block.insert({int_type->create_column_const(block.rows(), 
to_field(_width)), int_type,
+                      fmt::format("const {}", _width)});
+        block.insert({nullptr, std::make_shared<DataTypeString>(), "result"});
+        ColumnNumbers temp_arguments(3);
+        temp_arguments[0] = column_pos;                     // str column
+        temp_arguments[1] = num_columns_without_result;     // pos
+        temp_arguments[2] = num_columns_without_result + 1; // width
+        size_t result_column_id = num_columns_without_result + 2;
+
+        SubstringUtil::substring_execute(block, temp_arguments, 
result_column_id, block.rows());
+        if (is_nullable) {
+            auto res_column = 
ColumnNullable::create(block.get_by_position(result_column_id).column,
+                                                     null_map_column_ptr);
+            Block::erase_useless_column(&block, num_columns_without_result);
+            return {std::move(res_column),
+                    
DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                    column_with_type_and_name.name};
+        } else {
+            auto res_column = block.get_by_position(result_column_id).column;
+            Block::erase_useless_column(&block, num_columns_without_result);
+            return {std::move(res_column),
+                    
DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                    column_with_type_and_name.name};
+        }
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _width;
+};
+
+class IntegerTruncatePartitionColumnTransform : public 
PartitionColumnTransform {
+public:
+    IntegerTruncatePartitionColumnTransform(const TypeDescriptor& source_type, 
int width)
+            : _source_type(source_type), _width(width) {}
+
+    std::string name() const override { return "IntegerTruncate"; }
+
+    const TypeDescriptor& get_result_type() const override { return 
_source_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = 
block.get_by_position(column_pos);
+
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    
check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+            is_nullable = false;
+        }
+        if (const ColumnInt32* col_integer = 
check_and_get_column<ColumnInt32>(column_ptr)) {
+            auto col_res = ColumnInt32::create();
+            ColumnInt32::Container& out_data = col_res->get_data();
+            out_data.resize(col_integer->get_data().size());
+            const ColumnInt32::Container& in_data = col_integer->get_data();
+            const int* end_in = in_data.data() + in_data.size();
+
+            const Int32* __restrict p_in = in_data.data();
+            Int32* __restrict p_out = out_data.data();
+
+            while (p_in < end_in) {
+                *p_out = *p_in - ((*p_in % _width) + _width) % _width;
+                ++p_in;
+                ++p_out;
+            }
+            if (is_nullable) {
+                auto res_column = ColumnNullable::create(std::move(col_res), 
null_map_column_ptr);
+                return {std::move(res_column),
+                        
DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                        column_with_type_and_name.name};
+            } else {
+                return {std::move(col_res),
+                        
DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                        column_with_type_and_name.name};
+            }
+        } else if (auto col_right_const = 
check_and_get_column_const<ColumnInt32>(column_ptr)) {
+               throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "IntegerTruncatePartitionColumnTransform  
transform partition error use column_pos {} ", column_pos);
+        } else {
+           throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "IntegerTruncatePartitionColumnTransform  
transform partition error use column_pos {} ", column_pos);
+        }
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _width;
+};
+
+template <typename T>
+class DecimalTruncatePartitionColumnTransform : public 
PartitionColumnTransform {
+public:
+    DecimalTruncatePartitionColumnTransform(const TypeDescriptor& source_type, 
int width)
+            : _source_type(source_type), _width(width) {}
+
+    std::string name() const override { return "DecimalTruncate"; }
+
+    const TypeDescriptor& get_result_type() const override { return 
_source_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = 
block.get_by_position(column_pos);
+
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    
check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+            is_nullable = false;
+        }
+
+        const auto* const decimal_col = 
check_and_get_column<ColumnDecimal<T>>(column_ptr);
+        const auto& vec_src = decimal_col->get_data();
+
+        auto col_res = ColumnDecimal<T>::create(vec_src.size(), 
decimal_col->get_scale());
+        auto& vec_res = col_res->get_data();
+
+        const typename T::NativeType* __restrict p_in =
+                reinterpret_cast<const T::NativeType*>(vec_src.data());
+        const typename T::NativeType* end_in =
+                reinterpret_cast<const T::NativeType*>(vec_src.data()) + 
vec_src.size();
+        typename T::NativeType* __restrict p_out = 
reinterpret_cast<T::NativeType*>(vec_res.data());
+
+        while (p_in < end_in) {
+            typename T::NativeType remainder = ((*p_in % _width) + _width) % 
_width;
+            *p_out = *p_in - remainder;
+            ++p_in;
+            ++p_out;
+        }
+
+        if (is_nullable) {
+            auto res_column = ColumnNullable::create(std::move(col_res), 
null_map_column_ptr);
+            return {res_column,
+                    
DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                    column_with_type_and_name.name};
+        } else {
+            return {std::move(col_res),
+                    
DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                    column_with_type_and_name.name};
+        }
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _width;
+};
+
+class IntBucketPartitionColumnTransform : public PartitionColumnTransform {
+public:
+    IntBucketPartitionColumnTransform(const TypeDescriptor& source_type, int 
bucket_num)
+            : _source_type(source_type), _bucket_num(bucket_num), 
_target_type(TYPE_INT) {}
+
+    std::string name() const override { return "IntBucket"; }
+    
+    const TypeDescriptor& get_result_type() const override { return 
_target_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = 
block.get_by_position(column_pos);
+
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    
check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+            is_nullable = false;
+        }
+        if (const ColumnInt32* col_integer = 
check_and_get_column<ColumnInt32>(column_ptr)) {
+            auto col_res = ColumnInt32::create();
+            ColumnInt32::Container& out_data = col_res->get_data();
+            out_data.resize(col_integer->get_data().size());
+
+            const ColumnInt32::Container& in_data = col_integer->get_data();
+            const int* end_in = in_data.data() + in_data.size();
+
+            const Int32* __restrict p_in = in_data.data();
+            Int32* __restrict p_out = out_data.data();
+
+            while (p_in < end_in) {
+                Int64 long_value = static_cast<Int64>(*p_in);

Review Comment:
   warning: use auto when initializing with a cast to avoid duplicating the 
type name [modernize-use-auto]
   
   ```suggestion
                   auto long_value = static_cast<Int64>(*p_in);
   ```
   



##########
be/src/vec/sink/writer/iceberg/partition_transformers .cpp:
##########
@@ -0,0 +1,281 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/sink/writer/iceberg/partition_transformers.h"
+
+#include <any>
+
+#include "vec/core/types.h"
+#include "vec/exec/format/table/iceberg/partition_spec.h"
+
+namespace doris {
+namespace vectorized {

Review Comment:
   warning: nested namespaces can be concatenated 
[modernize-concat-nested-namespaces]
   
   ```suggestion
   namespace doris::vectorized {
   ```
   
   be/src/vec/sink/writer/iceberg/partition_transformers .cpp:279:
   ```diff
   - } // namespace vectorized
   - } // namespace doris
   + } // namespace doris::vectorized
   ```
   



##########
be/src/vec/sink/writer/iceberg/partition_transformers.h:
##########
@@ -43,38 +43,1231 @@ class PartitionColumnTransforms {
             const doris::iceberg::PartitionField& field, const TypeDescriptor& 
source_type);
 };
 
+class PartitionColumnTransformUtils {
+public:
+    static DateV2Value<DateV2ValueType>& epoch_date() {
+        static DateV2Value<DateV2ValueType> epoch_date;
+        static bool initialized = false;
+        if (!initialized) {
+            epoch_date.from_date_str("1970-01-01 00:00:00", 19);
+            initialized = true;
+        }
+        return epoch_date;
+    }
+
+    static DateV2Value<DateTimeV2ValueType>& epoch_datetime() {
+        static DateV2Value<DateTimeV2ValueType> epoch_datetime;
+        static bool initialized = false;
+        if (!initialized) {
+            epoch_datetime.from_date_str("1970-01-01 00:00:00", 19);
+            initialized = true;
+        }
+        return epoch_datetime;
+    }
+
+    static std::string human_year(int year_ordinal) {
+        auto year = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + std::chrono::years(year_ordinal))))
+                            .year();
+        return std::to_string(static_cast<int>(year));
+    }
+
+    static std::string human_month(int month_ordinal) {
+        auto ymd = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + 
std::chrono::months(month_ordinal))));
+        return fmt::format("{:04d}-{:02d}", static_cast<int>(ymd.year()), 
static_cast<unsigned>(ymd.month()));
+    }
+
+    static std::string human_day(int day_ordinal) {
+        auto ymd = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + std::chrono::days(day_ordinal))));
+        return fmt::format("{:04d}-{:02d}-{:02d}", 
static_cast<int>(ymd.year()), static_cast<unsigned>(ymd.month()), 
+                            static_cast<unsigned>(ymd.day()));
+    }
+
+
+    static std::string human_hour(int hour_ordinal) {
+        int day_value = hour_ordinal / 24;
+        int housr_value = hour_ordinal % 24;
+        auto ymd = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + std::chrono::days(day_value))));
+        return fmt::format("{:04d}-{:02d}-{:02d}-{:02d}", 
static_cast<int>(ymd.year()), static_cast<unsigned>(ymd.month()), 
+                           static_cast<unsigned>(ymd.day()), housr_value);
+    }
+
+
+private:
+    static const std::chrono::time_point<std::chrono::system_clock> EPOCH;
+    PartitionColumnTransformUtils() {}

Review Comment:
   warning: use '= default' to define a trivial default constructor 
[modernize-use-equals-default]
   
   ```suggestion
       PartitionColumnTransformUtils() = default;
   ```
   



##########
be/src/vec/sink/writer/iceberg/partition_transformers .cpp:
##########
@@ -0,0 +1,281 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/sink/writer/iceberg/partition_transformers.h"
+
+#include <any>
+
+#include "vec/core/types.h"
+#include "vec/exec/format/table/iceberg/partition_spec.h"
+
+namespace doris {
+namespace vectorized {
+
+const std::chrono::time_point<std::chrono::system_clock> 
PartitionColumnTransformUtils::EPOCH =
+        std::chrono::system_clock::from_time_t(0);
+
+std::unique_ptr<PartitionColumnTransform> PartitionColumnTransforms::create(

Review Comment:
   warning: function 'create' exceeds recommended size/complexity thresholds 
[readability-function-size]
   ```cpp
   std::unique_ptr<PartitionColumnTransform> PartitionColumnTransforms::create(
                                                                        ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/vec/sink/writer/iceberg/partition_transformers .cpp:30:** 158 lines 
including whitespace and comments (threshold 80)
   ```cpp
   std::unique_ptr<PartitionColumnTransform> PartitionColumnTransforms::create(
                                                                        ^
   ```
   
   </details>
   



##########
be/src/vec/sink/writer/iceberg/partition_transformers.h:
##########
@@ -43,38 +43,1231 @@
             const doris::iceberg::PartitionField& field, const TypeDescriptor& 
source_type);
 };
 
+class PartitionColumnTransformUtils {
+public:
+    static DateV2Value<DateV2ValueType>& epoch_date() {
+        static DateV2Value<DateV2ValueType> epoch_date;
+        static bool initialized = false;
+        if (!initialized) {
+            epoch_date.from_date_str("1970-01-01 00:00:00", 19);
+            initialized = true;
+        }
+        return epoch_date;
+    }
+
+    static DateV2Value<DateTimeV2ValueType>& epoch_datetime() {
+        static DateV2Value<DateTimeV2ValueType> epoch_datetime;
+        static bool initialized = false;
+        if (!initialized) {
+            epoch_datetime.from_date_str("1970-01-01 00:00:00", 19);
+            initialized = true;
+        }
+        return epoch_datetime;
+    }
+
+    static std::string human_year(int year_ordinal) {
+        auto year = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + std::chrono::years(year_ordinal))))
+                            .year();
+        return std::to_string(static_cast<int>(year));
+    }
+
+    static std::string human_month(int month_ordinal) {
+        auto ymd = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + 
std::chrono::months(month_ordinal))));
+        return fmt::format("{:04d}-{:02d}", static_cast<int>(ymd.year()), 
static_cast<unsigned>(ymd.month()));
+    }
+
+    static std::string human_day(int day_ordinal) {
+        auto ymd = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + std::chrono::days(day_ordinal))));
+        return fmt::format("{:04d}-{:02d}-{:02d}", 
static_cast<int>(ymd.year()), static_cast<unsigned>(ymd.month()), 
+                            static_cast<unsigned>(ymd.day()));
+    }
+
+
+    static std::string human_hour(int hour_ordinal) {
+        int day_value = hour_ordinal / 24;
+        int housr_value = hour_ordinal % 24;
+        auto ymd = std::chrono::year_month_day(
+                            
std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + std::chrono::days(day_value))));
+        return fmt::format("{:04d}-{:02d}-{:02d}-{:02d}", 
static_cast<int>(ymd.year()), static_cast<unsigned>(ymd.month()), 
+                           static_cast<unsigned>(ymd.day()), housr_value);
+    }
+
+
+private:
+    static const std::chrono::time_point<std::chrono::system_clock> EPOCH;
+    PartitionColumnTransformUtils() {}
+};
+
 class PartitionColumnTransform {
 public:
     PartitionColumnTransform() = default;
 
     virtual ~PartitionColumnTransform() = default;
 
-    virtual bool preserves_non_null() const { return false; }
-
-    virtual bool monotonic() const { return true; }
-
-    virtual bool temporal() const { return false; }
+    virtual std::string name() const;
 
     virtual const TypeDescriptor& get_result_type() const = 0;
 
-    virtual bool is_void() const { return false; }
-
-    virtual ColumnWithTypeAndName apply(Block& block, int idx) = 0;
+    virtual ColumnWithTypeAndName apply(Block& block, int column_pos) = 0;
 
     virtual std::string to_human_string(const TypeDescriptor& type, const 
std::any& value) const;
+
+    virtual std::string get_partition_value(const TypeDescriptor& type, const 
std::any& value) const;
 };
 
 class IdentityPartitionColumnTransform : public PartitionColumnTransform {
 public:
     IdentityPartitionColumnTransform(const TypeDescriptor& source_type)
             : _source_type(source_type) {}
 
-    virtual const TypeDescriptor& get_result_type() const { return 
_source_type; }
+    std::string name() const override { return "Identity"; }
+
+    const TypeDescriptor& get_result_type() const override { return 
_source_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = 
block.get_by_position(column_pos);
+        return {column_with_type_and_name.column, 
column_with_type_and_name.type,
+                column_with_type_and_name.name};
+    }
+
+private:
+    TypeDescriptor _source_type;
+};
+
+class StringTruncatePartitionColumnTransform : public PartitionColumnTransform 
{
+public:
+    StringTruncatePartitionColumnTransform(const TypeDescriptor& source_type, 
int width)
+            : _source_type(source_type), _width(width) {}
+
+    std::string name() const override { return "StringTruncate"; }
+
+    const TypeDescriptor& get_result_type() const override { return 
_source_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        auto int_type = std::make_shared<DataTypeInt32>();
+        size_t num_columns_without_result = block.columns();
+        const ColumnWithTypeAndName& column_with_type_and_name = 
block.get_by_position(column_pos);
+
+        ColumnPtr string_column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    
check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            string_column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            string_column_ptr = column_with_type_and_name.column;
+            is_nullable = false;
+        }
+        block.replace_by_position(column_pos, std::move(string_column_ptr));
+        block.insert(
+                {int_type->create_column_const(block.rows(), to_field(1)), 
int_type, "const 1"});
+        block.insert({int_type->create_column_const(block.rows(), 
to_field(_width)), int_type,
+                      fmt::format("const {}", _width)});
+        block.insert({nullptr, std::make_shared<DataTypeString>(), "result"});
+        ColumnNumbers temp_arguments(3);
+        temp_arguments[0] = column_pos;                     // str column
+        temp_arguments[1] = num_columns_without_result;     // pos
+        temp_arguments[2] = num_columns_without_result + 1; // width
+        size_t result_column_id = num_columns_without_result + 2;
+
+        SubstringUtil::substring_execute(block, temp_arguments, 
result_column_id, block.rows());
+        if (is_nullable) {
+            auto res_column = 
ColumnNullable::create(block.get_by_position(result_column_id).column,
+                                                     null_map_column_ptr);
+            Block::erase_useless_column(&block, num_columns_without_result);
+            return {std::move(res_column),
+                    
DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                    column_with_type_and_name.name};
+        } else {
+            auto res_column = block.get_by_position(result_column_id).column;
+            Block::erase_useless_column(&block, num_columns_without_result);
+            return {std::move(res_column),
+                    
DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                    column_with_type_and_name.name};
+        }
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _width;
+};
+
+/// Truncate partition transform for 32-bit integer columns (Iceberg writer).
+///
+/// Each value `v` is mapped to `v - (((v % W) + W) % W)` where `W` is `_width`. The inner
+/// `+ W ... % W` keeps the remainder non-negative for negative `v`, so for a positive width the
+/// result is `v` rounded down to a multiple of `W`. The result keeps the source type and the
+/// nullability of the input column.
+class IntegerTruncatePartitionColumnTransform : public PartitionColumnTransform {
+public:
+    /// @param source_type type of the source column; also reported as the result type.
+    /// @param width       truncation width used as the modulus in apply().
+    IntegerTruncatePartitionColumnTransform(const TypeDescriptor& source_type, int width)
+            : _source_type(source_type), _width(width) {}
+
+    std::string name() const override { return "IntegerTruncate"; }
+
+    const TypeDescriptor& get_result_type() const override { return _source_type; }
+
+    /// Applies the truncation to the column at `column_pos` of `block`.
+    ///
+    /// @return a new column (wrapped in ColumnNullable with the input's null map when the input
+    ///         is nullable) carrying the source column's name.
+    /// @throws doris::Exception (INTERNAL_ERROR) when the (nested) column is not a ColumnInt32,
+    ///         which includes constant columns.
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos);
+
+        // Peel off the nullable wrapper, remembering the null map so it can be re-attached.
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+        }
+
+        // Single guard for every unsupported input; a constant Int32 column is rejected the same
+        // way as any other non-ColumnInt32 column.
+        const ColumnInt32* col_integer = check_and_get_column<ColumnInt32>(column_ptr);
+        if (col_integer == nullptr) {
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "IntegerTruncatePartitionColumnTransform  transform partition "
+                                   "error use column_pos {} ",
+                                   column_pos);
+        }
+
+        const ColumnInt32::Container& in_data = col_integer->get_data();
+        auto col_res = ColumnInt32::create();
+        ColumnInt32::Container& out_data = col_res->get_data();
+        out_data.resize(in_data.size());
+
+        const Int32* __restrict p_in = in_data.data();
+        const Int32* end_in = p_in + in_data.size();
+        Int32* __restrict p_out = out_data.data();
+
+        while (p_in < end_in) {
+            // ((v % W) + W) % W is the non-negative remainder, also for negative v.
+            *p_out = *p_in - ((*p_in % _width) + _width) % _width;
+            ++p_in;
+            ++p_out;
+        }
+
+        if (is_nullable) {
+            auto res_column = ColumnNullable::create(std::move(col_res), null_map_column_ptr);
+            return {std::move(res_column),
+                    DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                    column_with_type_and_name.name};
+        }
+        return {std::move(col_res),
+                DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                column_with_type_and_name.name};
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _width;
+};
+
+/// Truncate partition transform for decimal columns (Iceberg writer).
+///
+/// Operates on the native integer representation of `T`: each value `v` becomes
+/// `v - (((v % W) + W) % W)` with `W == _width`, i.e. for a positive width it is rounded down to
+/// a multiple of `W`. The result column keeps the scale of the input column, the source type, and
+/// the nullability of the input.
+///
+/// @tparam T decimal type; must expose `T::NativeType` and be usable with `ColumnDecimal<T>`.
+template <typename T>
+class DecimalTruncatePartitionColumnTransform : public PartitionColumnTransform {
+public:
+    /// @param source_type type of the source column; also reported as the result type.
+    /// @param width       truncation width used as the modulus in apply().
+    DecimalTruncatePartitionColumnTransform(const TypeDescriptor& source_type, int width)
+            : _source_type(source_type), _width(width) {}
+
+    std::string name() const override { return "DecimalTruncate"; }
+
+    const TypeDescriptor& get_result_type() const override { return _source_type; }
+
+    /// Applies the truncation to the column at `column_pos` of `block`.
+    ///
+    /// @return a new column (wrapped in ColumnNullable with the input's null map when the input
+    ///         is nullable) carrying the source column's name.
+    /// @throws doris::Exception (INTERNAL_ERROR) when the (nested) column is not a
+    ///         ColumnDecimal<T>.
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos);
+
+        // Peel off the nullable wrapper, remembering the null map so it can be re-attached.
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+        }
+
+        // check_and_get_column yields nullptr on a type mismatch; fail loudly instead of
+        // dereferencing it, in line with the other transforms in this file.
+        const auto* const decimal_col = check_and_get_column<ColumnDecimal<T>>(column_ptr);
+        if (decimal_col == nullptr) {
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "DecimalTruncatePartitionColumnTransform transform partition "
+                                   "error use column_pos {}",
+                                   column_pos);
+        }
+        const auto& vec_src = decimal_col->get_data();
+
+        auto col_res = ColumnDecimal<T>::create(vec_src.size(), decimal_col->get_scale());
+        auto& vec_res = col_res->get_data();
+
+        // Source and destination hold the same decimal type, so both are walked as NativeType.
+        const typename T::NativeType* __restrict p_in =
+                reinterpret_cast<const typename T::NativeType*>(vec_src.data());
+        const typename T::NativeType* end_in = p_in + vec_src.size();
+        typename T::NativeType* __restrict p_out =
+                reinterpret_cast<typename T::NativeType*>(vec_res.data());
+
+        while (p_in < end_in) {
+            // Non-negative remainder, also for negative values.
+            typename T::NativeType remainder = ((*p_in % _width) + _width) % _width;
+            *p_out = *p_in - remainder;
+            ++p_in;
+            ++p_out;
+        }
+
+        if (is_nullable) {
+            auto res_column = ColumnNullable::create(std::move(col_res), null_map_column_ptr);
+            return {std::move(res_column),
+                    DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                    column_with_type_and_name.name};
+        }
+        return {std::move(col_res),
+                DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                column_with_type_and_name.name};
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _width;
+};
+
+/// Bucket partition transform for 32-bit integer columns (Iceberg writer).
+///
+/// Each value is widened to a 64-bit integer, hashed with 32-bit murmur3 (seed 0), and mapped
+/// to `(hash & INT32_MAX) % _bucket_num`. The result is an INT column with the nullability of
+/// the input column.
+class IntBucketPartitionColumnTransform : public PartitionColumnTransform {
+public:
+    /// @param source_type type of the source column.
+    /// @param bucket_num  number of buckets; used as the modulus in apply().
+    IntBucketPartitionColumnTransform(const TypeDescriptor& source_type, int bucket_num)
+            : _source_type(source_type), _bucket_num(bucket_num), _target_type(TYPE_INT) {}
+
+    std::string name() const override { return "IntBucket"; }
+
+    const TypeDescriptor& get_result_type() const override { return _target_type; }
+
+    /// Computes the bucket id for every row of the column at `column_pos` of `block`.
+    ///
+    /// @return a new Int32 column (wrapped in ColumnNullable with the input's null map when the
+    ///         input is nullable) carrying the source column's name.
+    /// @throws doris::Exception (INTERNAL_ERROR) when the (nested) column is not a ColumnInt32.
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos);
+
+        // Peel off the nullable wrapper, remembering the null map so it can be re-attached.
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+        }
+
+        const ColumnInt32* col_integer = check_and_get_column<ColumnInt32>(column_ptr);
+        if (col_integer == nullptr) {
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "IntBucketPartitionColumnTransform  transform partition error "
+                                   "use column_pos {} ",
+                                   column_pos);
+        }
+
+        const ColumnInt32::Container& in_data = col_integer->get_data();
+        auto col_res = ColumnInt32::create();
+        ColumnInt32::Container& out_data = col_res->get_data();
+        out_data.resize(in_data.size());
+
+        const Int32* __restrict p_in = in_data.data();
+        const Int32* end_in = p_in + in_data.size();
+        Int32* __restrict p_out = out_data.data();
+
+        while (p_in < end_in) {
+            // Integers are hashed through their 64-bit widening.
+            Int64 long_value = static_cast<Int64>(*p_in);
+            uint32_t hash_value = HashUtil::murmur_hash3_32(&long_value, sizeof(long_value), 0);
+
+            // Only the sign bit is masked off; no shift is applied. The Iceberg spec defines the
+            // bucket as (hash & Integer.MAX_VALUE) % N, and shifting first would yield bucket ids
+            // that differ from those computed by other Iceberg engines.
+            *p_out = (hash_value & INT32_MAX) % _bucket_num;
+            ++p_in;
+            ++p_out;
+        }
+
+        if (is_nullable) {
+            auto res_column = ColumnNullable::create(std::move(col_res), null_map_column_ptr);
+            return {std::move(res_column),
+                    DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                    column_with_type_and_name.name};
+        }
+        return {std::move(col_res),
+                DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                column_with_type_and_name.name};
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _bucket_num;
+    TypeDescriptor _target_type;
+};
+
+/// Bucket partition transform for decimal columns (Iceberg writer).
+///
+/// The native integer value of each decimal is serialized with BitUtil::IntToByteBuffer, hashed
+/// with 32-bit murmur3 (seed 0), and mapped to `(hash & INT32_MAX) % _bucket_num`. The result
+/// is an INT column with the nullability of the input column.
+///
+/// @tparam T decimal type; must expose `T::NativeType` and be usable with `ColumnDecimal<T>`.
+template <typename T>
+class DecimalBucketPartitionColumnTransform : public PartitionColumnTransform {
+public:
+    /// @param source_type type of the source column.
+    /// @param bucket_num  number of buckets; used as the modulus in apply().
+    DecimalBucketPartitionColumnTransform(const TypeDescriptor& source_type, int bucket_num)
+            : _source_type(source_type), _bucket_num(bucket_num), _target_type(TYPE_INT) {}
+
+    std::string name() const override { return "DecimalBucket"; }
+
+    const TypeDescriptor& get_result_type() const override { return _target_type; }
+
+    /// Computes the bucket id for every row of the column at `column_pos` of `block`.
+    ///
+    /// @return a new Int32 column (wrapped in ColumnNullable with the input's null map when the
+    ///         input is nullable) carrying the source column's name.
+    /// @throws doris::Exception (INTERNAL_ERROR) when the (nested) column is not a
+    ///         ColumnDecimal<T>.
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos);
+
+        // Peel off the nullable wrapper, remembering the null map so it can be re-attached.
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+        }
+
+        const ColumnDecimal<T>* col_decimal = check_and_get_column<ColumnDecimal<T>>(column_ptr);
+        if (col_decimal == nullptr) {
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "DecimalBucketPartitionColumnTransform  transform partition "
+                                   "error use column_pos {} ",
+                                   column_pos);
+        }
+
+        const auto& vec_src = col_decimal->get_data();
+        auto col_res = ColumnInt32::create();
+        ColumnInt32::Container& out_data = col_res->get_data();
+        out_data.resize(vec_src.size());
+
+        const typename T::NativeType* __restrict p_in =
+                reinterpret_cast<const typename T::NativeType*>(vec_src.data());
+        const typename T::NativeType* end_in = p_in + vec_src.size();
+        // The destination is an Int32 column, so the output cursor must step in Int32 units.
+        // Walking it as T::NativeType would write 8 or 16 bytes per row for wider decimals and
+        // run past the end of the buffer.
+        Int32* __restrict p_out = out_data.data();
+
+        while (p_in < end_in) {
+            std::string buffer = BitUtil::IntToByteBuffer(*p_in);
+            uint32_t hash_value = HashUtil::murmur_hash3_32(buffer.data(), buffer.size(), 0);
+
+            // Only the sign bit is masked off; no shift is applied. The Iceberg spec defines the
+            // bucket as (hash & Integer.MAX_VALUE) % N.
+            *p_out = (hash_value & INT32_MAX) % _bucket_num;
+            ++p_in;
+            ++p_out;
+        }
+
+        if (is_nullable) {
+            auto res_column = ColumnNullable::create(std::move(col_res), null_map_column_ptr);
+            return {std::move(res_column),
+                    DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                    column_with_type_and_name.name};
+        }
+        return {std::move(col_res),
+                DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                column_with_type_and_name.name};
+    }
+
+    /// Human-readable form of a partition value; identical to get_partition_value().
+    std::string to_human_string(const TypeDescriptor& type, const std::any& value) const override {
+        return get_partition_value(type, value);
+    }
+
+    /// Renders a bucket id for use as a partition value.
+    ///
+    /// @param type  unused by this transform.
+    /// @param value expected to be empty or to hold an Int32 bucket id.
+    /// @return "null" for an empty `value`, otherwise the decimal string of the bucket id.
+    /// @throws doris::Exception (INTERNAL_ERROR) when `value` holds something other than Int32.
+    ///         Falling back to bucket 0 would silently route the row to the wrong partition.
+    std::string get_partition_value(const TypeDescriptor& type,
+                                    const std::any& value) const override {
+        if (!value.has_value()) {
+            return "null";
+        }
+        try {
+            return std::to_string(std::any_cast<Int32>(value));
+        } catch (const std::bad_any_cast& e) {
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "DecimalBucketPartitionColumnTransform parse value error: {}",
+                                   e.what());
+        }
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _bucket_num;
+    TypeDescriptor _target_type;
+};
+
+ class DateBucketPartitionColumnTransform : public PartitionColumnTransform {
+ public:
+     DateBucketPartitionColumnTransform(const TypeDescriptor& source_type, int 
bucket_num)
+            : _source_type(source_type), _bucket_num(bucket_num), 
_target_type(TYPE_INT) {}
+
+    std::string name() const override { return "DateBucket"; }
+
+    const TypeDescriptor& get_result_type() const override { return 
_target_type; }
+
+     ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+       const ColumnWithTypeAndName& column_with_type_and_name = 
block.get_by_position(column_pos);
+
+         ColumnPtr column_ptr;
+         ColumnPtr null_map_column_ptr;
+         bool is_nullable = false;
+        if (auto* nullable_column =
+                    
check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+             is_nullable = true;
+         } else {
+             column_ptr = column_with_type_and_name.column;
+             is_nullable = false;
+        }
+        if (const ColumnDateV2* col = 
check_and_get_column<ColumnDateV2>(column_ptr)) {
+            auto col_res = ColumnInt32::create();
+            ColumnInt32::Container& out_data = col_res->get_data();
+            out_data.resize(col->get_data().size());
+
+            const ColumnDateV2::Container& in_data = col->get_data();
+            const auto* end_in = in_data.data() + in_data.size();
+
+            const auto* __restrict p_in = in_data.data();
+            auto* __restrict p_out = out_data.data();
+
+            while (p_in < end_in) {
+                DateV2Value<DateV2ValueType> value =
+                        binary_cast<uint32_t, 
DateV2Value<DateV2ValueType>>(*(UInt32*)p_in);
+
+                int32_t days_from_unix_epoch = value.daynr() - 719528;
+                Int64 long_value = static_cast<Int64>(days_from_unix_epoch);

Review Comment:
   warning: use auto when initializing with a cast to avoid duplicating the 
type name [modernize-use-auto]
   
   ```suggestion
                   auto long_value = static_cast<Int64>(days_from_unix_epoch);
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to