github-actions[bot] commented on code in PR #36289:
URL: https://github.com/apache/doris/pull/36289#discussion_r1644468430


##########
be/src/vec/sink/writer/iceberg/partition_transformers.h:
##########
@@ -43,38 +43,1386 @@ class PartitionColumnTransforms {
             const doris::iceberg::PartitionField& field, const TypeDescriptor& source_type);
 };
 
+class PartitionColumnTransformUtils {
+public:
+    static DateV2Value<DateV2ValueType>& epoch_date() {
+        static DateV2Value<DateV2ValueType> epoch_date;
+        static bool initialized = false;
+        if (!initialized) {
+            epoch_date.from_date_str("1970-01-01 00:00:00", 19);
+            initialized = true;
+        }
+        return epoch_date;
+    }
+
+    static DateV2Value<DateTimeV2ValueType>& epoch_datetime() {
+        static DateV2Value<DateTimeV2ValueType> epoch_datetime;
+        static bool initialized = false;
+        if (!initialized) {
+            epoch_datetime.from_date_str("1970-01-01 00:00:00", 19);
+            initialized = true;
+        }
+        return epoch_datetime;
+    }
+
+    static std::string human_year(int year_ordinal) {
+        auto year = std::chrono::year_month_day(
+                            std::chrono::sys_days(std::chrono::floor<std::chrono::days>(
+                                    EPOCH + std::chrono::years(year_ordinal))))
+                            .year();
+        return std::to_string(static_cast<int>(year));
+    }
+
+    static std::string human_month(int month_ordinal) {
+        auto ymd = std::chrono::year_month_day(std::chrono::sys_days(
+                std::chrono::floor<std::chrono::days>(EPOCH + std::chrono::months(month_ordinal))));
+        return fmt::format("{:04d}-{:02d}", static_cast<int>(ymd.year()),
+                           static_cast<unsigned>(ymd.month()));
+    }
+
+    static std::string human_day(int day_ordinal) {
+        auto ymd = std::chrono::year_month_day(std::chrono::sys_days(
+                std::chrono::floor<std::chrono::days>(EPOCH + std::chrono::days(day_ordinal))));
+        return fmt::format("{:04d}-{:02d}-{:02d}", static_cast<int>(ymd.year()),
+                           static_cast<unsigned>(ymd.month()), static_cast<unsigned>(ymd.day()));
+    }
+
+    static std::string human_hour(int hour_ordinal) {
+        int day_value = hour_ordinal / 24;
+        int housr_value = hour_ordinal % 24;
+        auto ymd = std::chrono::year_month_day(std::chrono::sys_days(
+                std::chrono::floor<std::chrono::days>(EPOCH + std::chrono::days(day_value))));
+        return fmt::format("{:04d}-{:02d}-{:02d}-{:02d}", static_cast<int>(ymd.year()),
+                           static_cast<unsigned>(ymd.month()), static_cast<unsigned>(ymd.day()),
+                           housr_value);
+    }
+
+private:
+    static const std::chrono::time_point<std::chrono::system_clock> EPOCH;
+    PartitionColumnTransformUtils() = default;
+};
+
 class PartitionColumnTransform {
 public:
     PartitionColumnTransform() = default;
 
     virtual ~PartitionColumnTransform() = default;
 
-    virtual bool preserves_non_null() const { return false; }
-
-    virtual bool monotonic() const { return true; }
-
-    virtual bool temporal() const { return false; }
+    virtual std::string name() const;
 
     virtual const TypeDescriptor& get_result_type() const = 0;
 
-    virtual bool is_void() const { return false; }
-
-    virtual ColumnWithTypeAndName apply(Block& block, int idx) = 0;
+    virtual ColumnWithTypeAndName apply(Block& block, int column_pos) = 0;
 
     virtual std::string to_human_string(const TypeDescriptor& type, const std::any& value) const;
+
+    virtual std::string get_partition_value(const TypeDescriptor& type,
+                                            const std::any& value) const;
 };
 
 class IdentityPartitionColumnTransform : public PartitionColumnTransform {
 public:
     IdentityPartitionColumnTransform(const TypeDescriptor& source_type)
             : _source_type(source_type) {}
 
-    virtual const TypeDescriptor& get_result_type() const { return _source_type; }
+    std::string name() const override { return "Identity"; }
+
+    const TypeDescriptor& get_result_type() const override { return _source_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos);
+        return {column_with_type_and_name.column, column_with_type_and_name.type,
+                column_with_type_and_name.name};
+    }
+
+private:
+    TypeDescriptor _source_type;
+};
+
+class StringTruncatePartitionColumnTransform : public PartitionColumnTransform {
+public:
+    StringTruncatePartitionColumnTransform(const TypeDescriptor& source_type, int width)
+            : _source_type(source_type), _width(width) {}
+
+    std::string name() const override { return "StringTruncate"; }
+
+    const TypeDescriptor& get_result_type() const override { return _source_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        auto int_type = std::make_shared<DataTypeInt32>();
+        size_t num_columns_without_result = block.columns();
+        const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos);
+
+        ColumnPtr string_column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            string_column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            string_column_ptr = column_with_type_and_name.column;
+            is_nullable = false;
+        }
+        block.replace_by_position(column_pos, std::move(string_column_ptr));
+        block.insert(
+                {int_type->create_column_const(block.rows(), to_field(1)), int_type, "const 1"});
+        block.insert({int_type->create_column_const(block.rows(), to_field(_width)), int_type,
+                      fmt::format("const {}", _width)});
+        block.insert({nullptr, std::make_shared<DataTypeString>(), "result"});
+        ColumnNumbers temp_arguments(3);
+        temp_arguments[0] = column_pos;                     // str column
+        temp_arguments[1] = num_columns_without_result;     // pos
+        temp_arguments[2] = num_columns_without_result + 1; // width
+        size_t result_column_id = num_columns_without_result + 2;
+
+        SubstringUtil::substring_execute(block, temp_arguments, result_column_id, block.rows());
+        if (is_nullable) {
+            auto res_column = ColumnNullable::create(block.get_by_position(result_column_id).column,
+                                                     null_map_column_ptr);
+            Block::erase_useless_column(&block, num_columns_without_result);
+            return {std::move(res_column),
+                    DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                    column_with_type_and_name.name};
+        } else {
+            auto res_column = block.get_by_position(result_column_id).column;
+            Block::erase_useless_column(&block, num_columns_without_result);
+            return {std::move(res_column),
+                    DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                    column_with_type_and_name.name};
+        }
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _width;
+};
+
+class IntegerTruncatePartitionColumnTransform : public PartitionColumnTransform {
+public:
+    IntegerTruncatePartitionColumnTransform(const TypeDescriptor& source_type, int width)
+            : _source_type(source_type), _width(width) {}
+
+    std::string name() const override { return "IntegerTruncate"; }
+
+    const TypeDescriptor& get_result_type() const override { return _source_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos);
+
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+            is_nullable = false;
+        }
+        if (const ColumnInt32* col_integer = check_and_get_column<ColumnInt32>(column_ptr)) {
+            auto col_res = ColumnInt32::create();
+            ColumnInt32::Container& out_data = col_res->get_data();
+            out_data.resize(col_integer->get_data().size());
+            const ColumnInt32::Container& in_data = col_integer->get_data();
+            const int* end_in = in_data.data() + in_data.size();
+
+            const Int32* __restrict p_in = in_data.data();
+            Int32* __restrict p_out = out_data.data();
+
+            while (p_in < end_in) {
+                *p_out = *p_in - ((*p_in % _width) + _width) % _width;
+                ++p_in;
+                ++p_out;
+            }
+            if (is_nullable) {
+                auto res_column = ColumnNullable::create(std::move(col_res), null_map_column_ptr);
+                return {std::move(res_column),
+                        DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                        column_with_type_and_name.name};
+            } else {
+                return {std::move(col_res),
+                        DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                        column_with_type_and_name.name};
+            }
+        } else if (auto col_right_const = check_and_get_column_const<ColumnInt32>(column_ptr)) {
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "IntegerTruncatePartitionColumnTransform  transform partition "
+                                   "error use column_pos {} ",
+                                   column_pos);
+        } else {
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "IntegerTruncatePartitionColumnTransform  transform partition "
+                                   "error use column_pos {} ",
+                                   column_pos);
+        }
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _width;
+};
+
+class BigintTruncatePartitionColumnTransform : public PartitionColumnTransform {
+public:
+    BigintTruncatePartitionColumnTransform(const TypeDescriptor& source_type, int width)
+            : _source_type(source_type), _width(width) {}
+
+    std::string name() const override { return "BigintTruncate"; }
+
+    const TypeDescriptor& get_result_type() const override { return _source_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos);
+
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+            is_nullable = false;
+        }
+        if (const ColumnInt64* col_integer = check_and_get_column<ColumnInt64>(column_ptr)) {
+            auto col_res = ColumnInt64::create();
+            ColumnInt64::Container& out_data = col_res->get_data();
+            out_data.resize(col_integer->get_data().size());
+            const ColumnInt64::Container& in_data = col_integer->get_data();
+            const Int64* end_in = in_data.data() + in_data.size();
+
+            const Int64* __restrict p_in = in_data.data();
+            Int64* __restrict p_out = out_data.data();
+
+            while (p_in < end_in) {
+                *p_out = *p_in - ((*p_in % _width) + _width) % _width;
+                ++p_in;
+                ++p_out;
+            }
+            if (is_nullable) {
+                auto res_column = ColumnNullable::create(std::move(col_res), null_map_column_ptr);
+                return {std::move(res_column),
+                        DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                        column_with_type_and_name.name};
+            } else {
+                return {std::move(col_res),
+                        DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                        column_with_type_and_name.name};
+            }
+        } else if (auto col_right_const = check_and_get_column_const<ColumnInt64>(column_ptr)) {
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "BigintTruncatePartitionColumnTransform  transform partition "
+                                   "error use column_pos {} ",
+                                   column_pos);
+        } else {
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "BigintTruncatePartitionColumnTransform  transform partition "
+                                   "error use column_pos {} ",
+                                   column_pos);
+        }
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _width;
+};
+
+template <typename T>
+class DecimalTruncatePartitionColumnTransform : public PartitionColumnTransform {
+public:
+    DecimalTruncatePartitionColumnTransform(const TypeDescriptor& source_type, int width)
+            : _source_type(source_type), _width(width) {}
+
+    std::string name() const override { return "DecimalTruncate"; }
+
+    const TypeDescriptor& get_result_type() const override { return _source_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos);
+
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+            is_nullable = false;
+        }
+
+        const auto* const decimal_col = check_and_get_column<ColumnDecimal<T>>(column_ptr);
+        const auto& vec_src = decimal_col->get_data();
+
+        auto col_res = ColumnDecimal<T>::create(vec_src.size(), decimal_col->get_scale());
+        auto& vec_res = col_res->get_data();
+
+        const typename T::NativeType* __restrict p_in =
+                reinterpret_cast<const T::NativeType*>(vec_src.data());
+        const typename T::NativeType* end_in =
+                reinterpret_cast<const T::NativeType*>(vec_src.data()) + vec_src.size();
+        typename T::NativeType* __restrict p_out = reinterpret_cast<T::NativeType*>(vec_res.data());
+
+        while (p_in < end_in) {
+            typename T::NativeType remainder = ((*p_in % _width) + _width) % _width;
+            *p_out = *p_in - remainder;
+            ++p_in;
+            ++p_out;
+        }
+
+        if (is_nullable) {
+            auto res_column = ColumnNullable::create(std::move(col_res), null_map_column_ptr);
+            return {res_column,
+                    DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                    column_with_type_and_name.name};
+        } else {
+            return {std::move(col_res),
+                    DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                    column_with_type_and_name.name};
+        }
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _width;
+};
+
+class IntBucketPartitionColumnTransform : public PartitionColumnTransform {
+public:
+    IntBucketPartitionColumnTransform(const TypeDescriptor& source_type, int bucket_num)
+            : _source_type(source_type), _bucket_num(bucket_num), _target_type(TYPE_INT) {}
+
+    std::string name() const override { return "IntBucket"; }
+
+    const TypeDescriptor& get_result_type() const override { return _target_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos);
+
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+            is_nullable = false;
+        }
+        if (const ColumnInt32* col_integer = check_and_get_column<ColumnInt32>(column_ptr)) {
+            auto col_res = ColumnInt32::create();
+            ColumnInt32::Container& out_data = col_res->get_data();
+            out_data.resize(col_integer->get_data().size());
+
+            const ColumnInt32::Container& in_data = col_integer->get_data();
+            const int* end_in = in_data.data() + in_data.size();
+
+            const Int32* __restrict p_in = in_data.data();
+            Int32* __restrict p_out = out_data.data();
+
+            while (p_in < end_in) {
+                Int64 long_value = static_cast<Int64>(*p_in);
+                uint32_t hash_value = HashUtil::murmur_hash3_32(&long_value, sizeof(long_value), 0);
+
+                *p_out = ((hash_value >> 1) & INT32_MAX) % _bucket_num;
+                ++p_in;
+                ++p_out;
+            }
+            if (is_nullable) {
+                auto res_column = ColumnNullable::create(std::move(col_res), null_map_column_ptr);
+                return {res_column,
+                        DataTypeFactory::instance().create_data_type(get_result_type(), true),
+                        column_with_type_and_name.name};
+            } else {
+                return {std::move(col_res),
+                        DataTypeFactory::instance().create_data_type(get_result_type(), false),
+                        column_with_type_and_name.name};
+            }
+        } else {
+            //assert(0);
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "IntBucketPartitionColumnTransform  transform partition error "
+                                   "use column_pos {} ",
+                                   column_pos);
+        }
+    }
+
+private:
+    TypeDescriptor _source_type;
+    int _bucket_num;
+    TypeDescriptor _target_type;
+};
+
+class BigintBucketPartitionColumnTransform : public PartitionColumnTransform {
+public:
+    BigintBucketPartitionColumnTransform(const TypeDescriptor& source_type, int bucket_num)
+            : _source_type(source_type), _bucket_num(bucket_num), _target_type(TYPE_INT) {}
+
+    std::string name() const override { return "BigintBucket"; }
+
+    const TypeDescriptor& get_result_type() const override { return _target_type; }
+
+    ColumnWithTypeAndName apply(Block& block, int column_pos) override {
+        const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos);
+
+        ColumnPtr column_ptr;
+        ColumnPtr null_map_column_ptr;
+        bool is_nullable = false;
+        if (auto* nullable_column =
+                    check_and_get_column<ColumnNullable>(column_with_type_and_name.column)) {
+            null_map_column_ptr = nullable_column->get_null_map_column_ptr();
+            column_ptr = nullable_column->get_nested_column_ptr();
+            is_nullable = true;
+        } else {
+            column_ptr = column_with_type_and_name.column;
+            is_nullable = false;
+        }
+        if (const ColumnInt64* col_integer = check_and_get_column<ColumnInt64>(column_ptr)) {
+            auto col_res = ColumnInt64::create();
+            ColumnInt64::Container& out_data = col_res->get_data();
+            out_data.resize(col_integer->get_data().size());
+
+            const ColumnInt64::Container& in_data = col_integer->get_data();
+            const Int64* end_in = in_data.data() + in_data.size();
+
+            const Int64* __restrict p_in = in_data.data();
+            Int64* __restrict p_out = out_data.data();
+
+            while (p_in < end_in) {
+                Int64 long_value = static_cast<Int64>(*p_in);

Review Comment:
   warning: use auto when initializing with a cast to avoid duplicating the 
type name [modernize-use-auto]
   
   ```suggestion
                   auto long_value = static_cast<Int64>(*p_in);
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to