github-actions[bot] commented on code in PR #32873: URL: https://github.com/apache/doris/pull/32873#discussion_r1554809578
########## be/src/vec/exec/format/parquet/parquet_column_convert.cpp: ########## @@ -20,67 +20,287 @@ #include <cctz/time_zone.h> #include "vec/columns/column_nullable.h" -namespace doris::vectorized { -namespace ParquetConvert { +namespace doris::vectorized::parquet { const cctz::time_zone ConvertParams::utc0 = cctz::utc_time_zone(); -ColumnPtr get_column(tparquet::Type::type parquet_physical_type, PrimitiveType show_type, - ColumnPtr& doris_column, DataTypePtr& doris_type, bool* need_convert) { - ColumnPtr ans_column = doris_column; - DataTypePtr tmp_data_type; - - switch (parquet_physical_type) { - case tparquet::Type::type::BOOLEAN: - tmp_data_type = std::make_shared<DataTypeUInt8>(); - break; - case tparquet::Type::type::INT32: - tmp_data_type = std::make_shared<DataTypeInt32>(); - break; - case tparquet::Type::type::INT64: - tmp_data_type = std::make_shared<DataTypeInt64>(); - break; - case tparquet::Type::type::FLOAT: - tmp_data_type = std::make_shared<DataTypeFloat32>(); - break; - case tparquet::Type::type::DOUBLE: - tmp_data_type = std::make_shared<DataTypeFloat64>(); - break; - case tparquet::Type::type::BYTE_ARRAY: - case tparquet::Type::type::FIXED_LEN_BYTE_ARRAY: - tmp_data_type = std::make_shared<DataTypeString>(); - break; - case tparquet::Type::type::INT96: - tmp_data_type = std::make_shared<DataTypeInt8>(); - break; +#define FOR_LOGICAL_DECIMAL_TYPES(M) \ + M(TYPE_DECIMALV2) \ + M(TYPE_DECIMAL32) \ + M(TYPE_DECIMAL64) \ + M(TYPE_DECIMAL128I) + +bool PhysicalToLogicalConverter::is_parquet_native_type(PrimitiveType type) { + switch (type) { + case TYPE_BOOLEAN: + case TYPE_INT: + case TYPE_BIGINT: + case TYPE_FLOAT: + case TYPE_DOUBLE: + case TYPE_STRING: + case TYPE_CHAR: + case TYPE_VARCHAR: + return true; + default: + return false; + } +} + +bool PhysicalToLogicalConverter::is_decimal_type(doris::PrimitiveType type) { + switch (type) { + case TYPE_DECIMAL32: + case TYPE_DECIMAL64: + case TYPE_DECIMAL128I: + case TYPE_DECIMALV2: + return true; + 
default: + return false; + } +} + +ColumnPtr PhysicalToLogicalConverter::get_physical_column(tparquet::Type::type src_physical_type, + TypeDescriptor src_logical_type, + ColumnPtr& dst_logical_column, + const DataTypePtr& dst_logical_type, + bool is_dict_filter) { + if (is_dict_filter) { + src_physical_type = tparquet::Type::INT32; + src_logical_type = TypeDescriptor(PrimitiveType::TYPE_INT); + } + if (is_consistent() && _logical_converter->is_consistent()) { + if (_cached_src_physical_type == nullptr) { + _cached_src_physical_type = DataTypeFactory::instance().create_data_type( + src_logical_type, dst_logical_type->is_nullable()); + } + return dst_logical_column; } - if (tmp_data_type->get_type_id() == remove_nullable(doris_type)->get_type_id()) { - if (tmp_data_type->get_type_id() == TypeIndex::String && - (show_type == PrimitiveType::TYPE_DECIMAL32 || - show_type == PrimitiveType::TYPE_DECIMAL64 || - show_type == PrimitiveType::TYPE_DECIMALV2 || - show_type == PrimitiveType::TYPE_DECIMAL128I)) { - *need_convert = true; - ans_column = tmp_data_type->create_column(); + if (_cached_src_physical_column == nullptr) { + switch (src_physical_type) { + case tparquet::Type::type::BOOLEAN: + _cached_src_physical_type = std::make_shared<DataTypeUInt8>(); + break; + case tparquet::Type::type::INT32: + _cached_src_physical_type = std::make_shared<DataTypeInt32>(); + break; + case tparquet::Type::type::INT64: + _cached_src_physical_type = std::make_shared<DataTypeInt64>(); + break; + case tparquet::Type::type::FLOAT: + _cached_src_physical_type = std::make_shared<DataTypeFloat32>(); + break; + case tparquet::Type::type::DOUBLE: + _cached_src_physical_type = std::make_shared<DataTypeFloat64>(); + break; + case tparquet::Type::type::BYTE_ARRAY: + _cached_src_physical_type = std::make_shared<DataTypeString>(); + break; + case tparquet::Type::type::FIXED_LEN_BYTE_ARRAY: + _cached_src_physical_type = std::make_shared<DataTypeUInt8>(); + break; + case tparquet::Type::type::INT96: + 
_cached_src_physical_type = std::make_shared<DataTypeInt8>(); + break; + } + _cached_src_physical_column = _cached_src_physical_type->create_column(); + if (dst_logical_type->is_nullable()) { + _cached_src_physical_type = make_nullable(_cached_src_physical_type); + } + } + // remove the old cached data + _cached_src_physical_column->assume_mutable()->clear(); + if (is_consistent()) { + if (dst_logical_type->is_nullable()) { + auto doris_nullable_column = const_cast<ColumnNullable*>( + static_cast<const ColumnNullable*>(dst_logical_column.get())); + _src_logical_column = ColumnNullable::create( + _cached_src_physical_column, doris_nullable_column->get_null_map_column_ptr()); } else { - *need_convert = false; + _src_logical_column = _cached_src_physical_column; } } else { - ans_column = tmp_data_type->create_column(); - *need_convert = true; + _src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_column, + dst_logical_type); } - if (*need_convert && doris_type->is_nullable()) { + if (dst_logical_type->is_nullable()) { // In order to share null map between parquet converted src column and dst column to avoid copying. It is very tricky that will // call mutable function `doris_nullable_column->get_null_map_column_ptr()` which will set `_need_update_has_null = true`. // Because some operations such as agg will call `has_null()` to set `_need_update_has_null = false`. 
- auto doris_nullable_column = - const_cast<ColumnNullable*>(static_cast<const ColumnNullable*>(doris_column.get())); - ans_column = ColumnNullable::create(ans_column, - doris_nullable_column->get_null_map_column_ptr()); + auto doris_nullable_column = const_cast<ColumnNullable*>( + static_cast<const ColumnNullable*>(dst_logical_column.get())); + return ColumnNullable::create(_cached_src_physical_column, + doris_nullable_column->get_null_map_column_ptr()); + } + + return _cached_src_physical_column; +} + +static void get_decimal_converter(FieldSchema* field_schema, const TypeDescriptor& src_logical_type, Review Comment: warning: function 'get_decimal_converter' exceeds recommended size/complexity thresholds [readability-function-size] ```cpp static void get_decimal_converter(FieldSchema* field_schema, const TypeDescriptor& src_logical_type, ^ ``` <details> <summary>Additional context</summary> **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:137:** 103 lines including whitespace and comments (threshold 80) ```cpp static void get_decimal_converter(FieldSchema* field_schema, const TypeDescriptor& src_logical_type, ^ ``` </details> ########## be/src/vec/exec/format/parquet/parquet_column_convert.cpp: ########## @@ -20,67 +20,287 @@ #include <cctz/time_zone.h> #include "vec/columns/column_nullable.h" -namespace doris::vectorized { -namespace ParquetConvert { +namespace doris::vectorized::parquet { const cctz::time_zone ConvertParams::utc0 = cctz::utc_time_zone(); -ColumnPtr get_column(tparquet::Type::type parquet_physical_type, PrimitiveType show_type, - ColumnPtr& doris_column, DataTypePtr& doris_type, bool* need_convert) { - ColumnPtr ans_column = doris_column; - DataTypePtr tmp_data_type; - - switch (parquet_physical_type) { - case tparquet::Type::type::BOOLEAN: - tmp_data_type = std::make_shared<DataTypeUInt8>(); - break; - case tparquet::Type::type::INT32: - tmp_data_type = std::make_shared<DataTypeInt32>(); - break; - case tparquet::Type::type::INT64: - 
tmp_data_type = std::make_shared<DataTypeInt64>(); - break; - case tparquet::Type::type::FLOAT: - tmp_data_type = std::make_shared<DataTypeFloat32>(); - break; - case tparquet::Type::type::DOUBLE: - tmp_data_type = std::make_shared<DataTypeFloat64>(); - break; - case tparquet::Type::type::BYTE_ARRAY: - case tparquet::Type::type::FIXED_LEN_BYTE_ARRAY: - tmp_data_type = std::make_shared<DataTypeString>(); - break; - case tparquet::Type::type::INT96: - tmp_data_type = std::make_shared<DataTypeInt8>(); - break; +#define FOR_LOGICAL_DECIMAL_TYPES(M) \ + M(TYPE_DECIMALV2) \ + M(TYPE_DECIMAL32) \ + M(TYPE_DECIMAL64) \ + M(TYPE_DECIMAL128I) + +bool PhysicalToLogicalConverter::is_parquet_native_type(PrimitiveType type) { + switch (type) { + case TYPE_BOOLEAN: + case TYPE_INT: + case TYPE_BIGINT: + case TYPE_FLOAT: + case TYPE_DOUBLE: + case TYPE_STRING: + case TYPE_CHAR: + case TYPE_VARCHAR: + return true; + default: + return false; + } +} + +bool PhysicalToLogicalConverter::is_decimal_type(doris::PrimitiveType type) { + switch (type) { + case TYPE_DECIMAL32: + case TYPE_DECIMAL64: + case TYPE_DECIMAL128I: + case TYPE_DECIMALV2: + return true; + default: + return false; + } +} + +ColumnPtr PhysicalToLogicalConverter::get_physical_column(tparquet::Type::type src_physical_type, + TypeDescriptor src_logical_type, + ColumnPtr& dst_logical_column, + const DataTypePtr& dst_logical_type, + bool is_dict_filter) { + if (is_dict_filter) { + src_physical_type = tparquet::Type::INT32; + src_logical_type = TypeDescriptor(PrimitiveType::TYPE_INT); + } + if (is_consistent() && _logical_converter->is_consistent()) { + if (_cached_src_physical_type == nullptr) { + _cached_src_physical_type = DataTypeFactory::instance().create_data_type( + src_logical_type, dst_logical_type->is_nullable()); + } + return dst_logical_column; } - if (tmp_data_type->get_type_id() == remove_nullable(doris_type)->get_type_id()) { - if (tmp_data_type->get_type_id() == TypeIndex::String && - (show_type == 
PrimitiveType::TYPE_DECIMAL32 || - show_type == PrimitiveType::TYPE_DECIMAL64 || - show_type == PrimitiveType::TYPE_DECIMALV2 || - show_type == PrimitiveType::TYPE_DECIMAL128I)) { - *need_convert = true; - ans_column = tmp_data_type->create_column(); + if (_cached_src_physical_column == nullptr) { + switch (src_physical_type) { + case tparquet::Type::type::BOOLEAN: + _cached_src_physical_type = std::make_shared<DataTypeUInt8>(); + break; + case tparquet::Type::type::INT32: + _cached_src_physical_type = std::make_shared<DataTypeInt32>(); + break; + case tparquet::Type::type::INT64: + _cached_src_physical_type = std::make_shared<DataTypeInt64>(); + break; + case tparquet::Type::type::FLOAT: + _cached_src_physical_type = std::make_shared<DataTypeFloat32>(); + break; + case tparquet::Type::type::DOUBLE: + _cached_src_physical_type = std::make_shared<DataTypeFloat64>(); + break; + case tparquet::Type::type::BYTE_ARRAY: + _cached_src_physical_type = std::make_shared<DataTypeString>(); + break; + case tparquet::Type::type::FIXED_LEN_BYTE_ARRAY: + _cached_src_physical_type = std::make_shared<DataTypeUInt8>(); + break; + case tparquet::Type::type::INT96: + _cached_src_physical_type = std::make_shared<DataTypeInt8>(); + break; + } + _cached_src_physical_column = _cached_src_physical_type->create_column(); + if (dst_logical_type->is_nullable()) { + _cached_src_physical_type = make_nullable(_cached_src_physical_type); + } + } + // remove the old cached data + _cached_src_physical_column->assume_mutable()->clear(); + if (is_consistent()) { + if (dst_logical_type->is_nullable()) { + auto doris_nullable_column = const_cast<ColumnNullable*>( + static_cast<const ColumnNullable*>(dst_logical_column.get())); + _src_logical_column = ColumnNullable::create( + _cached_src_physical_column, doris_nullable_column->get_null_map_column_ptr()); } else { - *need_convert = false; + _src_logical_column = _cached_src_physical_column; } } else { - ans_column = tmp_data_type->create_column(); - 
*need_convert = true; + _src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_column, + dst_logical_type); } - if (*need_convert && doris_type->is_nullable()) { + if (dst_logical_type->is_nullable()) { // In order to share null map between parquet converted src column and dst column to avoid copying. It is very tricky that will // call mutable function `doris_nullable_column->get_null_map_column_ptr()` which will set `_need_update_has_null = true`. // Because some operations such as agg will call `has_null()` to set `_need_update_has_null = false`. - auto doris_nullable_column = - const_cast<ColumnNullable*>(static_cast<const ColumnNullable*>(doris_column.get())); - ans_column = ColumnNullable::create(ans_column, - doris_nullable_column->get_null_map_column_ptr()); + auto doris_nullable_column = const_cast<ColumnNullable*>( + static_cast<const ColumnNullable*>(dst_logical_column.get())); + return ColumnNullable::create(_cached_src_physical_column, + doris_nullable_column->get_null_map_column_ptr()); + } + + return _cached_src_physical_column; +} + +static void get_decimal_converter(FieldSchema* field_schema, const TypeDescriptor& src_logical_type, Review Comment: warning: function 'get_decimal_converter' has cognitive complexity of 131 (threshold 50) [readability-function-cognitive-complexity] ```cpp static void get_decimal_converter(FieldSchema* field_schema, const TypeDescriptor& src_logical_type, ^ ``` <details> <summary>Additional context</summary> **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:146:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (is_decimal(remove_nullable(dst_logical_type))) { ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:149:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (src_physical_type == tparquet::Type::FIXED_LEN_BYTE_ARRAY) { ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:150:** +2, including 
nesting penalty of 1, nesting level increased to 2 ```cpp switch (src_logical_primitive) { ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:171:** +1, nesting level increased to 3 ```cpp 
FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:176:** +1, nesting level increased to 1 ```cpp } else if (src_physical_type == tparquet::Type::BYTE_ARRAY) { ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:177:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp switch (src_logical_primitive) { ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp 
FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:195:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:200:** +1, nesting level increased to 1 ```cpp } else if (src_physical_type == tparquet::Type::INT32 || ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:202:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp switch (src_logical_primitive) { ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp 
FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp 
FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` 
**be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 3 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:235:** +1, nesting level increased to 4 ```cpp FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) ^ ``` **be/src/vec/exec/format/parquet/parquet_column_convert.cpp:240:** +1, nesting level increased to 1 ```cpp } else { ^ ``` </details> ########## be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h: ########## @@ -417,9 +451,8 @@ Status DeltaBitPackDecoder<T, PhysicalType>::_init_mini_block(int bit_width) { return Status::OK(); } -template <typename T, tparquet::Type::type PhysicalType> -Status DeltaBitPackDecoder<T, PhysicalType>::_get_internal(T* buffer, int num_values, - int* out_num_values) { +template <typename T> +Status DeltaBitPackDecoder<T>::_get_internal(T* buffer, int num_values, int* out_num_values) { Review Comment: warning: function '_get_internal' has cognitive complexity of 76 (threshold 50) [readability-function-cognitive-complexity] ```cpp Status DeltaBitPackDecoder<T>::_get_internal(T* buffer, int num_values, int* out_num_values) { ^ ``` <details> <summary>Additional context</summary> **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:456:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (num_values == 0) { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:461:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp while (i < num_values) { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:462:** +2, including nesting penalty of 1, nesting level 
increased to 2 ```cpp if (PREDICT_FALSE(_values_remaining_current_mini_block == 0)) { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:463:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if (PREDICT_FALSE(!_block_initialized)) { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:466:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if (i == num_values) { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:474:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp if (_total_value_count != 1) { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:475:** +6, including nesting penalty of 5, nesting level increased to 6 ```cpp RETURN_IF_ERROR(_init_block()); ^ ``` **be/src/common/status.h:541:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:475:** +7, including nesting penalty of 6, nesting level increased to 7 ```cpp RETURN_IF_ERROR(_init_block()); ^ ``` **be/src/common/status.h:543:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:479:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_ERROR(_init_block()); ^ ``` **be/src/common/status.h:541:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:479:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp RETURN_IF_ERROR(_init_block()); ^ ``` **be/src/common/status.h:543:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:480:** +1, nesting level increased to 3 ```cpp } else { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:482:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if 
(_mini_block_idx < _mini_blocks_per_block) { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:483:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp RETURN_IF_ERROR(_init_mini_block(_delta_bit_widths.data()[_mini_block_idx])); ^ ``` **be/src/common/status.h:541:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:483:** +6, including nesting penalty of 5, nesting level increased to 6 ```cpp RETURN_IF_ERROR(_init_mini_block(_delta_bit_widths.data()[_mini_block_idx])); ^ ``` **be/src/common/status.h:543:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:484:** +1, nesting level increased to 4 ```cpp } else { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:485:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp RETURN_IF_ERROR(_init_block()); ^ ``` **be/src/common/status.h:541:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:485:** +6, including nesting penalty of 5, nesting level increased to 6 ```cpp RETURN_IF_ERROR(_init_block()); ^ ``` **be/src/common/status.h:543:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:492:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp for (int j = 0; j < values_decode; ++j) { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:493:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if (!_bit_reader->GetValue(_delta_bit_width, buffer + i + j)) { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:497:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp for (int j = 0; j < values_decode; ++j) { ^ ``` 
**be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:509:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (PREDICT_FALSE(_total_values_remaining == 0)) { ^ ``` **be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h:510:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (!_bit_reader->Advance(_delta_bit_width * _values_remaining_current_mini_block)) { ^ ``` </details> ########## be/src/vec/exec/format/parquet/vparquet_column_reader.cpp: ########## @@ -480,13 +480,17 @@ Status ScalarColumnReader::_try_load_dict_page(bool* loaded, bool* has_dict) { Status ScalarColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr& type, Review Comment: warning: function 'read_column_data' has cognitive complexity of 81 (threshold 50) [readability-function-cognitive-complexity] ```cpp Status ScalarColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr& type, ^ ``` <details> <summary>Additional context</summary> **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:482:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (_converter == nullptr) { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:485:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (!_converter->support()) { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:494:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp do { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:495:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (_chunk_reader->remaining_num_values() == 0) { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:496:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if (!_chunk_reader->has_next_page()) { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:501:** +3, including nesting penalty of 2, nesting level 
increased to 3 ```cpp RETURN_IF_ERROR(_chunk_reader->next_page()); ^ ``` **be/src/common/status.h:541:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:501:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_ERROR(_chunk_reader->next_page()); ^ ``` **be/src/common/status.h:543:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:503:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (_nested_column) { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:504:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp RETURN_IF_ERROR(_chunk_reader->load_page_data_idempotent()); ^ ``` **be/src/common/status.h:541:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:504:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_ERROR(_chunk_reader->load_page_data_idempotent()); ^ ``` **be/src/common/status.h:543:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:505:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp RETURN_IF_ERROR(_read_nested_column(resolved_column, resolved_type, select_vector, ^ ``` **be/src/common/status.h:541:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:505:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_ERROR(_read_nested_column(resolved_column, resolved_type, select_vector, ^ ``` **be/src/common/status.h:543:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:515:** +2, including 
nesting penalty of 1, nesting level increased to 2 ```cpp if (read_ranges.size() == 0) { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:518:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp RETURN_IF_ERROR(_chunk_reader->skip_page()); ^ ``` **be/src/common/status.h:541:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:518:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_ERROR(_chunk_reader->skip_page()); ^ ``` **be/src/common/status.h:543:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:520:** +1, nesting level increased to 2 ```cpp } else { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:524:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if (select_vector.has_filter() && select_vector.filter_ratio() > 0.6) { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:524:** +1 ```cpp if (select_vector.has_filter() && select_vector.filter_ratio() > 0.6) { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:530:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if (batch_size >= remaining_num_values && ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:530:** +1 ```cpp if (batch_size >= remaining_num_values && ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:535:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp RETURN_IF_ERROR(_chunk_reader->skip_page()); ^ ``` **be/src/common/status.h:541:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:535:** +6, including nesting penalty of 5, nesting level increased to 6 ```cpp RETURN_IF_ERROR(_chunk_reader->skip_page()); ^ ``` **be/src/common/status.h:543:** expanded 
from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:537:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp if (!_chunk_reader->has_next_page()) { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:542:** +1 ```cpp skip_whole_batch = batch_size <= remaining_num_values && ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:544:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if (skip_whole_batch) { ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:549:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp RETURN_IF_ERROR(_chunk_reader->load_page_data_idempotent()); ^ ``` **be/src/common/status.h:541:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:549:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_ERROR(_chunk_reader->load_page_data_idempotent()); ^ ``` **be/src/common/status.h:543:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:574:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (_chunk_reader->remaining_num_values() == 0 && !_chunk_reader->has_next_page()) { ^ ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org