github-actions[bot] commented on code in PR #34925: URL: https://github.com/apache/doris/pull/34925#discussion_r1619079024
########## be/src/olap/rowset/segment_v2/segment_writer.cpp: ########## @@ -322,6 +321,112 @@ } } +// for variant type, we should do following steps to fill content of block: +// 1. set block data to data convertor, and get all flattened columns from variant subcolumns +// 2. get sparse columns from previous sparse columns stripped in OlapColumnDataConvertorVariant +// 3. merge current columns info(contains extracted columns) with previous merged_tablet_schema +// which will be used to contruct the new schema for rowset +Status SegmentWriter::append_block_with_variant_subcolumns(vectorized::Block& data) { Review Comment: warning: function 'append_block_with_variant_subcolumns' exceeds recommended size/complexity thresholds [readability-function-size] ```cpp Status SegmentWriter::append_block_with_variant_subcolumns(vectorized::Block& data) { ^ ``` <details> <summary>Additional context</summary> **be/src/olap/rowset/segment_v2/segment_writer.cpp:328:** 99 lines including whitespace and comments (threshold 80) ```cpp Status SegmentWriter::append_block_with_variant_subcolumns(vectorized::Block& data) { ^ ``` </details> ########## be/src/olap/rowset/segment_v2/segment_writer.cpp: ########## @@ -181,64 +181,53 @@ Status SegmentWriter::init() { return init(column_ids, true); } -Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key) { - DCHECK(_column_writers.empty()); - DCHECK(_column_ids.empty()); - _has_key = has_key; - _column_writers.reserve(_tablet_schema->columns().size()); - _column_ids.insert(_column_ids.end(), col_ids.begin(), col_ids.end()); - _olap_data_convertor = std::make_unique<vectorized::OlapBlockDataConvertor>(); - if (_opts.compression_type == UNKNOWN_COMPRESSION) { - _opts.compression_type = _tablet_schema->compression_type(); - } - auto create_column_writer = [&](uint32_t cid, const auto& column) -> auto { - ColumnWriterOptions opts; - opts.meta = _footer.add_columns(); - - init_column_meta(opts.meta, cid, column, 
_tablet_schema); - - // now we create zone map for key columns in AGG_KEYS or all column in UNIQUE_KEYS or DUP_KEYS - // except for columns whose type don't support zone map. - opts.need_zone_map = column.is_key() || _tablet_schema->keys_type() != KeysType::AGG_KEYS; - opts.need_bloom_filter = column.is_bf_column(); - auto* tablet_index = _tablet_schema->get_ngram_bf_index(column.unique_id()); - if (tablet_index) { - opts.need_bloom_filter = true; - opts.is_ngram_bf_index = true; - opts.gram_size = tablet_index->get_gram_size(); - opts.gram_bf_size = tablet_index->get_gram_bf_size(); - } - - opts.need_bitmap_index = column.has_bitmap_index(); - bool skip_inverted_index = false; - if (_opts.rowset_ctx != nullptr) { - // skip write inverted index for index compaction - skip_inverted_index = - _opts.rowset_ctx->skip_inverted_index.count(column.unique_id()) > 0; - } - // skip write inverted index on load if skip_write_index_on_load is true - if (_opts.write_type == DataWriteType::TYPE_DIRECT && - _tablet_schema->skip_write_index_on_load()) { - skip_inverted_index = true; - } - // indexes for this column - opts.indexes = std::move(_tablet_schema->get_indexes_for_column(column)); - if (!InvertedIndexColumnWriter::check_column_valid(column)) { - // skip inverted index if invalid - opts.indexes.clear(); - opts.need_zone_map = false; - opts.need_bloom_filter = false; - opts.need_bitmap_index = false; - } - opts.inverted_index_file_writer = _inverted_index_file_writer.get(); - for (const auto* index : opts.indexes) { - if (!skip_inverted_index && index->index_type() == IndexType::INVERTED) { - opts.inverted_index = index; - opts.need_inverted_index = true; - // TODO support multiple inverted index - break; - } +Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& column, Review Comment: warning: function '_create_column_writer' has cognitive complexity of 51 (threshold 50) [readability-function-cognitive-complexity] ```cpp Status 
SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& column, ^ ``` <details> <summary>Additional context</summary> **be/src/olap/rowset/segment_v2/segment_writer.cpp:195:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (tablet_index) { ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:204:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (_opts.rowset_ctx != nullptr) { ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:209:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (_opts.write_type == DataWriteType::TYPE_DIRECT && schema->skip_write_index_on_load()) { ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:214:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (!InvertedIndexColumnWriter::check_column_valid(column)) { ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:241:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp CHECK_FIELD_TYPE(STRUCT, "struct") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:231:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:241:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(STRUCT, "struct") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:233:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bloom_filter) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:241:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(STRUCT, "struct") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:236:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bitmap_index) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:242:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp 
CHECK_FIELD_TYPE(ARRAY, "array") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:231:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:242:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(ARRAY, "array") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:233:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bloom_filter) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:242:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(ARRAY, "array") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:236:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bitmap_index) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:243:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp CHECK_FIELD_TYPE(JSONB, "jsonb") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:231:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:243:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(JSONB, "jsonb") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:233:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bloom_filter) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:243:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(JSONB, "jsonb") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:236:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bitmap_index) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:244:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp CHECK_FIELD_TYPE(AGG_STATE, "agg_state") ^ ``` 
**be/src/olap/rowset/segment_v2/segment_writer.cpp:231:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:244:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(AGG_STATE, "agg_state") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:233:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bloom_filter) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:244:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(AGG_STATE, "agg_state") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:236:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bitmap_index) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:245:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp CHECK_FIELD_TYPE(MAP, "map") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:231:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:245:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(MAP, "map") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:233:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bloom_filter) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:245:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(MAP, "map") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:236:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bitmap_index) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:246:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp CHECK_FIELD_TYPE(OBJECT, "object") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:231:** expanded from 
macro 'CHECK_FIELD_TYPE' ```cpp if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:246:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(OBJECT, "object") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:233:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bloom_filter) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:246:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(OBJECT, "object") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:236:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bitmap_index) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:247:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp CHECK_FIELD_TYPE(HLL, "hll") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:231:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:247:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(HLL, "hll") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:233:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bloom_filter) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:247:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(HLL, "hll") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:236:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bitmap_index) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:248:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp CHECK_FIELD_TYPE(QUANTILE_STATE, "quantile_state") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:231:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (column.type() == 
FieldType::OLAP_FIELD_TYPE_##TYPE) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:248:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(QUANTILE_STATE, "quantile_state") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:233:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bloom_filter) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:248:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp CHECK_FIELD_TYPE(QUANTILE_STATE, "quantile_state") ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:236:** expanded from macro 'CHECK_FIELD_TYPE' ```cpp if (opts.need_bitmap_index) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:252:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (column.is_row_store_column()) { ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:257:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, &writer)); ^ ``` **be/src/common/status.h:614:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:257:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, &writer)); ^ ``` **be/src/common/status.h:616:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:258:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp RETURN_IF_ERROR(writer->init()); ^ ``` **be/src/common/status.h:614:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/olap/rowset/segment_v2/segment_writer.cpp:258:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp RETURN_IF_ERROR(writer->init()); ^ ``` **be/src/common/status.h:616:** expanded from macro 
'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` </details> ########## be/src/vec/common/schema_util.cpp: ########## @@ -483,25 +481,8 @@ Status get_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas, return Status::OK(); } -Status parse_and_encode_variant_columns(Block& block, const std::vector<int>& variant_pos, - const ParseContext& ctx) { - try { - // Parse each variant column from raw string column - RETURN_IF_ERROR(vectorized::schema_util::parse_variant_columns(block, variant_pos, ctx)); - vectorized::schema_util::finalize_variant_columns(block, variant_pos, - false /*not ingore sparse*/); - RETURN_IF_ERROR( - vectorized::schema_util::encode_variant_sparse_subcolumns(block, variant_pos)); - } catch (const doris::Exception& e) { - // TODO more graceful, max_filter_ratio - LOG(WARNING) << "encounter execption " << e.to_string(); - return Status::InternalError(e.to_string()); - } - return Status::OK(); -} - -Status parse_variant_columns(Block& block, const std::vector<int>& variant_pos, - const ParseContext& ctx) { +Status _parse_variant_columns(Block& block, const std::vector<int>& variant_pos, Review Comment: warning: function '_parse_variant_columns' has cognitive complexity of 59 (threshold 50) [readability-function-cognitive-complexity] ```cpp Status _parse_variant_columns(Block& block, const std::vector<int>& variant_pos, ^ ``` <details> <summary>Additional context</summary> **be/src/vec/common/schema_util.cpp:485:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp for (int i = 0; i < variant_pos.size(); ++i) { ^ ``` **be/src/vec/common/schema_util.cpp:495:** nesting level increased to 2 ```cpp auto encode_rowstore = [&]() { ^ ``` **be/src/vec/common/schema_util.cpp:496:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if (!ctx.record_raw_json_column) { ^ ``` **be/src/vec/common/schema_util.cpp:500:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if 
(record_raw_string_with_serialization) { ^ ``` **be/src/vec/common/schema_util.cpp:503:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp for (size_t i = 0; i < var->rows(); ++i) { ^ ``` **be/src/vec/common/schema_util.cpp:505:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp RETURN_IF_ERROR(var->serialize_one_row_to_string(i, &raw_str)); ^ ``` **be/src/common/status.h:614:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/common/schema_util.cpp:505:** +6, including nesting penalty of 5, nesting level increased to 6 ```cpp RETURN_IF_ERROR(var->serialize_one_row_to_string(i, &raw_str)); ^ ``` **be/src/common/status.h:616:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/common/schema_util.cpp:509:** +1, nesting level increased to 3 ```cpp } else { ^ ``` **be/src/vec/common/schema_util.cpp:519:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (!var.is_scalar_variant()) { ^ ``` **be/src/vec/common/schema_util.cpp:522:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp RETURN_IF_ERROR(encode_rowstore()); ^ ``` **be/src/common/status.h:614:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/common/schema_util.cpp:522:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_ERROR(encode_rowstore()); ^ ``` **be/src/common/status.h:616:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/common/schema_util.cpp:527:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (WhichDataType(remove_nullable(var.get_root_type())).is_json()) { ^ ``` **be/src/vec/common/schema_util.cpp:530:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp RETURN_IF_ERROR(cast_column({var.get_root(), var.get_root_type(), ""}, ^ ``` **be/src/common/status.h:614:** expanded from macro 
'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/common/schema_util.cpp:532:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp ? make_nullable(std::make_shared<DataTypeString>()) ^ ``` **be/src/vec/common/schema_util.cpp:530:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_ERROR(cast_column({var.get_root(), var.get_root_type(), ""}, ^ ``` **be/src/common/status.h:616:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/common/schema_util.cpp:535:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if (raw_json_column->is_nullable()) { ^ ``` **be/src/vec/common/schema_util.cpp:539:** +1, nesting level increased to 2 ```cpp } else { ^ ``` **be/src/vec/common/schema_util.cpp:543:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp ? assert_cast<const ColumnNullable&>(root).get_nested_column_ptr() ^ ``` **be/src/vec/common/schema_util.cpp:553:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (is_nullable) { ^ ``` **be/src/vec/common/schema_util.cpp:559:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp RETURN_IF_ERROR(encode_rowstore()); ^ ``` **be/src/common/status.h:614:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/common/schema_util.cpp:559:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp RETURN_IF_ERROR(encode_rowstore()); ^ ``` **be/src/common/status.h:616:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org