This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch variant-sparse in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push: new 19e5a4b54f1 fix 1 (#45517) 19e5a4b54f1 is described below commit 19e5a4b54f1ee6bb8e0673142b1839aecfb979ea Author: lihangyu <lihan...@selectdb.com> AuthorDate: Tue Dec 17 15:20:15 2024 +0800 fix 1 (#45517) --- .../segment_v2/variant_column_writer_impl.cpp | 3 +-- be/src/vec/columns/column_object.cpp | 8 ++++++-- be/src/vec/common/schema_util.cpp | 24 ++++++++++------------ be/src/vec/common/schema_util.h | 2 +- be/src/vec/functions/function_cast.h | 1 + 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp index 761cbec8c49..5fbb7433e10 100644 --- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp +++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp @@ -214,8 +214,7 @@ Status VariantColumnWriterImpl::_process_sparse_column( vectorized::ColumnObject* ptr, vectorized::OlapBlockDataConvertor* converter, size_t num_rows, int& column_id) { // create sparse column writer - TabletColumn sparse_column = - vectorized::schema_util::create_sparse_column(_tablet_column->unique_id()); + TabletColumn sparse_column = vectorized::schema_util::create_sparse_column(*_tablet_column); ColumnWriterOptions sparse_writer_opts; sparse_writer_opts.meta = _opts.footer->add_columns(); diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 568ed7f8bbc..2cb1e013f8c 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -2058,9 +2058,12 @@ Status ColumnObject::finalize(FinalizeMode mode) { if (entry->data.is_root) { continue; } + if (mode != FinalizeMode::WRITE_MODE) { + new_subcolumns.add(entry->path, entry->data); + } } - // merge and encode sparse column + // caculate stats & merge and encode sparse column if (mode == FinalizeMode::WRITE_MODE) { // pick sparse columns std::set<std::string_view> selected_path; @@ -2108,6 +2111,7 @@ Status ColumnObject::finalize(FinalizeMode mode) { remaing_subcolumns.emplace(entry->path.get_path(), entry->data); } } + serialized_sparse_column->clear(); RETURN_IF_ERROR(serialize_sparse_columns(std::move(remaing_subcolumns))); } @@ -2168,7 +2172,7 @@ ColumnPtr ColumnObject::filter(const Filter& filter, ssize_t count) const { } ColumnPtr ColumnObject::replicate(const IColumn::Offsets& offsets) const { - column_match_offsets_size(num_rows, offsets.size()); + // column_match_offsets_size(num_rows, offsets.size()); return apply_for_columns([&](const ColumnPtr column) { return column->replicate(offsets); }); } diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 77b3299c5b5..74298d2c838 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -606,20 +606,18 @@ bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* o return new_schema_has_inverted_index != old_schema_has_inverted_index; } -TabletColumn create_sparse_column(int32_t parent_unique_id) { - TColumn tcolumn; - tcolumn.column_name = SPARSE_COLUMN_PATH; - tcolumn.col_unique_id = parent_unique_id; - tcolumn.column_type = TColumnType {}; - tcolumn.column_type.type = TPrimitiveType::MAP; - - TColumn child_tcolumn; - tcolumn.column_type = TColumnType {}; - tcolumn.column_type.type = TPrimitiveType::STRING; - tcolumn.children_column.push_back(child_tcolumn); - tcolumn.children_column.push_back(child_tcolumn); - auto res = TabletColumn {tcolumn}; +TabletColumn create_sparse_column(const TabletColumn& variant) { + TabletColumn res; + res.set_name(SPARSE_COLUMN_PATH); + res.set_unique_id(variant.unique_id()); + res.set_type(FieldType::OLAP_FIELD_TYPE_MAP); + res.set_aggregation_method(variant.aggregation()); res.set_path_info(PathInData {SPARSE_COLUMN_PATH}); + + TabletColumn child_tcolumn; + child_tcolumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING); + res.add_sub_column(child_tcolumn); + res.add_sub_column(child_tcolumn); return res; } diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index fee6e778325..795c700e636 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -128,6 +128,6 @@ bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* o int32_t new_col_idx, int32_t old_col_idx); // create ColumnMap<String, String> -TabletColumn create_sparse_column(int32_t parent_unique_id); +TabletColumn create_sparse_column(const TabletColumn& variant); } // namespace doris::vectorized::schema_util diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 5de820dfa3a..0e7a8c495d3 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -1933,6 +1933,7 @@ private: // set variant root column/type to from column/type auto variant = ColumnObject::create(true /*always nullable*/); variant->create_root(from_type, col_from->assume_mutable()); + variant->get_sparse_column()->assume_mutable()->insert_many_defaults(input_rows_count); block.replace_by_position(result, std::move(variant)); return Status::OK(); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org