This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch variant-sparse in repository https://gitbox.apache.org/repos/asf/doris.git
commit 6e9f39f867fc437759a3585640fc02cc69852042 Author: eldenmoon <lihan...@selectdb.com> AuthorDate: Mon Mar 10 17:32:16 2025 +0800 fix merge master --- .../rowset/segment_v2/hierarchical_data_reader.cpp | 3 ++- be/src/olap/rowset/segment_v2/segment.cpp | 6 ++++-- be/src/runtime/types.h | 25 ++++------------------ be/src/vec/columns/column_object.cpp | 6 +++--- be/src/vec/common/schema_util.cpp | 8 +------ be/src/vec/data_types/data_type_object.cpp | 6 ++++-- .../data_types/serde/data_type_nullable_serde.h | 2 +- .../java/org/apache/doris/qe/SessionVariable.java | 5 ++--- 8 files changed, 21 insertions(+), 40 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp index 6efad4a0cdd..82d41519466 100644 --- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp +++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp @@ -26,6 +26,7 @@ #include "vec/columns/column_map.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_object.h" +#include "vec/columns/column_nothing.h" #include "vec/common/assert_cast.h" #include "vec/common/schema_util.h" #include "vec/data_types/data_type.h" @@ -206,7 +207,7 @@ Status HierarchicalDataReader::_process_nested_columns( for (const auto& subcolumn : entry.second) { const auto& column = subcolumn.column; const auto& type = subcolumn.type; - if (!remove_nullable(column)->is_column_array()) { + if (!check_and_get_column<ColumnArray>(remove_nullable(column).get())) { return Status::InvalidArgument( "Meet none array column when flatten nested array, path {}, type {}", subcolumn.path.get_path(), subcolumn.type->get_name()); diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 226d966e98a..350832cc704 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -611,7 +611,9 @@ vectorized::DataTypePtr Segment::get_data_type_of(const TabletColumn& column, } Status Segment::_create_column_readers_once(OlapReaderStatistics* stats) { - SCOPED_RAW_TIMER(&stats->segment_create_column_readers_timer_ns); + if (stats != nullptr) { + SCOPED_RAW_TIMER(&stats->segment_create_column_readers_timer_ns); + } return _create_column_readers_once_call.call([&] { DCHECK(_footer_pb); Defer defer([&]() { _footer_pb.reset(); }); @@ -849,7 +851,7 @@ Status Segment::new_column_iterator(const TabletColumn& tablet_column, } Result<ColumnReader*> Segment::get_column_reader(int32_t col_unique_id) { - auto status = _create_column_readers_once(); + auto status = _create_column_readers_once(nullptr); if (!status) { return ResultError(std::move(status)); } diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h index c68e86f8d5c..f935bcbd0d8 100644 --- a/be/src/runtime/types.h +++ b/be/src/runtime/types.h @@ -72,7 +72,8 @@ struct TypeDescriptor { TypeDescriptor() : type(INVALID_TYPE), len(-1), precision(-1), scale(-1) {} // explicit TypeDescriptor(PrimitiveType type) : - TypeDescriptor(PrimitiveType type, int variant_max_subcolumns_count_ = -1) : type(type), len(-1), precision(-1), scale(-1) { + TypeDescriptor(PrimitiveType type, int variant_max_subcolumns_count_ = -1) + : type(type), len(-1), precision(-1), scale(-1) { // TODO, should not initialize default values, force initialization by parameters or external. if (type == TYPE_DECIMALV2) { precision = 27; @@ -180,27 +181,9 @@ struct TypeDescriptor { } bool operator==(const TypeDescriptor& o) const { -<<<<<<< HEAD - if (type != o.type) { - return false; - } - if (children != o.children) { - return false; - } - if (type == TYPE_CHAR) { - return len == o.len; - } - if (type == TYPE_DECIMALV2) { - return precision == o.precision && scale == o.scale; - } - if (type == TYPE_VARIANT) { - return variant_max_subcolumns_count == o.variant_max_subcolumns_count; - } - return true; -======= return type == o.type && len == o.len && precision == o.precision && scale == o.scale && - result_is_nullable == o.result_is_nullable && contains_nulls == o.contains_nulls; ->>>>>>> upstream-apache/master + result_is_nullable == o.result_is_nullable && contains_nulls == o.contains_nulls && + variant_max_subcolumns_count == o.variant_max_subcolumns_count; } bool operator!=(const TypeDescriptor& other) const { return !(*this == other); } diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 952aebf2495..2aeebb72fa7 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -1667,7 +1667,7 @@ void ColumnObject::Subcolumn::wrapp_array_nullable() { } } -Status ColumnObject::serialize_one_row_to_string(int64_t row, std::string* output) const { +Status ColumnObject::serialize_one_row_to_string(size_t row, std::string* output) const { auto tmp_col = ColumnString::create(); VectorBufferWriter write_buffer(*tmp_col.get()); if (is_scalar_variant()) { @@ -1682,7 +1682,7 @@ Status ColumnObject::serialize_one_row_to_string(int64_t row, std::string* outpu return Status::OK(); } -Status ColumnObject::serialize_one_row_to_string(int64_t row, BufferWritable& output) const { +Status ColumnObject::serialize_one_row_to_string(size_t row, BufferWritable& output) const { if (is_scalar_variant()) { subcolumns.get_root()->data.serialize_text_json(row, output); return Status::OK(); @@ -2081,7 +2081,7 @@ Status ColumnObject::finalize(FinalizeMode mode) { for (size_t i = 0; i < std::min(size_t(_max_subcolumns_count), sorted_by_size.size()); ++i) { // if too many null values, then consider it as sparse column - if (sorted_by_size[i].second < num_rows * 0.95) { + if ((double) sorted_by_size[i].second < (double) num_rows * 0.95) { continue; } selected_path.insert(sorted_by_size[i].first); diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index adf5630f3ca..047d488e5ad 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -171,6 +171,7 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co } // set variant root column/type to from column/type CHECK(arg.column->is_nullable()); + auto to_type = remove_nullable(type); const auto& data_type_object = assert_cast<const DataTypeObject&>(*to_type); auto variant = ColumnObject::create(data_type_object.variant_max_subcolumns_count()); @@ -248,13 +249,9 @@ void get_column_by_type(const vectorized::DataTypePtr& data_type, const std::str return; } // TODO handle more types like struct/date/datetime/decimal... -<<<<<<< HEAD throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, "unexcepted data column type: {}, column name is: {}", data_type->get_name(), name); -======= - throw Exception(Status::FatalError("__builtin_unreachable")); ->>>>>>> upstream-apache/master } TabletColumn get_column_by_type(const vectorized::DataTypePtr& data_type, const std::string& name, @@ -633,7 +630,6 @@ bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* o return new_schema_has_inverted_index != old_schema_has_inverted_index; } -<<<<<<< HEAD TabletColumn create_sparse_column(const TabletColumn& variant) { TabletColumn res; res.set_name(variant.name_lower_case() + "." + SPARSE_COLUMN_PATH); @@ -868,7 +864,5 @@ void calculate_variant_stats(const IColumn& encoded_sparse_column, } } -======= #include "common/compile_check_end.h" ->>>>>>> upstream-apache/master } // namespace doris::vectorized::schema_util diff --git a/be/src/vec/data_types/data_type_object.cpp b/be/src/vec/data_types/data_type_object.cpp index 5bcb9e6985b..62e4dec64dd 100644 --- a/be/src/vec/data_types/data_type_object.cpp +++ b/be/src/vec/data_types/data_type_object.cpp @@ -166,7 +166,7 @@ const char* DataTypeObject::deserialize(const char* buf, MutableColumnPtr* colum // 1. deserialize num of subcolumns uint32_t num_subcolumns = *reinterpret_cast<const uint32_t*>(buf); buf += sizeof(uint32_t); - + bool root_added = false; // 2. deserialize each subcolumn in a loop for (uint32_t i = 0; i < num_subcolumns; i++) { // 2.1 deserialize subcolumn column path (str size + str data) @@ -186,6 +186,8 @@ const char* DataTypeObject::deserialize(const char* buf, MutableColumnPtr* colum PathInData key; if (!column_meta_pb.name().empty()) { key = PathInData {column_meta_pb.name()}; + } else { + root_added = true; } column_object->add_sub_column(key, std::move(sub_column), type); } @@ -241,4 +243,4 @@ MutableColumnPtr DataTypeObject::create_column() const { return ColumnObject::create(_max_subcolumns_count); } -} // namespace doris::vectorized +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 62ffa2c5bdf..7c8b878ff2e 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -102,7 +102,7 @@ public: void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, int64_t row_num) const override; - DataTypeSerDeSPtr get_nested_serde() { return nested_serde; } + const DataTypeSerDeSPtr& get_nested_serde() const { return nested_serde; } virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return {nested_serde}; } private: diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index ca3f306312f..140c9235b78 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -4723,12 +4723,11 @@ public class SessionVariable implements Serializable, Writable { return disableInvertedIndexV1ForVaraint; } -<<<<<<< HEAD public int getGlobalVariantMaxSubcolumnsCount() { return globalVariantMaxSubcolumnsCount; -======= + } + public boolean getEnableLocalMergeSort() { return enableLocalMergeSort; ->>>>>>> upstream-apache/master } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org