This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push:
     new a0b86ccedb3 fix reader should starts with prefix (#46060)
a0b86ccedb3 is described below

commit a0b86ccedb36cf24bdaaad6aa08c7b2ce076de53
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Mon Dec 30 20:41:02 2024 +0800

    fix reader should starts with prefix (#46060)
---
 be/src/olap/rowset/segment_v2/column_reader.cpp    |  7 +++--
 .../rowset/segment_v2/hierarchical_data_reader.h   |  2 +-
 be/src/olap/schema_change.cpp                      |  1 -
 be/src/vec/columns/column_object.cpp               | 34 +++++++++-------------
 .../compaction/compaction_sparse_column.out        | 24 +++++++++++++++
 .../suites/variant_github_events_p2/load.groovy    |  2 +-
 .../variant_p0/schema_change/schema_change.groovy  |  1 +
 .../compaction/compaction_sparse_column.groovy     | 10 +++++++
 8 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index c3c41d968ae..97e8b1ca1da 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -299,12 +299,15 @@ Status VariantColumnReader::new_iterator(ColumnIterator** iterator,
                                          std::unique_ptr<ColumnIterator>(inner_iter));
         return Status::OK();
     }
-    // Check if path is prefix, example sparse columns path: a.b.c, a.b.e, access prefix: a.b
+    // Check if path is prefix, example sparse columns path: a.b.c, a.b.e, access prefix: a.b.
     // then we must read the sparse columns
     bool prefix_existed_in_sparse_column =
             _statistics &&
             (_statistics->sparse_column_non_null_size.lower_bound(relative_path.get_path()) !=
-             _statistics->sparse_column_non_null_size.end());
+             _statistics->sparse_column_non_null_size.end()) &&
+            _statistics->sparse_column_non_null_size.lower_bound(relative_path.get_path())
+                    ->first.starts_with(relative_path.get_path() + ".");
+
     // Otherwise the prefix is not exist and the sparse column size is reached limit
     // which means the path maybe exist in sparse_column
     bool exceeded_sparse_column_limit =
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
index 369dff92829..c50ac26169e 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
@@ -153,10 +153,10 @@ private:
             _rows_read += nrows;
             variant.finalize();
+            RETURN_IF_ERROR(_init_null_map_and_clear_columns(container, dst, nrows));
 #ifndef NDEBUG
             variant.check_consistency();
 #endif
-            RETURN_IF_ERROR(_init_null_map_and_clear_columns(container, dst, nrows));
             return Status::OK();
         }
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index ec291d8d2f0..bc41365dbd4 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -209,7 +209,6 @@ public:
         for (int idx = 0; idx < columns; idx++) {
             auto column = finalized_block.get_by_position(idx).column->assume_mutable();
-
             for (int j = 0; j < limit; j++) {
                 auto row_ref = pushed_row_refs[i + j];
                 column->insert_from(*row_ref.get_column(idx), row_ref.position);
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 8cd3b089f66..2ce8ce791ed 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -352,9 +352,9 @@ void get_field_info(const Field& field, FieldInfo* info) {
 }
 
 #ifdef NDEBUG
-#define ENABLE_CHECK_CONSISTENCY /* Nothing */
+#define ENABLE_CHECK_CONSISTENCY (void) /* Nothing */
 #else
-#define ENABLE_CHECK_CONSISTENCY(this) this->check_consistency()
+#define ENABLE_CHECK_CONSISTENCY(this) (this)->check_consistency()
 #endif
 
 // current nested level is 2, inside column object
@@ -642,7 +642,7 @@ MutableColumnPtr ColumnObject::apply_for_columns(Func&& func) const {
     auto sparse_column = func(serialized_sparse_column);
     res->serialized_sparse_column = sparse_column->assume_mutable();
     res->num_rows = res->serialized_sparse_column->size();
-    res->check_consistency();
+    ENABLE_CHECK_CONSISTENCY(res.get());
     return res;
 }
@@ -837,9 +837,7 @@ void ColumnObject::check_consistency() const {
 }
 
 size_t ColumnObject::size() const {
-#ifndef NDEBUG
-    check_consistency();
-#endif
+    ENABLE_CHECK_CONSISTENCY(this);
     return num_rows;
 }
@@ -875,6 +873,10 @@ void ColumnObject::for_each_subcolumn(ColumnCallback callback) {
            callback(part);
         }
     }
+    callback(serialized_sparse_column);
+    // callback may be filter, so the row count may be changed
+    num_rows = serialized_sparse_column->size();
+    ENABLE_CHECK_CONSISTENCY(this);
 }
 
 void ColumnObject::insert_from(const IColumn& src, size_t n) {
@@ -896,6 +898,7 @@ void ColumnObject::try_insert(const Field& field) {
     if (field.get_type() != Field::Types::VariantMap) {
         if (field.is_null()) {
             insert_default();
+            ENABLE_CHECK_CONSISTENCY(this);
             return;
         }
         auto* root = get_subcolumn({});
@@ -1267,10 +1270,8 @@ bool ColumnObject::try_add_new_subcolumn(const PathInData& path) {
 
 void ColumnObject::insert_range_from(const IColumn& src, size_t start, size_t length) {
     const auto& src_object = assert_cast<const ColumnObject&>(src);
-#ifndef NDEBUG
-    check_consistency();
-    src_object.check_consistency();
-#endif
+    ENABLE_CHECK_CONSISTENCY(&src_object);
+    ENABLE_CHECK_CONSISTENCY(this);
 
     // First, insert src subcolumns
     // We can reach the limit of subcolumns, and in this case
@@ -1981,6 +1982,7 @@ Status ColumnObject::finalize(FinalizeMode mode) {
     std::swap(subcolumns, new_subcolumns);
     doc_structure = nullptr;
     _prev_positions.clear();
+    ENABLE_CHECK_CONSISTENCY(this);
     return Status::OK();
 }
@@ -2022,7 +2024,7 @@ ColumnPtr ColumnObject::filter(const Filter& filter, ssize_t count) const {
     }
     if (subcolumns.empty()) {
         auto res = ColumnObject::create(count_bytes_in_filter(filter));
-        res->check_consistency();
+        ENABLE_CHECK_CONSISTENCY(res.get());
         return res;
     }
     auto new_column = ColumnObject::create(true, false);
@@ -2032,7 +2034,7 @@ ColumnPtr ColumnObject::filter(const Filter& filter, ssize_t count) const {
                                          entry->data.get_least_common_type());
     }
     new_column->serialized_sparse_column = serialized_sparse_column->filter(filter, count);
-    new_column->check_consistency();
+    ENABLE_CHECK_CONSISTENCY(new_column.get());
     return new_column;
 }
@@ -2065,14 +2067,6 @@ size_t ColumnObject::filter(const Filter& filter) {
                }
            }
        });
-        const auto result_size = serialized_sparse_column->filter(filter);
-        if (result_size != count) {
-            throw Exception(ErrorCode::INTERNAL_ERROR,
-                            "result_size not euqal with filter_size, result_size={}, "
-                            "filter_size={}",
-                            result_size, count);
-        }
-        CHECK_EQ(result_size, count);
     }
     num_rows = count;
     ENABLE_CHECK_CONSISTENCY(this);
diff --git a/regression-test/data/variant_p1/compaction/compaction_sparse_column.out b/regression-test/data/variant_p1/compaction/compaction_sparse_column.out
index 520eaf5f84c..b060cb9f57c 100644
--- a/regression-test/data/variant_p1/compaction/compaction_sparse_column.out
+++ b/regression-test/data/variant_p1/compaction/compaction_sparse_column.out
@@ -95,3 +95,27 @@
 -- !select_all --
 3	1234	\N	ddddd	1	\N
 
+-- !sql --
+\N
+\N
+\N
+\N
+
+-- !sql --
+3
+3
+3
+3
+
+-- !sql --
+2
+2
+2
+2
+
+-- !sql --
+1
+1
+1
+1
+
diff --git a/regression-test/suites/variant_github_events_p2/load.groovy b/regression-test/suites/variant_github_events_p2/load.groovy
index d4c74474eaf..abcdfb689c1 100644
--- a/regression-test/suites/variant_github_events_p2/load.groovy
+++ b/regression-test/suites/variant_github_events_p2/load.groovy
@@ -158,7 +158,7 @@ suite("regression_test_variant_github_events_p2", "nonConcurrent,p2"){
         )
         DUPLICATE KEY(`k`)
         DISTRIBUTED BY HASH(k) BUCKETS 4
-        properties("replication_num" = "1", "disable_auto_compaction" = "true", "bloom_filter_columns" = "v", "variant_enable_flatten_nested" = "true");
+        properties("replication_num" = "1", "disable_auto_compaction" = "true", "bloom_filter_columns" = "v", "variant_enable_flatten_nested" = "false");
         """
 
         // 2015
diff --git a/regression-test/suites/variant_p0/schema_change/schema_change.groovy b/regression-test/suites/variant_p0/schema_change/schema_change.groovy
index 42cef32c8e5..3c35ff28100 100644
--- a/regression-test/suites/variant_p0/schema_change/schema_change.groovy
+++ b/regression-test/suites/variant_p0/schema_change/schema_change.groovy
@@ -79,4 +79,5 @@ suite("regression_test_variant_schema_change", "variant_type"){
     sql """INSERT INTO ${table_name} SELECT k, v,v from ${table_name} limit 1111"""
     // select from mv
     qt_sql """select v['k1'], cast(v['k2'] as string) from ${table_name} order by k desc limit 10"""
+    qt_sql """select * from ${table_name} order by k desc limit 10"""
 }
\ No newline at end of file
diff --git a/regression-test/suites/variant_p1/compaction/compaction_sparse_column.groovy b/regression-test/suites/variant_p1/compaction/compaction_sparse_column.groovy
index 32d16b040a3..b3cca5910e8 100644
--- a/regression-test/suites/variant_p1/compaction/compaction_sparse_column.groovy
+++ b/regression-test/suites/variant_p1/compaction/compaction_sparse_column.groovy
@@ -201,6 +201,16 @@ suite("test_compaction_sparse_column", "p1,nonConcurrent") {
         qt_select_5_1 """ SELECT count(cast(v['b'] as int)) FROM ${tableName} where cast(v['b'] as int) = 42004;"""
         qt_select_6_1 """ SELECT count(cast(v['b'] as int)) FROM ${tableName} where cast(v['b'] as int) = 42005;"""
         qt_select_all """SELECT k, v['a'], v['b'], v['xxxx'], v['point'], v['ddddd'] from ${tableName} where (cast(v['point'] as int) = 1);"""
+
+        sql "truncate table ${tableName}"
+        sql """insert into ${tableName} values (1, '{"1" : 1, "2" : 2, "3" : 3, "4": "4", "a" : 1, "aa":2, "aaa" : 3,"aaaaaa":1234, ".a." : 1}')"""
+        sql """insert into ${tableName} values (1, '{"1" : 1, "2" : 2, "3" : 3, "4": "4", "a" : 1, "aa":2, "aaa" : 3,"aaaaaa":1234, ".a." : 1}')"""
+        sql """insert into ${tableName} values (1, '{"1" : 1, "2" : 2, "3" : 3, "4": "4", "a" : 1, "aa":2, "aaa" : 3,"aaaaaa":1234, ".a." : 1}')"""
+        sql """insert into ${tableName} values (1, '{"1" : 1, "2" : 2, "3" : 3, "4": "4", "a" : 1, "aa":2, "aaa" : 3,"aaaaaa":1234, ".a." : 1}')"""
+        qt_sql """select v['aaaa'] from ${tableName}"""
+        qt_sql """select v['aaa'] from ${tableName}"""
+        qt_sql """select v['aa'] from ${tableName}"""
+        qt_sql """select v['1'] from ${tableName}"""
     } finally {
         // try_sql("DROP TABLE IF EXISTS ${tableName}")
         GetDebugPoint().disableDebugPointForAllBEs("variant_column_writer_impl._get_subcolumn_paths_from_stats")

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org
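[Editor's note] As background for the column_reader.cpp hunk above, here is a minimal standalone sketch of the prefix test the patch adds, assuming the sparse-column statistics behave like an ordered std::map<std::string, size_t> keyed by dotted paths. The function name and sample paths are illustrative only, not the actual Doris API.

// Sketch only: models the lower_bound + starts_with prefix check against a plain
// std::map, not the real VariantStatistics structure used in column_reader.cpp.
#include <iostream>
#include <map>
#include <string>

// True when some stored sparse-column path is a descendant of `prefix`,
// e.g. stored paths {"a.b.c", "a.b.e"} for the requested prefix "a.b".
bool prefix_exists_in_sparse_column(const std::map<std::string, size_t>& sparse_non_null_size,
                                    const std::string& prefix) {
    // lower_bound returns the first path that sorts >= prefix; because the map is
    // ordered lexicographically, every descendant "prefix.xxx" sorts right after "prefix".
    auto it = sparse_non_null_size.lower_bound(prefix);
    if (it == sparse_non_null_size.end()) {
        return false;
    }
    // Requiring "prefix." (note the trailing dot) is the point of the fix: without it,
    // any unrelated path that merely sorts after `prefix` would count as a hit.
    const std::string with_dot = prefix + ".";
    return it->first.compare(0, with_dot.size(), with_dot) == 0;  // starts_with(with_dot)
}

int main() {
    const std::map<std::string, size_t> stats = {{"a.b.c", 10}, {"a.b.e", 3}, {"a.bc", 7}};
    std::cout << prefix_exists_in_sparse_column(stats, "a.b") << '\n';  // 1: "a.b.c" lies under "a.b"
    std::cout << prefix_exists_in_sparse_column(stats, "a.a") << '\n';  // 0: lower_bound finds "a.b.c",
                                                                        //    which is not under "a.a";
                                                                        //    a bare lower_bound != end()
                                                                        //    test would report a hit here
    std::cout << prefix_exists_in_sparse_column(stats, "z") << '\n';    // 0: lower_bound hits end()
}

Under these assumptions, the ordered-map property is what lets a single lower_bound probe detect whether any descendant path exists.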