This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch variant-sparse in repository https://gitbox.apache.org/repos/asf/doris.git
commit 1f2257e4b9399a6136d3705f5195cdfdd65fe492 Author: eldenmoon <lihan...@selectdb.com> AuthorDate: Mon Mar 10 18:39:48 2025 +0800 fix case after rebase --- be/src/olap/rowset/segment_v2/column_reader.cpp | 8 ++++++-- .../rowset/segment_v2/hierarchical_data_reader.cpp | 10 +++++++++- be/src/vec/columns/column_object.cpp | 2 +- .../data/variant_p0/compaction/test_compaction.out | Bin 5819 -> 5787 bytes .../data/variant_p0/schema_change/schema_change.out | Bin 1108 -> 1104 bytes .../test_array_contains_with_inverted_index.out | Bin 45214 -> 45954 bytes .../load.groovy | 12 +++--------- .../variant_p0/compaction/test_compaction.groovy | 2 +- regression-test/suites/variant_p0/nested.groovy | 7 +------ .../test_array_contains_with_inverted_index.groovy | 2 +- 10 files changed, 22 insertions(+), 21 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 073d121cc82..7d9e375891c 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -461,11 +461,15 @@ Status VariantColumnReader::new_iterator(ColumnIterator** iterator, const Tablet } // if path exists in sparse column, read sparse column with extract reader - if (existed_in_sparse_column) { - // Sparse column exists or reached sparse size limit, read sparse column + if (existed_in_sparse_column && !node) { + // node should be nullptr, example + // {"b" : {"c":456}} b.c in subcolumn + // {"b" : 123} b in sparse column + // Then we should use hierarchical reader to read b ColumnIterator* inner_iter; RETURN_IF_ERROR(_sparse_column_reader->new_iterator(&inner_iter)); DCHECK(opt); + // Sparse column exists or reached sparse size limit, read sparse column *iterator = new SparseColumnExtractReader(relative_path.get_path(), std::unique_ptr<ColumnIterator>(inner_iter), nullptr, target_col); diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp index 82d41519466..2367a38821b 100644 --- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp +++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp @@ -24,9 +24,9 @@ #include "olap/rowset/segment_v2/column_reader.h" #include "vec/columns/column.h" #include "vec/columns/column_map.h" +#include "vec/columns/column_nothing.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_object.h" -#include "vec/columns/column_nothing.h" #include "vec/common/assert_cast.h" #include "vec/common/schema_util.h" #include "vec/data_types/data_type.h" @@ -427,6 +427,14 @@ Status HierarchicalDataReader::_init_null_map_and_clear_columns( dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); // clear nullmap and inner data src_null_map.clear(); + } else { + if (dst->is_nullable()) { + // No nullable info exist in hirearchical data, fill nullmap with all none null + ColumnUInt8& dst_null_map = + assert_cast<ColumnNullable&>(*dst).get_null_map_column(); + auto fake_nullable_column = ColumnUInt8::create(nrows, 0); + dst_null_map.insert_range_from(*fake_nullable_column, 0, nrows); + } } _root_reader->column->clear(); } else { diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 2aeebb72fa7..90720c57235 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -2081,7 +2081,7 @@ Status ColumnObject::finalize(FinalizeMode mode) { for (size_t i = 0; i < std::min(size_t(_max_subcolumns_count), sorted_by_size.size()); ++i) { // if too many null values, then consider it as sparse column - if ((double) sorted_by_size[i].second < (double) num_rows * 0.95) { + if ((double)sorted_by_size[i].second < (double)num_rows * 0.95) { continue; } selected_path.insert(sorted_by_size[i].first); diff --git a/regression-test/data/variant_p0/compaction/test_compaction.out b/regression-test/data/variant_p0/compaction/test_compaction.out index 8c7a4f7887d..a5c4281ee98 100644 Binary files a/regression-test/data/variant_p0/compaction/test_compaction.out and b/regression-test/data/variant_p0/compaction/test_compaction.out differ diff --git a/regression-test/data/variant_p0/schema_change/schema_change.out b/regression-test/data/variant_p0/schema_change/schema_change.out index d99799dc3a0..0a0b49a923d 100644 Binary files a/regression-test/data/variant_p0/schema_change/schema_change.out and b/regression-test/data/variant_p0/schema_change/schema_change.out differ diff --git a/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out b/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out index ff409b7405a..33c2d34685c 100644 Binary files a/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out and b/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out differ diff --git a/regression-test/suites/variant_github_events_nonConcurrent_p2/load.groovy b/regression-test/suites/variant_github_events_nonConcurrent_p2/load.groovy index 3641264953c..3dbf4ae945b 100644 --- a/regression-test/suites/variant_github_events_nonConcurrent_p2/load.groovy +++ b/regression-test/suites/variant_github_events_nonConcurrent_p2/load.groovy @@ -150,8 +150,8 @@ suite("regression_test_variant_github_events_p2", "nonConcurrent,p2"){ def table_name = "github_events" sql """DROP TABLE IF EXISTS ${table_name}""" table_name = "github_events" - // int rand_subcolumns_count = Math.floor(Math.random() * (611 - 511 + 1)) + 511 - int rand_subcolumns_count = 10; + int rand_subcolumns_count = Math.floor(Math.random() * (611 - 511 + 1)) + 511 + // int rand_subcolumns_count = 0; sql """ CREATE TABLE IF NOT EXISTS ${table_name} ( k bigint, @@ -206,14 +206,8 @@ suite("regression_test_variant_github_events_p2", "nonConcurrent,p2"){ v variant not null ) UNIQUE KEY(`k`) -<<<<<<< HEAD:regression-test/suites/variant_github_events_p2/load.groovy DISTRIBUTED BY HASH(k) BUCKETS 4 - properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_enable_flatten_nested" = "false", - "variant_max_subcolumns_count" = "${rand_subcolumns_count}"); -======= - DISTRIBUTED BY HASH(k) BUCKETS 4 - properties("replication_num" = "1", "disable_auto_compaction" = "false", "bloom_filter_columns" = "v", "variant_enable_flatten_nested" = "true"); ->>>>>>> upstream-apache/master:regression-test/suites/variant_github_events_nonConcurrent_p2/load.groovy + properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_enable_flatten_nested" = "false", "bloom_filter_columns" = "v", "variant_max_subcolumns_count" = "${rand_subcolumns_count}"); """ sql """insert into github_events2 select * from github_events order by k""" sql """select v['payload']['commits'] from github_events order by k ;""" diff --git a/regression-test/suites/variant_p0/compaction/test_compaction.groovy b/regression-test/suites/variant_p0/compaction/test_compaction.groovy index 2bc61a38057..69747015320 100644 --- a/regression-test/suites/variant_p0/compaction/test_compaction.groovy +++ b/regression-test/suites/variant_p0/compaction/test_compaction.groovy @@ -82,7 +82,7 @@ suite("test_compaction_variant") { qt_sql_1 "SELECT * FROM ${tableName} ORDER BY k, cast(v as string); " qt_sql_2 "select k, cast(v['a'] as array<int>) from ${tableName} where size(cast(v['a'] as array<int>)) > 0 order by k" qt_sql_3 "select k, v['a'], cast(v['b'] as string) from ${tableName} where length(cast(v['b'] as string)) > 4 order by k" - qt_sql_5 "select cast(v['b'] as string), cast(v['b']['c'] as string) from ${tableName} where cast(v['b'] as string) != 'null' or cast(v['b'] as string) != '{}' order by k desc, 1, 2 limit 10;" + qt_sql_5 "select cast(v['b'] as string), cast(v['b']['c'] as string) from ${tableName} where cast(v['b'] as string) != 'null' and cast(v['b'] as string) != '{}' order by k desc, 1, 2 limit 10;" //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,QueryHits,PathHash,MetaUrl,CompactionStatus diff --git a/regression-test/suites/variant_p0/nested.groovy b/regression-test/suites/variant_p0/nested.groovy index 97261545648..660d383977b 100644 --- a/regression-test/suites/variant_p0/nested.groovy +++ b/regression-test/suites/variant_p0/nested.groovy @@ -117,13 +117,8 @@ parallel_pipeline_task_num=7,profile_level=1,enable_pipeline_engine=true,enable_ properties("replication_num" = "1", "disable_auto_compaction" = "false", "enable_unique_key_merge_on_write" = "true", "variant_enable_flatten_nested" = "true", "variant_max_subcolumns_count" = "0"); """ sql """insert into var_nested2 select * from var_nested order by k limit 1024""" -<<<<<<< HEAD - qt_sql """select /*+SET_VAR(batch_size=4064,broker_load_batch_size=16352,disable_streaming_preaggregations=true,enable_distinct_streaming_aggregation=true,parallel_fragment_exec_instance_num=5,parallel_pipeline_task_num=1,profile_level=1,enable_pipeline_engine=false,enable_parallel_scan=true,parallel_scan_max_scanners_count=48,parallel_scan_min_rows_per_scanner=16384,enable_fold_constant_by_be=true,enable_rewrite_element_at_to_slot=true,runtime_filter_type=12,enable_parallel_res [...] + qt_sql """select /*+SET_VAR(batch_size=4064,broker_load_batch_size=16352,disable_streaming_preaggregations=true,enable_distinct_streaming_aggregation=true,parallel_pipeline_task_num=1,profile_level=1,enable_pipeline_engine=false,enable_parallel_scan=true,parallel_scan_max_scanners_count=48,parallel_scan_min_rows_per_scanner=16384,enable_fold_constant_by_be=true,enable_rewrite_element_at_to_slot=true,runtime_filter_type=12,enable_parallel_result_sink=false,enable_nereids_planner [...] qt_sql """select v['nested'] from var_nested2 where k < 10 and length(v['nested']) > 3 order by k limit 10;""" -======= - qt_sql """select /*+SET_VAR(batch_size=4064,broker_load_batch_size=16352,disable_streaming_preaggregations=true,enable_distinct_streaming_aggregation=true,parallel_pipeline_task_num=1,profile_level=1,enable_pipeline_engine=false,enable_parallel_scan=true,parallel_scan_max_scanners_count=48,parallel_scan_min_rows_per_scanner=16384,enable_fold_constant_by_be=true,enable_rewrite_element_at_to_slot=true,runtime_filter_type=12,enable_parallel_result_sink=false,enable_nereids_planner= [...] - qt_sql """select v['nested'] from var_nested2 where k < 10 order by k limit 10;""" ->>>>>>> upstream-apache/master // 0. nomal explode variant array order_qt_explode_sql """select count(),cast(vv['xx'] as int) from var_nested lateral view explode_variant_array(v['nested']) tmp as vv where vv['xx'] = 10 group by cast(vv['xx'] as int)""" sql """truncate table var_nested2""" diff --git a/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy b/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy index 06813514808..ea5cf37e3de 100644 --- a/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy +++ b/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_array_contains_with_inverted_index"){ +suite("test_array_contains_with_inverted_index") { // prepare test table def indexTblName = "tai" sql "set disable_inverted_index_v1_for_variant = false" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org