This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 1f2257e4b9399a6136d3705f5195cdfdd65fe492
Author: eldenmoon <lihan...@selectdb.com>
AuthorDate: Mon Mar 10 18:39:48 2025 +0800

    fix case after rebase
---
 be/src/olap/rowset/segment_v2/column_reader.cpp     |   8 ++++++--
 .../rowset/segment_v2/hierarchical_data_reader.cpp  |  10 +++++++++-
 be/src/vec/columns/column_object.cpp                |   2 +-
 .../data/variant_p0/compaction/test_compaction.out  | Bin 5819 -> 5787 bytes
 .../data/variant_p0/schema_change/schema_change.out | Bin 1108 -> 1104 bytes
 .../test_array_contains_with_inverted_index.out     | Bin 45214 -> 45954 bytes
 .../load.groovy                                     |  12 +++---------
 .../variant_p0/compaction/test_compaction.groovy    |   2 +-
 regression-test/suites/variant_p0/nested.groovy     |   7 +------
 .../test_array_contains_with_inverted_index.groovy  |   2 +-
 10 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 073d121cc82..7d9e375891c 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -461,11 +461,15 @@ Status VariantColumnReader::new_iterator(ColumnIterator** 
iterator, const Tablet
     }
 
     // if path exists in sparse column, read sparse column with extract reader
-    if (existed_in_sparse_column) {
-        // Sparse column exists or reached sparse size limit, read sparse 
column
+    if (existed_in_sparse_column && !node) {
+        // Only read directly from the sparse column when node is nullptr. Otherwise, e.g.
+        // {"b" : {"c":456}}   b.c in subcolumn
+        // {"b" : 123}         b in sparse column
+        // node is not nullptr, and we should use the hierarchical reader to read b
         ColumnIterator* inner_iter;
         RETURN_IF_ERROR(_sparse_column_reader->new_iterator(&inner_iter));
         DCHECK(opt);
+        // Sparse column exists or reached sparse size limit, read sparse 
column
         *iterator = new SparseColumnExtractReader(relative_path.get_path(),
                                                   
std::unique_ptr<ColumnIterator>(inner_iter),
                                                   nullptr, target_col);
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp 
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
index 82d41519466..2367a38821b 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
@@ -24,9 +24,9 @@
 #include "olap/rowset/segment_v2/column_reader.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_map.h"
+#include "vec/columns/column_nothing.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_object.h"
-#include "vec/columns/column_nothing.h"
 #include "vec/common/assert_cast.h"
 #include "vec/common/schema_util.h"
 #include "vec/data_types/data_type.h"
@@ -427,6 +427,14 @@ Status 
HierarchicalDataReader::_init_null_map_and_clear_columns(
             dst_null_map.insert_range_from(src_null_map, 0, 
src_null_map.size());
             // clear nullmap and inner data
             src_null_map.clear();
+        } else {
+            if (dst->is_nullable()) {
+                // No nullable info exists in hierarchical data, fill nullmap 
with all non-null
+                ColumnUInt8& dst_null_map =
+                        
assert_cast<ColumnNullable&>(*dst).get_null_map_column();
+                auto fake_nullable_column = ColumnUInt8::create(nrows, 0);
+                dst_null_map.insert_range_from(*fake_nullable_column, 0, 
nrows);
+            }
         }
         _root_reader->column->clear();
     } else {
diff --git a/be/src/vec/columns/column_object.cpp 
b/be/src/vec/columns/column_object.cpp
index 2aeebb72fa7..90720c57235 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -2081,7 +2081,7 @@ Status ColumnObject::finalize(FinalizeMode mode) {
         for (size_t i = 0; i < std::min(size_t(_max_subcolumns_count), 
sorted_by_size.size());
              ++i) {
             // if too many null values, then consider it as sparse column
-            if ((double) sorted_by_size[i].second < (double) num_rows * 0.95) {
+            if ((double)sorted_by_size[i].second < (double)num_rows * 0.95) {
                 continue;
             }
             selected_path.insert(sorted_by_size[i].first);
diff --git a/regression-test/data/variant_p0/compaction/test_compaction.out 
b/regression-test/data/variant_p0/compaction/test_compaction.out
index 8c7a4f7887d..a5c4281ee98 100644
Binary files a/regression-test/data/variant_p0/compaction/test_compaction.out 
and b/regression-test/data/variant_p0/compaction/test_compaction.out differ
diff --git a/regression-test/data/variant_p0/schema_change/schema_change.out 
b/regression-test/data/variant_p0/schema_change/schema_change.out
index d99799dc3a0..0a0b49a923d 100644
Binary files a/regression-test/data/variant_p0/schema_change/schema_change.out 
and b/regression-test/data/variant_p0/schema_change/schema_change.out differ
diff --git 
a/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out
 
b/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out
index ff409b7405a..33c2d34685c 100644
Binary files 
a/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out
 and 
b/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out
 differ
diff --git 
a/regression-test/suites/variant_github_events_nonConcurrent_p2/load.groovy 
b/regression-test/suites/variant_github_events_nonConcurrent_p2/load.groovy
index 3641264953c..3dbf4ae945b 100644
--- a/regression-test/suites/variant_github_events_nonConcurrent_p2/load.groovy
+++ b/regression-test/suites/variant_github_events_nonConcurrent_p2/load.groovy
@@ -150,8 +150,8 @@ suite("regression_test_variant_github_events_p2", 
"nonConcurrent,p2"){
     def table_name = "github_events"
     sql """DROP TABLE IF EXISTS ${table_name}"""
     table_name = "github_events"
-    // int rand_subcolumns_count = Math.floor(Math.random() * (611 - 511 + 1)) 
+ 511
-    int rand_subcolumns_count = 10;
+    int rand_subcolumns_count = Math.floor(Math.random() * (611 - 511 + 1)) + 
511
+    // int rand_subcolumns_count = 0;
     sql """
         CREATE TABLE IF NOT EXISTS ${table_name} (
             k bigint,
@@ -206,14 +206,8 @@ suite("regression_test_variant_github_events_p2", 
"nonConcurrent,p2"){
             v variant not null
         )
         UNIQUE KEY(`k`)
-<<<<<<< HEAD:regression-test/suites/variant_github_events_p2/load.groovy
         DISTRIBUTED BY HASH(k) BUCKETS 4 
-        properties("replication_num" = "1", "disable_auto_compaction" = 
"false", "variant_enable_flatten_nested" = "false",
-                            "variant_max_subcolumns_count" = 
"${rand_subcolumns_count}");
-=======
-        DISTRIBUTED BY HASH(k) BUCKETS 4
-        properties("replication_num" = "1", "disable_auto_compaction" = 
"false", "bloom_filter_columns" = "v", "variant_enable_flatten_nested" = 
"true");
->>>>>>> 
upstream-apache/master:regression-test/suites/variant_github_events_nonConcurrent_p2/load.groovy
+        properties("replication_num" = "1", "disable_auto_compaction" = 
"false", "variant_enable_flatten_nested" = "false", "bloom_filter_columns" = 
"v", "variant_max_subcolumns_count" = "${rand_subcolumns_count}");
         """
     sql """insert into github_events2 select * from github_events order by k"""
     sql """select v['payload']['commits'] from github_events order by k ;"""
diff --git 
a/regression-test/suites/variant_p0/compaction/test_compaction.groovy 
b/regression-test/suites/variant_p0/compaction/test_compaction.groovy
index 2bc61a38057..69747015320 100644
--- a/regression-test/suites/variant_p0/compaction/test_compaction.groovy
+++ b/regression-test/suites/variant_p0/compaction/test_compaction.groovy
@@ -82,7 +82,7 @@ suite("test_compaction_variant") {
             qt_sql_1 "SELECT * FROM ${tableName} ORDER BY k, cast(v as 
string); "
             qt_sql_2 "select k, cast(v['a'] as array<int>) from  ${tableName} 
where  size(cast(v['a'] as array<int>)) > 0 order by k"
             qt_sql_3 "select k, v['a'], cast(v['b'] as string) from  
${tableName} where  length(cast(v['b'] as string)) > 4 order  by k"
-            qt_sql_5 "select cast(v['b'] as string), cast(v['b']['c'] as 
string) from  ${tableName} where cast(v['b'] as string) != 'null' or 
cast(v['b'] as string) != '{}' order by k desc, 1, 2 limit 10;"
+            qt_sql_5 "select cast(v['b'] as string), cast(v['b']['c'] as 
string) from  ${tableName} where cast(v['b'] as string) != 'null' and 
cast(v['b'] as string) != '{}' order by k desc, 1, 2 limit 10;"
 
 
             
//TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,QueryHits,PathHash,MetaUrl,CompactionStatus
diff --git a/regression-test/suites/variant_p0/nested.groovy 
b/regression-test/suites/variant_p0/nested.groovy
index 97261545648..660d383977b 100644
--- a/regression-test/suites/variant_p0/nested.groovy
+++ b/regression-test/suites/variant_p0/nested.groovy
@@ -117,13 +117,8 @@ 
parallel_pipeline_task_num=7,profile_level=1,enable_pipeline_engine=true,enable_
                 properties("replication_num" = "1", "disable_auto_compaction" 
= "false", "enable_unique_key_merge_on_write" = "true", 
"variant_enable_flatten_nested" = "true", "variant_max_subcolumns_count" = "0");
             """
         sql """insert into var_nested2 select * from var_nested order by k 
limit 1024"""
-<<<<<<< HEAD
-        qt_sql """select  
/*+SET_VAR(batch_size=4064,broker_load_batch_size=16352,disable_streaming_preaggregations=true,enable_distinct_streaming_aggregation=true,parallel_fragment_exec_instance_num=5,parallel_pipeline_task_num=1,profile_level=1,enable_pipeline_engine=false,enable_parallel_scan=true,parallel_scan_max_scanners_count=48,parallel_scan_min_rows_per_scanner=16384,enable_fold_constant_by_be=true,enable_rewrite_element_at_to_slot=true,runtime_filter_type=12,enable_parallel_res
 [...]
+        qt_sql """select   
/*+SET_VAR(batch_size=4064,broker_load_batch_size=16352,disable_streaming_preaggregations=true,enable_distinct_streaming_aggregation=true,parallel_pipeline_task_num=1,profile_level=1,enable_pipeline_engine=false,enable_parallel_scan=true,parallel_scan_max_scanners_count=48,parallel_scan_min_rows_per_scanner=16384,enable_fold_constant_by_be=true,enable_rewrite_element_at_to_slot=true,runtime_filter_type=12,enable_parallel_result_sink=false,enable_nereids_planner
 [...]
         qt_sql """select v['nested'] from var_nested2 where k < 10 and 
length(v['nested']) > 3 order by k limit 10;"""
-=======
-        qt_sql """select  
/*+SET_VAR(batch_size=4064,broker_load_batch_size=16352,disable_streaming_preaggregations=true,enable_distinct_streaming_aggregation=true,parallel_pipeline_task_num=1,profile_level=1,enable_pipeline_engine=false,enable_parallel_scan=true,parallel_scan_max_scanners_count=48,parallel_scan_min_rows_per_scanner=16384,enable_fold_constant_by_be=true,enable_rewrite_element_at_to_slot=true,runtime_filter_type=12,enable_parallel_result_sink=false,enable_nereids_planner=
 [...]
-        qt_sql """select v['nested'] from var_nested2 where k < 10 order by k 
limit 10;"""
->>>>>>> upstream-apache/master
         // 0. normal explode variant array
         order_qt_explode_sql """select count(),cast(vv['xx'] as int) from 
var_nested lateral view explode_variant_array(v['nested']) tmp as vv where 
vv['xx'] = 10 group by cast(vv['xx'] as int)"""
         sql """truncate table var_nested2"""
diff --git 
a/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy
 
b/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy
index 06813514808..ea5cf37e3de 100644
--- 
a/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy
+++ 
b/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-suite("test_array_contains_with_inverted_index"){
+suite("test_array_contains_with_inverted_index") {
     // prepare test table
     def indexTblName = "tai"
     sql "set disable_inverted_index_v1_for_variant = false"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to