This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fa40415f8ff [Fix](Variant) Fix variant compaction empty path handling (#59544)
fa40415f8ff is described below

commit fa40415f8ffcd40abe41a28aa6175faaedc82033
Author: lihangyu <[email protected]>
AuthorDate: Mon Jan 5 11:18:57 2026 +0800

    [Fix](Variant) Fix variant compaction empty path handling (#59544)
    
    - Skip VARIANT root node when building sparse merge readers
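
    For context, a minimal sketch of why the guard tests whether the path
    has no parts rather than whether its joined string is empty. The type
    below is a simplified stand-in for Doris's vectorized::PathInData, not
    the real class: the VARIANT root has zero path parts, while the
    subcolumn for an empty JSON key has exactly one part that happens to
    be the empty string, so both stringify to "".

        #include <cassert>
        #include <string>
        #include <vector>

        // Simplified stand-in for the path type walked by the variant reader.
        struct PathInData {
            std::vector<std::string> parts;

            // Root node: no parts at all.
            bool empty() const { return parts.empty(); }

            // Joined dotted path, e.g. {"a", "b"} -> "a.b".
            std::string get_path() const {
                std::string joined;
                for (size_t i = 0; i < parts.size(); ++i) {
                    if (i > 0) joined += '.';
                    joined += parts[i];
                }
                return joined;
            }
        };

        int main() {
            PathInData root{};           // VARIANT root: parts is empty
            PathInData empty_key{{""}};  // subcolumn for JSON key "": one empty part

            // Both stringify to "", so get_path() alone cannot tell them apart...
            assert(root.get_path().empty() && empty_key.get_path().empty());
            // ...but parts-emptiness can, which is what the fixed guard checks.
            assert(root.empty() && !empty_key.empty());
            return 0;
        }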
---
 .../segment_v2/variant/variant_column_reader.cpp   |   5 +
 .../test_variant_compaction_empty_path_bug.out     |  80 ++++++++++++
 .../test_variant_compaction_empty_path_bug.groovy  | 142 +++++++++++++++++++++
 3 files changed, 227 insertions(+)

diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
index 1e411dec498..a8a7a82dc5c 100644
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
@@ -327,6 +327,11 @@ Status VariantColumnReader::_create_sparse_merge_reader(ColumnIteratorUPtr* iter
     // Build substream reader tree for merging subcolumns into sparse column
     SubstreamReaderTree src_subcolumns_for_sparse;
     for (const auto& subcolumn_reader : *_subcolumns_meta_info) {
+        // NOTE: Skip the root node (empty parts). Do NOT skip "empty key" subcolumns where
+        // path.get_path() may also be "" but parts are not empty. Otherwise v[''] data will be lost.
+        if (subcolumn_reader->path.empty()) {
+            continue;
+        }
         const auto& path = subcolumn_reader->path.get_path();
         if (path_set_info.sparse_path_set.find(StringRef(path)) ==
             path_set_info.sparse_path_set.end()) {
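
To make the guard's effect concrete, here is a self-contained sketch of the
selection loop. The entry type and container are hypothetical stand-ins for
the SubstreamReaderTree walked by _create_sparse_merge_reader, not the real
Doris types: only the root entry (no path parts) is skipped, so the empty-key
subcolumn still reaches the sparse merge set.

    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for one entry of the subcolumn reader tree.
    struct SubcolumnEntry {
        std::vector<std::string> path_parts; // empty => VARIANT root
    };

    int main() {
        std::vector<SubcolumnEntry> subcolumns;
        subcolumns.push_back({});        // root node: no parts
        subcolumns.push_back({{""}});    // empty JSON key, i.e. v['']
        subcolumns.push_back({{"a"}});   // ordinary subcolumn v['a']

        std::vector<SubcolumnEntry> for_sparse_merge;
        for (const auto& entry : subcolumns) {
            if (entry.path_parts.empty()) {
                continue; // skip only the root, mirroring the fixed guard
            }
            for_sparse_merge.push_back(entry);
        }

        // Two entries survive: the empty-key subcolumn and "a". Before the
        // fix, the root entry was also handed to the per-subcolumn iterator
        // factory, which rejected it and failed the compaction.
        std::cout << for_sparse_merge.size() << '\n'; // prints 2
        return 0;
    }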
diff --git a/regression-test/data/variant_p0/test_variant_compaction_empty_path_bug.out b/regression-test/data/variant_p0/test_variant_compaction_empty_path_bug.out
new file mode 100644
index 00000000000..5a11b56fdc7
--- /dev/null
+++ b/regression-test/data/variant_p0/test_variant_compaction_empty_path_bug.out
@@ -0,0 +1,80 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !before_compaction --
+1      {"a":1,"b":2,"c":3}
+2      {"a":10,"b":20,"c":30}
+3      {"a":100,"b":200,"c":300}
+4      {"a":1,"d":4,"e":5,"f":6}
+5      {"b":2,"d":40,"e":50,"f":60}
+6      {"c":3,"d":400,"e":500,"f":600}
+7      {"a":7,"g":70,"h":700}
+8      {"b":8,"g":80,"h":800}
+9      {"c":9,"g":90,"h":900}
+10     {"":"empty_key_value","a":1000}
+11     {"":"empty_key_value2","b":2000}
+12     {"":"empty_key_value3","c":3000}
+13     {"a":13,"d":130}
+14     {"b":14,"e":140}
+15     {"c":15,"f":150}
+16     {"d":16,"g":160}
+17     {"e":17,"h":170}
+18     {"a":180,"f":18}
+
+-- !col_a_before --
+1      1
+2      10
+3      100
+4      1
+7      7
+10     1000
+13     13
+18     180
+
+-- !col_d_before --
+4      4
+5      40
+6      400
+13     130
+16     16
+
+-- !after_compaction --
+1      {"a":1,"b":2,"c":3}
+2      {"a":10,"b":20,"c":30}
+3      {"a":100,"b":200,"c":300}
+4      {"a":1,"d":4,"e":5,"f":6}
+5      {"b":2,"d":40,"e":50,"f":60}
+6      {"c":3,"d":400,"e":500,"f":600}
+7      {"a":7,"g":70,"h":700}
+8      {"b":8,"g":80,"h":800}
+9      {"c":9,"g":90,"h":900}
+10     {"":"empty_key_value","a":1000}
+11     {"":"empty_key_value2","b":2000}
+12     {"":"empty_key_value3","c":3000}
+13     {"a":13,"d":130}
+14     {"b":14,"e":140}
+15     {"c":15,"f":150}
+16     {"d":16,"g":160}
+17     {"e":17,"h":170}
+18     {"a":180,"f":18}
+
+-- !col_a_after --
+1      1
+2      10
+3      100
+4      1
+7      7
+10     1000
+13     13
+18     180
+
+-- !col_d_after --
+4      4
+5      40
+6      400
+13     130
+16     16
+
+-- !empty_key --
+10     empty_key_value
+11     empty_key_value2
+12     empty_key_value3
+
diff --git a/regression-test/suites/variant_p0/test_variant_compaction_empty_path_bug.groovy b/regression-test/suites/variant_p0/test_variant_compaction_empty_path_bug.groovy
new file mode 100644
index 00000000000..66af55ac3b6
--- /dev/null
+++ b/regression-test/suites/variant_p0/test_variant_compaction_empty_path_bug.groovy
@@ -0,0 +1,142 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_variant_compaction_empty_path_bug", "nonConcurrent") {
+    def tableName = "test_variant_empty_path_compaction"
+
+    try {
+        sql "DROP TABLE IF EXISTS ${tableName}"
+
+        // Create table with variant column
+        // Set variant_max_subcolumns_count to 3, so any columns beyond the top 3 will become sparse
+        // This triggers the sparse column merge logic during compaction
+        sql """
+            CREATE TABLE IF NOT EXISTS ${tableName} (
+                k bigint,
+                v variant< properties("variant_max_subcolumns_count" = "3")>
+            )
+            DUPLICATE KEY(`k`)
+            DISTRIBUTED BY HASH(k) BUCKETS 1
+            properties("replication_num" = "1", "disable_auto_compaction" = 
"true");
+        """
+
+        logger.info("Testing variant compaction with empty path in sparse 
columns")
+
+        // Insert data with multiple different subcolumns
+        // Strategy: Insert 6+ different subcolumns to exceed the limit of 3
+        // The most frequently used 3 columns will be materialized, others will be sparse
+
+        // First batch: establish column usage patterns
+        sql """INSERT INTO ${tableName} VALUES
+            (1, '{"a": 1, "b": 2, "c": 3}'),
+            (2, '{"a": 10, "b": 20, "c": 30}'),
+            (3, '{"a": 100, "b": 200, "c": 300}')
+        """
+
+        // Second batch: introduce additional columns that will become sparse
+        sql """INSERT INTO ${tableName} VALUES
+            (4, '{"a": 1, "d": 4, "e": 5, "f": 6}'),
+            (5, '{"b": 2, "d": 40, "e": 50, "f": 60}'),
+            (6, '{"c": 3, "d": 400, "e": 500, "f": 600}')
+        """
+
+        // Third batch: more sparse columns
+        sql """INSERT INTO ${tableName} VALUES
+            (7, '{"a": 7, "g": 70, "h": 700}'),
+            (8, '{"b": 8, "g": 80, "h": 800}'),
+            (9, '{"c": 9, "g": 90, "h": 900}')
+        """
+
+        // Fourth batch: edge case - JSON with empty key
+        // This creates a scenario where statistics might contain empty path
+        sql """INSERT INTO ${tableName} VALUES
+            (10, '{"": "empty_key_value", "a": 1000}'),
+            (11, '{"": "empty_key_value2", "b": 2000}'),
+            (12, '{"": "empty_key_value3", "c": 3000}')
+        """
+
+        // Additional inserts to create more rowsets for compaction
+        sql """INSERT INTO ${tableName} VALUES
+            (13, '{"a": 13, "d": 130}'),
+            (14, '{"b": 14, "e": 140}'),
+            (15, '{"c": 15, "f": 150}')
+        """
+
+        sql """INSERT INTO ${tableName} VALUES
+            (16, '{"d": 16, "g": 160}'),
+            (17, '{"e": 17, "h": 170}'),
+            (18, '{"f": 18, "a": 180}')
+        """
+
+        // Verify data before compaction
+        def count_before = sql "SELECT COUNT(*) FROM ${tableName}"
+        logger.info("Row count before compaction: ${count_before[0][0]}")
+        assertEquals(18, count_before[0][0])
+
+        // Query to verify data integrity before compaction
+        qt_before_compaction "SELECT k, cast(v as string) FROM ${tableName} 
ORDER BY k"
+
+        // Test specific column access
+        qt_col_a_before "SELECT k, v['a'] FROM ${tableName} WHERE v['a'] IS 
NOT NULL ORDER BY k"
+        qt_col_d_before "SELECT k, v['d'] FROM ${tableName} WHERE v['d'] IS 
NOT NULL ORDER BY k"
+
+        logger.info("Data inserted, now triggering compaction...")
+        logger.info("Expected behavior: columns a,b,c materialized, d,e,f,g,h 
as sparse")
+        logger.info("Bug scenario: if root node (empty path) is not skipped in 
_create_sparse_merge_reader")
+        logger.info("             it will call 
VariantColumnReader::new_iterator with 3 params")
+        logger.info("             which returns NOT_IMPLEMENTED_ERROR")
+
+        // Trigger compaction - this may reproduce the NOT_IMPLEMENTED_ERROR bug
+        def tablets = sql_return_maparray "SHOW TABLETS FROM ${tableName}"
+
+        try {
+            trigger_and_wait_compaction(tableName, "cumulative")
+            logger.info("Compaction completed successfully")
+
+            // Verify data after compaction
+            def count_after = sql "SELECT COUNT(*) FROM ${tableName}"
+            logger.info("Row count after compaction: ${count_after[0][0]}")
+            assertEquals(18, count_after[0][0])
+
+            // Query to verify data integrity after compaction
+            qt_after_compaction "SELECT k, cast(v as string) FROM ${tableName} ORDER BY k"
+
+            // Test specific column access after compaction
+            qt_col_a_after "SELECT k, v['a'] FROM ${tableName} WHERE v['a'] IS NOT NULL ORDER BY k"
+            qt_col_d_after "SELECT k, v['d'] FROM ${tableName} WHERE v['d'] IS NOT NULL ORDER BY k"
+
+            // Test empty key access if supported
+            qt_empty_key "SELECT k, v[''] FROM ${tableName} WHERE v[''] IS NOT NULL ORDER BY k"
+
+        } catch (Exception e) {
+            logger.error("Compaction failed with error: ${e.getMessage()}", e)
+
+            // Check if the error is the expected NOT_IMPLEMENTED_ERROR
+            if (e.getMessage().contains("NOT_IMPLEMENTED_ERROR") ||
+                e.getMessage().contains("Not implemented")) {
+                logger.error("BUG REPRODUCED: Compaction failed with 
NOT_IMPLEMENTED_ERROR")
+                throw e
+            } else {
+                // Different error, rethrow
+                throw e
+            }
+        }
+
+    } finally {
+        sql "DROP TABLE IF EXISTS ${tableName}"
+    }
+}

