This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new fa40415f8ff [Fix](Variant) Fix variant compaction empty path handling
(#59544)
fa40415f8ff is described below
commit fa40415f8ffcd40abe41a28aa6175faaedc82033
Author: lihangyu <[email protected]>
AuthorDate: Mon Jan 5 11:18:57 2026 +0800
[Fix](Variant) Fix variant compaction empty path handling (#59544)
- Skip VARIANT root node when building sparse merge readers
---
.../segment_v2/variant/variant_column_reader.cpp | 5 +
.../test_variant_compaction_empty_path_bug.out | 80 ++++++++++++
.../test_variant_compaction_empty_path_bug.groovy | 142 +++++++++++++++++++++
3 files changed, 227 insertions(+)
diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
index 1e411dec498..a8a7a82dc5c 100644
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
@@ -327,6 +327,11 @@ Status
VariantColumnReader::_create_sparse_merge_reader(ColumnIteratorUPtr* iter
// Build substream reader tree for merging subcolumns into sparse column
SubstreamReaderTree src_subcolumns_for_sparse;
for (const auto& subcolumn_reader : *_subcolumns_meta_info) {
+ // NOTE: Skip the root node (empty parts). Do NOT skip "empty key"
subcolumns where
+ // path.get_path() may also be "" but parts are not empty. Otherwise
v[''] data will be lost.
+ if (subcolumn_reader->path.empty()) {
+ continue;
+ }
const auto& path = subcolumn_reader->path.get_path();
if (path_set_info.sparse_path_set.find(StringRef(path)) ==
path_set_info.sparse_path_set.end()) {
diff --git
a/regression-test/data/variant_p0/test_variant_compaction_empty_path_bug.out
b/regression-test/data/variant_p0/test_variant_compaction_empty_path_bug.out
new file mode 100644
index 00000000000..5a11b56fdc7
--- /dev/null
+++ b/regression-test/data/variant_p0/test_variant_compaction_empty_path_bug.out
@@ -0,0 +1,80 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !before_compaction --
+1 {"a":1,"b":2,"c":3}
+2 {"a":10,"b":20,"c":30}
+3 {"a":100,"b":200,"c":300}
+4 {"a":1,"d":4,"e":5,"f":6}
+5 {"b":2,"d":40,"e":50,"f":60}
+6 {"c":3,"d":400,"e":500,"f":600}
+7 {"a":7,"g":70,"h":700}
+8 {"b":8,"g":80,"h":800}
+9 {"c":9,"g":90,"h":900}
+10 {"":"empty_key_value","a":1000}
+11 {"":"empty_key_value2","b":2000}
+12 {"":"empty_key_value3","c":3000}
+13 {"a":13,"d":130}
+14 {"b":14,"e":140}
+15 {"c":15,"f":150}
+16 {"d":16,"g":160}
+17 {"e":17,"h":170}
+18 {"a":180,"f":18}
+
+-- !col_a_before --
+1 1
+2 10
+3 100
+4 1
+7 7
+10 1000
+13 13
+18 180
+
+-- !col_d_before --
+4 4
+5 40
+6 400
+13 130
+16 16
+
+-- !after_compaction --
+1 {"a":1,"b":2,"c":3}
+2 {"a":10,"b":20,"c":30}
+3 {"a":100,"b":200,"c":300}
+4 {"a":1,"d":4,"e":5,"f":6}
+5 {"b":2,"d":40,"e":50,"f":60}
+6 {"c":3,"d":400,"e":500,"f":600}
+7 {"a":7,"g":70,"h":700}
+8 {"b":8,"g":80,"h":800}
+9 {"c":9,"g":90,"h":900}
+10 {"":"empty_key_value","a":1000}
+11 {"":"empty_key_value2","b":2000}
+12 {"":"empty_key_value3","c":3000}
+13 {"a":13,"d":130}
+14 {"b":14,"e":140}
+15 {"c":15,"f":150}
+16 {"d":16,"g":160}
+17 {"e":17,"h":170}
+18 {"a":180,"f":18}
+
+-- !col_a_after --
+1 1
+2 10
+3 100
+4 1
+7 7
+10 1000
+13 13
+18 180
+
+-- !col_d_after --
+4 4
+5 40
+6 400
+13 130
+16 16
+
+-- !empty_key --
+10 empty_key_value
+11 empty_key_value2
+12 empty_key_value3
+
diff --git
a/regression-test/suites/variant_p0/test_variant_compaction_empty_path_bug.groovy
b/regression-test/suites/variant_p0/test_variant_compaction_empty_path_bug.groovy
new file mode 100644
index 00000000000..66af55ac3b6
--- /dev/null
+++
b/regression-test/suites/variant_p0/test_variant_compaction_empty_path_bug.groovy
@@ -0,0 +1,142 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_variant_compaction_empty_path_bug", "nonConcurrent") {
+ def tableName = "test_variant_empty_path_compaction"
+
+ try {
+ sql "DROP TABLE IF EXISTS ${tableName}"
+
+ // Create a single-bucket DUPLICATE KEY table with auto compaction disabled
+ // Set variant_max_subcolumns_count to 3, so any columns beyond the
top 3 will become sparse
+ // This triggers the sparse column merge logic during compaction
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+ k bigint,
+ v variant< properties("variant_max_subcolumns_count" = "3")>
+ )
+ DUPLICATE KEY(`k`)
+ DISTRIBUTED BY HASH(k) BUCKETS 1
+ properties("replication_num" = "1", "disable_auto_compaction" =
"true");
+ """
+
+ logger.info("Testing variant compaction with empty path in sparse
columns")
+
+ // Insert data with multiple different subcolumns
+ // Strategy: insert 9 distinct keys (a-h plus the empty key "") to exceed the limit of 3
+ // The most frequently used 3 columns will be materialized, others
will be sparse
+
+ // First batch: establish column usage patterns
+ sql """INSERT INTO ${tableName} VALUES
+ (1, '{"a": 1, "b": 2, "c": 3}'),
+ (2, '{"a": 10, "b": 20, "c": 30}'),
+ (3, '{"a": 100, "b": 200, "c": 300}')
+ """
+
+ // Second batch: introduce additional columns that will become sparse
+ sql """INSERT INTO ${tableName} VALUES
+ (4, '{"a": 1, "d": 4, "e": 5, "f": 6}'),
+ (5, '{"b": 2, "d": 40, "e": 50, "f": 60}'),
+ (6, '{"c": 3, "d": 400, "e": 500, "f": 600}')
+ """
+
+ // Third batch: more sparse columns
+ sql """INSERT INTO ${tableName} VALUES
+ (7, '{"a": 7, "g": 70, "h": 700}'),
+ (8, '{"b": 8, "g": 80, "h": 800}'),
+ (9, '{"c": 9, "g": 90, "h": 900}')
+ """
+
+ // Fourth batch: edge case - JSON with empty key
+ // This creates a scenario where statistics might contain empty path
+ sql """INSERT INTO ${tableName} VALUES
+ (10, '{"": "empty_key_value", "a": 1000}'),
+ (11, '{"": "empty_key_value2", "b": 2000}'),
+ (12, '{"": "empty_key_value3", "c": 3000}')
+ """
+
+ // Additional inserts to create more rowsets for compaction
+ sql """INSERT INTO ${tableName} VALUES
+ (13, '{"a": 13, "d": 130}'),
+ (14, '{"b": 14, "e": 140}'),
+ (15, '{"c": 15, "f": 150}')
+ """
+
+ sql """INSERT INTO ${tableName} VALUES
+ (16, '{"d": 16, "g": 160}'),
+ (17, '{"e": 17, "h": 170}'),
+ (18, '{"f": 18, "a": 180}')
+ """
+
+ // Verify all 18 inserted rows are visible before compaction
+ def count_before = sql "SELECT COUNT(*) FROM ${tableName}"
+ logger.info("Row count before compaction: ${count_before[0][0]}")
+ assertEquals(18, count_before[0][0])
+
+ // Query to verify data integrity before compaction
+ qt_before_compaction "SELECT k, cast(v as string) FROM ${tableName}
ORDER BY k"
+
+ // Test specific column access
+ qt_col_a_before "SELECT k, v['a'] FROM ${tableName} WHERE v['a'] IS
NOT NULL ORDER BY k"
+ qt_col_d_before "SELECT k, v['d'] FROM ${tableName} WHERE v['d'] IS
NOT NULL ORDER BY k"
+
+ logger.info("Data inserted, now triggering compaction...")
+ logger.info("Expected behavior: columns a,b,c materialized, d,e,f,g,h
as sparse")
+ logger.info("Bug scenario: if root node (empty path) is not skipped in
_create_sparse_merge_reader")
+ logger.info(" it will call
VariantColumnReader::new_iterator with 3 params")
+ logger.info(" which returns NOT_IMPLEMENTED_ERROR")
+
+ // Trigger compaction - this may reproduce the NOT_IMPLEMENTED_ERROR
bug
+ def tablets = sql_return_maparray "SHOW TABLETS FROM ${tableName}"
+
+ try {
+ trigger_and_wait_compaction(tableName, "cumulative")
+ logger.info("Compaction completed successfully")
+
+ // Verify no rows were lost: the count must still be 18 after compaction
+ def count_after = sql "SELECT COUNT(*) FROM ${tableName}"
+ logger.info("Row count after compaction: ${count_after[0][0]}")
+ assertEquals(18, count_after[0][0])
+
+ // Query to verify data integrity after compaction
+ qt_after_compaction "SELECT k, cast(v as string) FROM ${tableName}
ORDER BY k"
+
+ // Test specific column access after compaction
+ qt_col_a_after "SELECT k, v['a'] FROM ${tableName} WHERE v['a'] IS
NOT NULL ORDER BY k"
+ qt_col_d_after "SELECT k, v['d'] FROM ${tableName} WHERE v['d'] IS
NOT NULL ORDER BY k"
+
+ // Empty-key access: v[''] must still be readable after compaction
+ qt_empty_key "SELECT k, v[''] FROM ${tableName} WHERE v[''] IS NOT
NULL ORDER BY k"
+
+ } catch (Exception e) {
+ logger.error("Compaction failed with error: ${e.getMessage()}", e)
+
+ // Log a dedicated message for NOT_IMPLEMENTED_ERROR; note both branches rethrow e
+ if (e.getMessage().contains("NOT_IMPLEMENTED_ERROR") ||
+ e.getMessage().contains("Not implemented")) {
+ logger.error("BUG REPRODUCED: Compaction failed with
NOT_IMPLEMENTED_ERROR")
+ throw e
+ } else {
+ // Any other error is rethrown unchanged as well
+ throw e
+ }
+ }
+
+ } finally {
+ sql "DROP TABLE IF EXISTS ${tableName}"
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]