This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 35089583cf8 [fix](variant) Handle truncated sparse path stats when reading variant (#64205)
35089583cf8 is described below
commit 35089583cf80e5117fc4cb2917b1daf8ce38a7e2
Author: lihangyu <[email protected]>
AuthorDate: Wed Jun 17 11:05:51 2026 +0800
[fix](variant) Handle truncated sparse path stats when reading variant (#64205)
---
.../segment/variant/variant_column_reader.cpp | 17 +++++++++------
.../apache/doris/common/util/PropertyAnalyzer.java | 8 ++++----
.../java/org/apache/doris/qe/SessionVariable.java | 9 ++++++++
.../apache/doris/common/PropertyAnalyzerTest.java | 22 ++++++++++++++++++++
.../ddl/create_nestedtypes_with_schemachange.out | 12 +++++------
.../test_variant_compaction_with_sparse_limit.out | 6 +++---
.../create_nestedtypes_with_schemachange.groovy | 2 +-
...est_variant_compaction_with_sparse_limit.groovy | 24 ++++++++++++++++++++--
8 files changed, 78 insertions(+), 22 deletions(-)
diff --git a/be/src/storage/segment/variant/variant_column_reader.cpp b/be/src/storage/segment/variant/variant_column_reader.cpp
index ded364b33c2..df9561ada7c 100644
--- a/be/src/storage/segment/variant/variant_column_reader.cpp
+++ b/be/src/storage/segment/variant/variant_column_reader.cpp
@@ -192,9 +192,9 @@ bool VariantColumnReader::is_exceeded_sparse_column_limit() const {
}
bool VariantColumnReader::_is_exceeded_sparse_column_limit_unlocked() const {
- bool exceeded_sparse_column_limit = !_statistics->sparse_column_non_null_size.empty() &&
- _statistics->sparse_column_non_null_size.size() >=
- _variant_sparse_column_statistics_size;
+ const bool exceeded_sparse_column_limit = !_statistics->sparse_column_non_null_size.empty() &&
+ _statistics->sparse_column_non_null_size.size() >=
+ _variant_sparse_column_statistics_size;
DBUG_EXECUTE_IF("exceeded_sparse_column_limit_must_be_false", {
if (exceeded_sparse_column_limit) {
throw doris::Exception(
@@ -882,8 +882,12 @@ Status VariantColumnReader::_build_read_plan(ReadPlan* plan, const TabletColumn&
}
// Check if path is prefix, example sparse columns path: a.b.c, a.b.e, access prefix: a.b.
- // Or access root path
- if (_has_prefix_path_unlocked(relative_path)) {
+ // Or access root path. If sparse stats reached the configured limit, an exact sparse path can
+ // still have unrecorded sparse children such as a.b.c.
+ const bool has_prefix_path = _has_prefix_path_unlocked(relative_path);
+ const bool sparse_stats_may_have_unrecorded_children =
+ exceeded_sparse_column_limit && existed_in_sparse_column;
+ if (has_prefix_path || sparse_stats_may_have_unrecorded_children) {
// Example {"b" : {"c":456,"e":7.111}}
// b.c is sparse column, b.e is subcolumn, so b is both the prefix of sparse column and
// subcolumn
@@ -951,7 +955,8 @@ Status VariantColumnReader::_build_read_plan(ReadPlan*
plan, const TabletColumn&
}
if (exceeded_sparse_column_limit) {
- // maybe exist prefix path in sparse column
+ // Sparse stats are truncated, so a missing exact sparse path does not prove that the
+ // path is absent. It may still be nested under a recorded sparse object.
plan->kind = ReadKind::HIERARCHICAL;
plan->type = create_variant_type(target_col);
plan->relative_path = relative_path;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
index 2e776e264bf..b27db96bbe1 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
@@ -2118,12 +2118,12 @@ public class PropertyAnalyzer {
properties.get(PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE);
try {
maxSparseColumnStatisticsSize =
Integer.parseInt(maxSparseColumnStatisticsSizeStr);
- if (maxSparseColumnStatisticsSize < 0 || maxSparseColumnStatisticsSize > 50000) {
- throw new AnalysisException("variant_max_sparse_column_statistics_size must between 0 and 50000 ");
- }
- } catch (Exception e) {
+ } catch (NumberFormatException e) {
throw new AnalysisException("variant_max_sparse_column_statistics_size format error:" + e.getMessage());
}
+ if (maxSparseColumnStatisticsSize < 1 || maxSparseColumnStatisticsSize > 50000) {
+ throw new AnalysisException("variant_max_sparse_column_statistics_size must between 1 and 50000 ");
+ }
properties.remove(PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index c9a17bc7527..5eacd86e5b0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -3597,6 +3597,7 @@ public class SessionVariable implements Serializable,
Writable {
@VarAttrDef.VarAttr(
name = DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE,
needForward = true,
+ checker = "checkDefaultVariantMaxSparseColumnStatisticsSize",
fuzzy = true
)
public int defaultVariantMaxSparseColumnStatisticsSize = 10000;
@@ -6446,6 +6447,14 @@ public class SessionVariable implements Serializable,
Writable {
}
}
+ public void checkDefaultVariantMaxSparseColumnStatisticsSize(String variantMaxSparseColumnStatisticsSize) {
+ int value = Integer.valueOf(variantMaxSparseColumnStatisticsSize);
+ if (value < 1 || value > 50000) {
+ throw new UnsupportedOperationException("variant max sparse column statistics size is: "
+ + variantMaxSparseColumnStatisticsSize + " it must between 1 and 50000");
+ }
+ }
+
public void checkHnswEfSearch(String efSearch) {
int value = Integer.valueOf(efSearch);
if (value < 1) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
index bd821e4d377..81878c9b1a3 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
@@ -28,6 +28,7 @@ import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.util.PropertyAnalyzer;
import org.apache.doris.common.util.TimeUtils;
+import org.apache.doris.qe.SessionVariable;
import org.apache.doris.resource.Tag;
import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
import org.apache.doris.thrift.TStorageFormat;
@@ -345,6 +346,19 @@ public class PropertyAnalyzerTest {
@Test
public void testAnalyzeVariantMaxSparseColumnStatisticsSize() throws
AnalysisException {
Map<String, String> properties = Maps.newHashMap();
+
properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE,
"0");
+ try {
+
PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0);
+ Assertions.fail("Expected AnalysisException was not thrown");
+ } catch (AnalysisException e) {
+ Assertions.assertNotNull(e.getMessage());
+ }
+ properties.clear();
+
properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE,
"1");
+ Assertions.assertEquals(1,
PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0));
+ Assertions.assertFalse(properties.containsKey(
+
PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE));
+ properties.clear();
properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE,
"-1");
try {
PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0);
@@ -370,6 +384,14 @@ public class PropertyAnalyzerTest {
}
}
+ @Test
+ public void testCheckDefaultVariantMaxSparseColumnStatisticsSize() {
+ SessionVariable sessionVariable = new SessionVariable();
+ Assertions.assertThrows(UnsupportedOperationException.class,
+ () ->
sessionVariable.checkDefaultVariantMaxSparseColumnStatisticsSize("0"));
+ sessionVariable.checkDefaultVariantMaxSparseColumnStatisticsSize("1");
+ }
+
@Test
public void testAnalyzeSequenceMap() throws AnalysisException {
List<Column> columns = Lists.newArrayList();
diff --git
a/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
b/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
index 86cce3569f7..725ed40d971 100644
---
a/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
+++
b/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
@@ -53,7 +53,7 @@ col2 int No false \N NONE
col3 array<int> Yes false \N NONE
col4 map<int,int> Yes false \N NONE
col5 struct<f1:int> Yes false \N NONE
-col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"0","variant_sparse_hash_shard_count" = "1")> Yes false \N NONE
+col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"1","variant_sparse_hash_shard_count" = "1")> Yes false \N NONE
-- !sql_after --
1 2 [1, 2] {1:2} {"f1":1} {"a":[1, 2, 3]}
@@ -67,7 +67,7 @@ col2 int No false \N NONE
col3 array<int> Yes false \N NONE
col4 map<int,int> No false \N NONE
col5 struct<f1:int> No false \N NONE
-col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"0","variant_sparse_hash_shard_count" = "1")> No false \N NONE
+col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"1","variant_sparse_hash_shard_count" = "1")> No false \N NONE
-- !sql_after --
1 2 [1, 2] {1:2} {"f1":1} {"a":[1, 2, 3]}
@@ -81,7 +81,7 @@ col2 int No false \N NONE
col3 array<int> Yes false \N NONE
col4 map<int,int> Yes false \N NONE
col5 struct<f1:int> Yes false \N NONE
-col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"0","variant_sparse_hash_shard_count" = "1")> Yes false \N NONE
+col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"1","variant_sparse_hash_shard_count" = "1")> Yes false \N NONE
-- !sql_after --
1 2 [1, 2] {1:2} {"f1":1} {"a":[1, 2, 3]}
@@ -95,7 +95,7 @@ col2 int No false \N NONE
col3 array<int> No false \N NONE
col4 map<int,int> Yes false \N NONE
col5 struct<f1:int> No false \N NONE
-col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"0","variant_sparse_hash_shard_count" = "1")> No false \N NONE
+col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"1","variant_sparse_hash_shard_count" = "1")> No false \N NONE
-- !sql_after --
1 2 [1, 2] {1:2} {"f1":1} {"a":[1, 2, 3]}
@@ -109,7 +109,7 @@ col2 int No false \N NONE
col3 array<int> Yes false \N NONE
col4 map<int,int> Yes false \N NONE
col5 struct<f1:int> Yes false \N NONE
-col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"0","variant_sparse_hash_shard_count" = "1")> Yes false \N NONE
+col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"1","variant_sparse_hash_shard_count" = "1")> Yes false \N NONE
-- !sql_after --
1 2 [1, 2] {1:2} {"f1":1} {"a":[1, 2, 3]}
@@ -123,7 +123,7 @@ col2 int No false \N NONE
col3 array<int> No false \N NONE
col4 map<int,int> No false \N NONE
col5 struct<f1:int> Yes false \N NONE
-col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"0","variant_sparse_hash_shard_count" = "1")> No false \N NONE
+col6 variant<PROPERTIES ("variant_max_subcolumns_count" =
"0","variant_enable_typed_paths_to_sparse" =
"false","variant_max_sparse_column_statistics_size" =
"1","variant_sparse_hash_shard_count" = "1")> No false \N NONE
-- !sql_after --
1 2 [1, 2] {1:2} {"f1":1} {"a":[1, 2, 3]}
diff --git
a/regression-test/data/variant_p0/predefine/test_variant_compaction_with_sparse_limit.out
b/regression-test/data/variant_p0/predefine/test_variant_compaction_with_sparse_limit.out
index 42862a35f10..6caed66d80f 100644
---
a/regression-test/data/variant_p0/predefine/test_variant_compaction_with_sparse_limit.out
+++
b/regression-test/data/variant_p0/predefine/test_variant_compaction_with_sparse_limit.out
@@ -34,8 +34,8 @@
16 {"a":"1223"}
17 {"a":[1]}
17 {"a":[1]}
-18 {"a":["1",2,1.1]}
-18 {"a":["1",2,1.1]}
+18 {"a":["1", 2, 1.1]}
+18 {"a":["1", 2, 1.1]}
19 {"a":1,"b":{"c":1}}
19 {"a":1,"b":{"c":1}}
20 {"a":1,"b":{"c":[{"a":1}]}}
@@ -193,7 +193,7 @@
15 {"a":1}
16 {"a":"1223"}
17 {"a":[1]}
-18 {"a":["1",2,1.1]}
+18 {"a":["1", 2, 1.1]}
19 {"a":1,"b":{"c":1}}
20 {"a":1,"b":{"c":[{"a":1}]}}
21 {"a":1,"b":{"c":[{"a":1}]}}
diff --git
a/regression-test/suites/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.groovy
b/regression-test/suites/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.groovy
index 138c4914c3f..d4333ccb72f 100644
---
a/regression-test/suites/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.groovy
+++
b/regression-test/suites/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.groovy
@@ -20,7 +20,7 @@ suite("create_nestedtypes_with_schemachange", "p0") {
// create basic type
sql "set default_variant_max_subcolumns_count = 0"
sql "set default_variant_enable_typed_paths_to_sparse = false"
- sql "set default_variant_max_sparse_column_statistics_size = 0"
+ sql "set default_variant_max_sparse_column_statistics_size = 1"
sql "set default_variant_sparse_hash_shard_count = 0"
sql "set default_variant_enable_doc_mode = false"
sql "DROP TABLE IF EXISTS $testTablex"
diff --git
a/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy
b/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy
index 1a51f065c0b..5660171b5fa 100644
---
a/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy
+++
b/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy
@@ -41,7 +41,24 @@ suite("test_compaction_variant_predefine_with_sparse_limit",
"nonConcurrent") {
}
int max_sparse_column_statistics_size = 2
- def create_table = { tableName, buckets="auto", key_type="DUPLICATE" ->
+ test {
+ sql """ set default_variant_max_sparse_column_statistics_size = 0 """
+ exception "variant max sparse column statistics size"
+ }
+ sql "DROP TABLE IF EXISTS variant_sparse_stats_zero"
+ test {
+ sql """
+ CREATE TABLE variant_sparse_stats_zero (
+ k bigint,
+ v variant
<properties("variant_max_sparse_column_statistics_size" = "0")>
+ )
+ DUPLICATE KEY(`k`)
+ DISTRIBUTED BY HASH(k) BUCKETS 1
+ properties("replication_num" = "1");
+ """
+ exception "variant_max_sparse_column_statistics_size must between 1 and 50000"
+ }
+ def create_table = { tableName, buckets="auto", key_type="DUPLICATE",
max_subcolumns_count=2048 ->
sql "DROP TABLE IF EXISTS ${tableName}"
def var_def = "variant <MATCH_NAME 'sala' : int, MATCH_NAME 'ddd'
: double, MATCH_NAME 'z' : double,
properties(\"variant_max_sparse_column_statistics_size\" =
\"${max_sparse_column_statistics_size}\")>"
if (key_type == "AGGREGATE") {
@@ -60,13 +77,16 @@
suite("test_compaction_variant_predefine_with_sparse_limit", "nonConcurrent") {
def create_tbl_res = sql """ show create table ${tableName} """
logger.info("${create_tbl_res}")
assertTrue(create_tbl_res.toString().contains("variant_max_sparse_column_statistics_size"))
+
assertTrue(create_tbl_res.toString().contains("\"variant_max_subcolumns_count\"
= \"${max_subcolumns_count}\""))
}
def key_types = ["DUPLICATE", "UNIQUE", "AGGREGATE"]
// def key_types = ["AGGREGATE"]
for (int i = 0; i < key_types.size(); i++) {
+ def max_subcolumns_count = key_types[i] == "AGGREGATE" ? 2048 : 1
+ sql """ set default_variant_max_subcolumns_count =
${max_subcolumns_count} """
def tableName = "simple_variant_${key_types[i]}"
// 1. simple cases
- create_table.call(tableName, "1", key_types[i])
+ create_table.call(tableName, "1", key_types[i],
max_subcolumns_count)
def insert1 = {
sql """insert into ${tableName} values (1, '{"x" :
[1]}'),(13, '{"a" : 1}');"""
sql """insert into ${tableName} values (2, '{"a" :
"1"}'),(14, '{"a" : [[[1]]]}');"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]