This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new e21f0177c11 branch-4.0: [Improve](Config) add variant_max_json_key_length to limit variant json key length #60187 (#60219)
e21f0177c11 is described below

commit e21f0177c11349be23bd59ca816e6d9d340dddc2
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Jan 28 18:31:44 2026 +0800

    branch-4.0: [Improve](Config) add variant_max_json_key_length to limit variant json key length #60187 (#60219)
    
    Cherry-picked from #60187
    
    Co-authored-by: lihangyu <[email protected]>
---
 be/src/common/config.cpp               |  4 +++
 be/src/common/config.h                 |  2 ++
 be/src/vec/json/json_parser.cpp        | 18 +++++++++-----
 be/test/vec/jsonb/json_parser_test.cpp | 45 +++++++++++++++++++++++++++++++++-
 4 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index b86b60ed14e..1c8003606cb 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1096,9 +1096,13 @@ DEFINE_mInt64(workload_group_scan_task_wait_timeout_ms, "10000");
 // Whether use schema dict in backend side instead of MetaService side(cloud mode)
 DEFINE_mBool(variant_use_cloud_schema_dict_cache, "true");
 DEFINE_mInt64(variant_threshold_rows_to_estimate_sparse_column, "2048");
+DEFINE_mInt32(variant_max_json_key_length, "255");
 DEFINE_mBool(variant_throw_exeception_on_invalid_json, "false");
 DEFINE_mBool(enable_vertical_compact_variant_subcolumns, "true");
 
+DEFINE_Validator(variant_max_json_key_length,
+                 [](const int config) -> bool { return config > 0 && config <= 65535; });
+
 // block file cache
 DEFINE_Bool(enable_file_cache, "false");
 // format: [{"path":"/path/to/file_cache","total_size":21474836480,"query_limit":10737418240}]
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 47623a2b1a4..7ef737f2de6 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1335,6 +1335,8 @@ DECLARE_mBool(variant_use_cloud_schema_dict_cache);
 // Threshold to estimate a column is sparsed
 // Notice: TEST ONLY
 DECLARE_mInt64(variant_threshold_rows_to_estimate_sparse_column);
+// Max json key length in bytes when parsing json into variant subcolumns/jsonb.
+DECLARE_mInt32(variant_max_json_key_length);
 // Treat invalid json format str as string, instead of throwing exception if false
 DECLARE_mBool(variant_throw_exeception_on_invalid_json);
 // Enable vertical compact subcolumns of variant column
diff --git a/be/src/vec/json/json_parser.cpp b/be/src/vec/json/json_parser.cpp
index e4ad2bfe7c5..eb4d6c5e2b5 100644
--- a/be/src/vec/json/json_parser.cpp
+++ b/be/src/vec/json/json_parser.cpp
@@ -93,9 +93,12 @@ void JSONDataParser<ParserImpl>::traverseObject(const JSONObject& object, ParseC
     ctx.values.reserve(ctx.values.size() + object.size());
     for (auto it = object.begin(); it != object.end(); ++it) {
         const auto& [key, value] = *it;
-        if (key.size() >= std::numeric_limits<uint8_t>::max()) {
-            throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
-                                   "Key length exceeds maximum allowed size of 255 bytes.");
+        const size_t max_key_length = cast_set<size_t>(config::variant_max_json_key_length);
+        if (key.size() > max_key_length) {
+            throw doris::Exception(
+                    doris::ErrorCode::INVALID_ARGUMENT,
+                    fmt::format("Key length exceeds maximum allowed size of {} bytes.",
+                                max_key_length));
         }
         ctx.builder.append(key, false);
         traverse(value, ctx);
@@ -133,9 +136,12 @@ void JSONDataParser<ParserImpl>::traverseObjectAsJsonb(const JSONObject& object,
     writer.writeStartObject();
     for (auto it = object.begin(); it != object.end(); ++it) {
         const auto& [key, value] = *it;
-        if (key.size() >= std::numeric_limits<uint8_t>::max()) {
-            throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
-                                   "Key length exceeds maximum allowed size of 255 bytes.");
+        const size_t max_key_length = cast_set<size_t>(config::variant_max_json_key_length);
+        if (key.size() > max_key_length) {
+            throw doris::Exception(
+                    doris::ErrorCode::INVALID_ARGUMENT,
+                    fmt::format("Key length exceeds maximum allowed size of {} bytes.",
+                                max_key_length));
         }
         writer.writeKey(key.data(), cast_set<uint8_t>(key.size()));
         traverseAsJsonb(value, writer);
diff --git a/be/test/vec/jsonb/json_parser_test.cpp b/be/test/vec/jsonb/json_parser_test.cpp
index b878cc165f9..e4790f6786c 100644
--- a/be/test/vec/jsonb/json_parser_test.cpp
+++ b/be/test/vec/jsonb/json_parser_test.cpp
@@ -21,6 +21,7 @@
 
 #include <vector>
 
+#include "common/config.h"
 #include "vec/common/string_ref.h"
 
 using doris::vectorized::JSONDataParser;
@@ -430,4 +431,46 @@ TEST(JsonParserTest, ParseUInt64) {
     EXPECT_EQ(array_field_2.size(), 1);
     EXPECT_EQ(array_field_2[0].get_type(), doris::PrimitiveType::TYPE_LARGEINT);
     EXPECT_EQ(array_field_2[0].get<doris::PrimitiveType::TYPE_LARGEINT>(), 18446744073709551615ULL);
-}
\ No newline at end of file
+}
+
+TEST(JsonParserTest, KeyLengthLimitByConfig) {
+    struct ScopedMaxJsonKeyLength {
+        int32_t old_value;
+        explicit ScopedMaxJsonKeyLength(int32_t new_value)
+                : old_value(doris::config::variant_max_json_key_length) {
+            doris::config::variant_max_json_key_length = new_value;
+        }
+        ~ScopedMaxJsonKeyLength() { doris::config::variant_max_json_key_length = old_value; }
+    };
+
+    JSONDataParser<SimdJSONParser> parser;
+    ParseConfig config;
+
+    {
+        ScopedMaxJsonKeyLength guard(10);
+        std::string key11(11, 'a');
+
+        std::string obj_json = "{\"" + key11 + "\": 1}";
+        EXPECT_ANY_THROW(parser.parse(obj_json.c_str(), obj_json.size(), config));
+
+        config.enable_flatten_nested = false;
+        std::string jsonb_json = "{\"a\": [{\"" + key11 + "\": 1}]}";
+        EXPECT_ANY_THROW(parser.parse(jsonb_json.c_str(), jsonb_json.size(), config));
+    }
+
+    {
+        ScopedMaxJsonKeyLength guard(255);
+        std::string key255(255, 'b');
+
+        std::string obj_json = "{\"" + key255 + "\": 1}";
+        auto result = parser.parse(obj_json.c_str(), obj_json.size(), config);
+        ASSERT_TRUE(result.has_value());
+
+        config.enable_flatten_nested = false;
+        std::string jsonb_json = "{\"a\": [{\"" + key255 + "\": 1}]}";
+        result = parser.parse(jsonb_json.c_str(), jsonb_json.size(), config);
+        ASSERT_TRUE(result.has_value());
+        ASSERT_EQ(result->values.size(), 1);
+        EXPECT_EQ(result->values[0].get_type(), doris::PrimitiveType::TYPE_JSONB);
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to