This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new e21f0177c11 branch-4.0: [Improve](Config) add
variant_max_json_key_length to limit variant json key length #60187 (#60219)
e21f0177c11 is described below
commit e21f0177c11349be23bd59ca816e6d9d340dddc2
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Jan 28 18:31:44 2026 +0800
branch-4.0: [Improve](Config) add variant_max_json_key_length to limit
variant json key length #60187 (#60219)
Cherry-picked from #60187
Co-authored-by: lihangyu <[email protected]>
---
be/src/common/config.cpp | 4 +++
be/src/common/config.h | 2 ++
be/src/vec/json/json_parser.cpp | 18 +++++++++-----
be/test/vec/jsonb/json_parser_test.cpp | 45 +++++++++++++++++++++++++++++++++-
4 files changed, 62 insertions(+), 7 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index b86b60ed14e..1c8003606cb 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1096,9 +1096,13 @@ DEFINE_mInt64(workload_group_scan_task_wait_timeout_ms,
"10000");
// Whether use schema dict in backend side instead of MetaService side(cloud
mode)
DEFINE_mBool(variant_use_cloud_schema_dict_cache, "true");
DEFINE_mInt64(variant_threshold_rows_to_estimate_sparse_column, "2048");
+DEFINE_mInt32(variant_max_json_key_length, "255");
DEFINE_mBool(variant_throw_exeception_on_invalid_json, "false");
DEFINE_mBool(enable_vertical_compact_variant_subcolumns, "true");
+DEFINE_Validator(variant_max_json_key_length,
+ [](const int config) -> bool { return config > 0 && config <=
65535; });
+
// block file cache
DEFINE_Bool(enable_file_cache, "false");
// format:
[{"path":"/path/to/file_cache","total_size":21474836480,"query_limit":10737418240}]
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 47623a2b1a4..7ef737f2de6 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1335,6 +1335,8 @@ DECLARE_mBool(variant_use_cloud_schema_dict_cache);
// Threshold to estimate a column is sparsed
// Notice: TEST ONLY
DECLARE_mInt64(variant_threshold_rows_to_estimate_sparse_column);
+// Max json key length in bytes when parsing json into variant
subcolumns/jsonb.
+DECLARE_mInt32(variant_max_json_key_length);
// Treat invalid json format str as string, instead of throwing exception if
false
DECLARE_mBool(variant_throw_exeception_on_invalid_json);
// Enable vertical compact subcolumns of variant column
diff --git a/be/src/vec/json/json_parser.cpp b/be/src/vec/json/json_parser.cpp
index e4ad2bfe7c5..eb4d6c5e2b5 100644
--- a/be/src/vec/json/json_parser.cpp
+++ b/be/src/vec/json/json_parser.cpp
@@ -93,9 +93,12 @@ void JSONDataParser<ParserImpl>::traverseObject(const
JSONObject& object, ParseC
ctx.values.reserve(ctx.values.size() + object.size());
for (auto it = object.begin(); it != object.end(); ++it) {
const auto& [key, value] = *it;
- if (key.size() >= std::numeric_limits<uint8_t>::max()) {
- throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
- "Key length exceeds maximum allowed size of
255 bytes.");
+ const size_t max_key_length =
cast_set<size_t>(config::variant_max_json_key_length);
+ if (key.size() > max_key_length) {
+ throw doris::Exception(
+ doris::ErrorCode::INVALID_ARGUMENT,
+ fmt::format("Key length exceeds maximum allowed size of {}
bytes.",
+ max_key_length));
}
ctx.builder.append(key, false);
traverse(value, ctx);
@@ -133,9 +136,12 @@ void
JSONDataParser<ParserImpl>::traverseObjectAsJsonb(const JSONObject& object,
writer.writeStartObject();
for (auto it = object.begin(); it != object.end(); ++it) {
const auto& [key, value] = *it;
- if (key.size() >= std::numeric_limits<uint8_t>::max()) {
- throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
- "Key length exceeds maximum allowed size of
255 bytes.");
+ const size_t max_key_length =
cast_set<size_t>(config::variant_max_json_key_length);
+ if (key.size() > max_key_length) {
+ throw doris::Exception(
+ doris::ErrorCode::INVALID_ARGUMENT,
+ fmt::format("Key length exceeds maximum allowed size of {}
bytes.",
+ max_key_length));
}
writer.writeKey(key.data(), cast_set<uint8_t>(key.size()));
traverseAsJsonb(value, writer);
diff --git a/be/test/vec/jsonb/json_parser_test.cpp
b/be/test/vec/jsonb/json_parser_test.cpp
index b878cc165f9..e4790f6786c 100644
--- a/be/test/vec/jsonb/json_parser_test.cpp
+++ b/be/test/vec/jsonb/json_parser_test.cpp
@@ -21,6 +21,7 @@
#include <vector>
+#include "common/config.h"
#include "vec/common/string_ref.h"
using doris::vectorized::JSONDataParser;
@@ -430,4 +431,46 @@ TEST(JsonParserTest, ParseUInt64) {
EXPECT_EQ(array_field_2.size(), 1);
EXPECT_EQ(array_field_2[0].get_type(),
doris::PrimitiveType::TYPE_LARGEINT);
EXPECT_EQ(array_field_2[0].get<doris::PrimitiveType::TYPE_LARGEINT>(),
18446744073709551615ULL);
-}
\ No newline at end of file
+}
+
+TEST(JsonParserTest, KeyLengthLimitByConfig) {
+ struct ScopedMaxJsonKeyLength {
+ int32_t old_value;
+ explicit ScopedMaxJsonKeyLength(int32_t new_value)
+ : old_value(doris::config::variant_max_json_key_length) {
+ doris::config::variant_max_json_key_length = new_value;
+ }
+ ~ScopedMaxJsonKeyLength() { doris::config::variant_max_json_key_length
= old_value; }
+ };
+
+ JSONDataParser<SimdJSONParser> parser;
+ ParseConfig config;
+
+ {
+ ScopedMaxJsonKeyLength guard(10);
+ std::string key11(11, 'a');
+
+ std::string obj_json = "{\"" + key11 + "\": 1}";
+ EXPECT_ANY_THROW(parser.parse(obj_json.c_str(), obj_json.size(),
config));
+
+ config.enable_flatten_nested = false;
+ std::string jsonb_json = "{\"a\": [{\"" + key11 + "\": 1}]}";
+ EXPECT_ANY_THROW(parser.parse(jsonb_json.c_str(), jsonb_json.size(),
config));
+ }
+
+ {
+ ScopedMaxJsonKeyLength guard(255);
+ std::string key255(255, 'b');
+
+ std::string obj_json = "{\"" + key255 + "\": 1}";
+ auto result = parser.parse(obj_json.c_str(), obj_json.size(), config);
+ ASSERT_TRUE(result.has_value());
+
+ config.enable_flatten_nested = false;
+ std::string jsonb_json = "{\"a\": [{\"" + key255 + "\": 1}]}";
+ result = parser.parse(jsonb_json.c_str(), jsonb_json.size(), config);
+ ASSERT_TRUE(result.has_value());
+ ASSERT_EQ(result->values.size(), 1);
+ EXPECT_EQ(result->values[0].get_type(),
doris::PrimitiveType::TYPE_JSONB);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]