This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 2588d2ce11b branch-3.1: [regression-test](Variant) fix and add escaped chars cases #52657 (#52664)
2588d2ce11b is described below
commit 2588d2ce11b19ece8b7057397819c72805de1995
Author: lihangyu <[email protected]>
AuthorDate: Wed Jul 2 19:24:50 2025 +0800
branch-3.1: [regression-test](Variant) fix and add escaped chars cases #52657 (#52664)
cherry-pick from #52657
---
be/src/vec/columns/column_object.cpp | 13 ++--
be/src/vec/columns/column_object.h | 3 +-
regression-test/data/variant_p0/escaped_chars.out | Bin 0 -> 1458 bytes
.../data/variant_p0/predefine/delete_update.out | Bin 931 -> 951 bytes
.../suites/variant_p0/escaped_chars.groovy | 71 +++++++++++++++++++++
5 files changed, 77 insertions(+), 10 deletions(-)
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 79380b413f0..0a83f5cd298 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1313,7 +1313,8 @@ const ColumnObject::Subcolumn* ColumnObject::get_subcolumn(const PathInData& key
return &node->data;
}
-size_t ColumnObject::Subcolumn::serialize_text_json(size_t n, BufferWritable& output) const {
+size_t ColumnObject::Subcolumn::serialize_text_json(size_t n, BufferWritable& output,
+ DataTypeSerDe::FormatOptions opt) const {
if (least_common_type.get_base_type_id() == TypeIndex::Nothing) {
output.write(DataTypeSerDe::NULL_IN_COMPLEX_TYPE.data(),
DataTypeSerDe::NULL_IN_COMPLEX_TYPE.size());
@@ -1328,7 +1329,6 @@ size_t ColumnObject::Subcolumn::serialize_text_json(size_t n, BufferWritable& ou
}
ind -= num_of_defaults_in_prefix;
- DataTypeSerDe::FormatOptions opt;
for (size_t i = 0; i < data.size(); ++i) {
const auto& part = data[i];
const auto& part_type_serde = data_serdes[i];
@@ -1497,7 +1497,6 @@ void ColumnObject::serialize_one_row_to_string(int row, std::string* output) con
// TODO preallocate memory
serialize_one_row_to_json_format(row, write_buffer, nullptr);
}
-
write_buffer.commit();
auto str_ref = tmp_col->get_data_at(0);
*output = std::string(str_ref.data, str_ref.size);
@@ -1739,14 +1738,14 @@ void ColumnObject::serialize_one_row_to_json_format(int64_t row_num, BufferWrita
// Serialize value of current path.
if (auto subcolumn_it = subcolumn_path_map.find(path);
subcolumn_it != subcolumn_path_map.end()) {
- subcolumn_it->second.serialize_text_json(row_num, output);
+ subcolumn_it->second.serialize_text_json(row_num, output, {.escape_char = '\\'});
} else {
// To serialize value stored in shared data we should first deserialize it from binary format.
Subcolumn tmp_subcolumn(0, true);
const auto& data = ColumnObject::deserialize_from_sparse_column(
sparse_data_values, index_in_sparse_data_values++);
tmp_subcolumn.insert(data.first, data.second);
- tmp_subcolumn.serialize_text_json(0, output);
+ tmp_subcolumn.serialize_text_json(0, output, {.escape_char = '\\'});
}
}
@@ -1755,10 +1754,6 @@ void ColumnObject::serialize_one_row_to_json_format(int64_t row_num, BufferWrita
writeChar('}', output);
}
writeChar('}', output);
-#ifndef NDEBUG
- // check if it is a valid json
-#endif
- return;
}
size_t ColumnObject::Subcolumn::get_non_null_value_size() const {
diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h
index 69e9d08d62c..5551a10659a 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -144,7 +144,8 @@ public:
size_t get_non_null_value_size() const;
- size_t serialize_text_json(size_t n, BufferWritable& output) const;
+ size_t serialize_text_json(size_t n, BufferWritable& output,
+ DataTypeSerDe::FormatOptions opt = {}) const;
const DataTypeSerDeSPtr& get_least_common_type_serde() const {
return least_common_type.get_serde();
diff --git a/regression-test/data/variant_p0/escaped_chars.out b/regression-test/data/variant_p0/escaped_chars.out
new file mode 100644
index 00000000000..5a8d9f410c5
Binary files /dev/null and b/regression-test/data/variant_p0/escaped_chars.out differ
diff --git a/regression-test/data/variant_p0/predefine/delete_update.out b/regression-test/data/variant_p0/predefine/delete_update.out
index 3fab0479cac..2bcfda75ba3 100644
Binary files a/regression-test/data/variant_p0/predefine/delete_update.out and b/regression-test/data/variant_p0/predefine/delete_update.out differ
diff --git a/regression-test/suites/variant_p0/escaped_chars.groovy b/regression-test/suites/variant_p0/escaped_chars.groovy
new file mode 100644
index 00000000000..8e5a32f6803
--- /dev/null
+++ b/regression-test/suites/variant_p0/escaped_chars.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("regression_test_variant_escaped_chars", "p0"){
+ def tableName = "variant_escape_chars"
+
+ sql """ DROP TABLE IF EXISTS variant_escape_chars """
+
+ sql """
+ CREATE TABLE IF NOT EXISTS variant_escape_chars (
+ `id` INT,
+ `description` VARIANT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'This is a test table with escape characters in description'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+
+ sql """
+ INSERT INTO variant_escape_chars VALUES
+ (1, '{"a" : 123, "b" : "test with escape \\\\" characters"}'),
+ (2, '{"a" : 456, "b" : "another test with escape \\\\\\\\ characters"}'),
+ (3, '{"a" : 789, "b" : "test with single quote \\\' characters"}'),
+ (4, '{"a" : 101112, "b" : "test with newline \\\\n characters"}'),
+ (5, '{"a" : 131415, "b" : "test with tab \\\\t characters"}'),
+ (6, '{"a" : 161718, "b" : "test with backslash \\\\b characters"}');
+ """
+
+ // test json value with escaped characters
+ qt_select """ SELECT * FROM variant_escape_chars ORDER BY id """
+ qt_select """ SELECT description['b'] FROM variant_escape_chars ORDER BY id """
+ qt_select """ SELECT CAST(description['b'] AS TEXT) FROM variant_escape_chars ORDER BY id """
+
+ sql """
+ drop table if exists t01;
+ create table t01(id int, b json, c json, d variant, e variant) properties ("replication_num" = "1");
+ insert into t01 values (1, '{"c_json":{"a":"a\\\\nb"}}', '{"c_json": {"quote":"\\\\"Helvetica tofu try-hard gluten-free gentrify leggings.\\\\" - Remington Trantow"}}', '{"c_json": {"quote":"\\\\"Helvetica tofu try-hard gluten-free gentrify leggings.\\\\" - Remington Trantow"}}', '{"c_json":{"a":"a\\\\nb"}}');
+ """
+ qt_select """ SELECT * FROM t01 """
+ qt_select """select json_extract(b, "\$.c_json"), e["c_json"] from t01;"""
+
+ // test json keys with escaped characters, FIXED in 3.1.0
+ sql "truncate table variant_escape_chars"
+ sql """
+ INSERT INTO variant_escape_chars VALUES
+ (1, '{"test with escape \\\\" characters" : 123}'),
+ (2, '{"another test with escape \\\\\\\\ characters" : 123}'),
+ (3, '{"test with single quote \\\' characters" : 123}'),
+ (4, '{"test with newline \\\\n characters":123}'),
+ (5, '{"test with tab \\\\t characters" : 123}'),
+ (6, '{"test with backslash \\\\b characters" : 123}');
+ """
+ qt_select """ SELECT * FROM variant_escape_chars ORDER BY id """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]