Copilot commented on code in PR #63697:
URL: https://github.com/apache/doris/pull/63697#discussion_r3307893276
##########
be/src/vec/json/parse2column.cpp:
##########
@@ -165,32 +165,57 @@ void parse_json_to_variant(IColumn& column, const char*
src, size_t length,
check_paths.insert(check_paths.end(), paths.begin(), paths.end());
THROW_IF_ERROR(vectorized::schema_util::check_variant_has_no_ambiguous_paths(check_paths));
}
- for (size_t i = 0; i < paths.size(); ++i) {
- FieldInfo field_info;
- schema_util::get_field_info(values[i], &field_info);
- if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE) {
- continue;
+
+ auto is_plain_path = [](const PathInData& path) {
+ for (const auto& part : path.get_parts()) {
+ if (part.is_nested || part.anonymous_array_level != 0) {
+ return false;
+ }
}
- if (column_variant.get_subcolumn(paths[i], i) == nullptr) {
- if (paths[i].has_nested_part()) {
- column_variant.add_nested_subcolumn(paths[i], field_info,
old_num_rows);
+ return true;
+ };
+
+ auto get_or_create_subcolumn = [&](const PathInData& path, size_t
index_hint,
+ const FieldInfo& field_info) ->
ColumnVariant::Subcolumn* {
+ auto* subcolumn = column_variant.get_subcolumn(path, index_hint);
+ if (subcolumn == nullptr) {
+ if (path.has_nested_part()) {
+ column_variant.add_nested_subcolumn(path, field_info,
old_num_rows);
} else {
- column_variant.add_sub_column(paths[i], old_num_rows);
+ column_variant.add_sub_column(path, old_num_rows);
}
+ subcolumn = column_variant.get_subcolumn(path, index_hint);
}
- auto* subcolumn = column_variant.get_subcolumn(paths[i], i);
if (!subcolumn) {
throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to
find sub column {}",
- paths[i].get_path());
+ path.get_path());
+ }
+ return subcolumn;
+ };
+
+ auto normalize_plain_path = [&](const PathInData& path) {
+ if (!config.check_duplicate_json_path || path.empty() ||
!is_plain_path(path)) {
+ return path;
+ }
+ return PathInData(path.get_path());
+ };
+
+ for (size_t i = 0; i < paths.size(); ++i) {
+ FieldInfo field_info;
+ schema_util::get_field_info(values[i], &field_info);
+ if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE) {
+ continue;
}
+ auto path = normalize_plain_path(paths[i]);
+ auto* subcolumn = get_or_create_subcolumn(path, i, field_info);
if (subcolumn->cur_num_of_defaults() > 0) {
subcolumn->insert_many_defaults(subcolumn->cur_num_of_defaults());
subcolumn->reset_current_num_of_defaults();
}
if (subcolumn->size() != old_num_rows) {
throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
"subcolumn {} size missmatched, may
contains duplicated entry",
Review Comment:
The exception message contains a typo and a grammar error: "missmatched" should be
"mismatched", and "may contains" should be "may contain" (consider "may contain
duplicated entry"). This message is user-facing when duplicate entries occur, so
correcting it will make diagnostics clearer.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]