This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new dca16796ad [fix](ParquetReader) definition level of repeated parent is wrong (#17337) dca16796ad is described below commit dca16796adfac43f5bbcf74df33d212c2bcf8510 Author: Ashin Gau <ashin...@users.noreply.github.com> AuthorDate: Mon Mar 6 18:15:57 2023 +0800 [fix](ParquetReader) definition level of repeated parent is wrong (#17337) Fix three bugs: 1. `repeated_parent_def_level ` should be the definition of its repeated parent. 2. Failed to parse schema like `decimal(p, s)` 3. Fill wrong offsets for array type --- be/src/vec/exec/format/parquet/schema_desc.cpp | 22 +++++++++++----------- .../exec/format/parquet/vparquet_column_reader.cpp | 8 ++++---- .../doris/catalog/HiveMetaStoreClientHelper.java | 7 ++++++- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp index c7ff2353d4..ebd9a25352 100644 --- a/be/src/vec/exec/format/parquet/schema_desc.cpp +++ b/be/src/vec/exec/format/parquet/schema_desc.cpp @@ -55,11 +55,11 @@ static int num_children_node(const tparquet::SchemaElement& schema) { return schema.__isset.num_children ? schema.num_children : 0; } -static void set_child_node_level(FieldSchema* parent, size_t rep_inc = 0, size_t def_inc = 0) { +static void set_child_node_level(FieldSchema* parent, int16_t repeated_parent_def_level) { for (auto& child : parent->children) { - child.repetition_level = parent->repetition_level + rep_inc; - child.definition_level = parent->definition_level + def_inc; - child.repeated_parent_def_level = parent->definition_level; + child.repetition_level = parent->repetition_level; + child.definition_level = parent->definition_level; + child.repeated_parent_def_level = repeated_parent_def_level; } } @@ -129,7 +129,7 @@ Status FieldDescriptor::parse_node_field(const std::vector<tparquet::SchemaEleme node_field->repetition_level++; node_field->definition_level++; node_field->children.resize(1); - set_child_node_level(node_field); + set_child_node_level(node_field, node_field->definition_level); auto child = &node_field->children[0]; parse_physical_field(t_schema, false, child); @@ -315,7 +315,7 @@ Status FieldDescriptor::parse_group_field(const std::vector<tparquet::SchemaElem group_field->repetition_level++; group_field->definition_level++; group_field->children.resize(1); - set_child_node_level(group_field); + set_child_node_level(group_field, group_field->definition_level); auto struct_field = &group_field->children[0]; // the list of struct: // repeated group <name> (LIST) { @@ -379,16 +379,16 @@ Status FieldDescriptor::parse_list_field(const std::vector<tparquet::SchemaEleme // optional field, and the third level element is the nested structure in list // produce nested structure like: LIST<INT>, LIST<MAP>, LIST<LIST<...>> // skip bag/list, it's a repeated element. - set_child_node_level(list_field); + set_child_node_level(list_field, list_field->definition_level); RETURN_IF_ERROR(parse_node_field(t_schemas, curr_pos + 2, list_child)); } else { // required field, produce the list of struct - set_child_node_level(list_field); + set_child_node_level(list_field, list_field->definition_level); RETURN_IF_ERROR(parse_struct_field(t_schemas, curr_pos + 1, list_child)); } } else if (num_children == 0) { // required two level list, for compatibility reason. - set_child_node_level(list_field); + set_child_node_level(list_field, list_field->definition_level); parse_physical_field(second_level, false, list_child); _next_schema_pos = curr_pos + 2; } @@ -450,7 +450,7 @@ Status FieldDescriptor::parse_map_field(const std::vector<tparquet::SchemaElemen map_field->definition_level++; map_field->children.resize(1); - set_child_node_level(map_field); + set_child_node_level(map_field, map_field->repeated_parent_def_level); auto map_kv_field = &map_field->children[0]; // produce MAP<STRUCT<KEY, VALUE>> RETURN_IF_ERROR(parse_struct_field(t_schemas, curr_pos + 1, map_kv_field)); @@ -473,7 +473,7 @@ Status FieldDescriptor::parse_struct_field(const std::vector<tparquet::SchemaEle } auto num_children = struct_schema.num_children; struct_field->children.resize(num_children); - set_child_node_level(struct_field); + set_child_node_level(struct_field, struct_field->repeated_parent_def_level); _next_schema_pos = curr_pos + 1; for (int i = 0; i < num_children; ++i) { RETURN_IF_ERROR(parse_node_field(t_schemas, _next_schema_pos, &struct_field->children[i])); diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index 4e270c30cf..3e94a3082f 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -40,7 +40,7 @@ static void fill_struct_null_map(FieldSchema* field, NullMap& null_map, DCHECK_EQ(num_levels, rep_levels.size()); size_t origin_size = null_map.size(); null_map.resize(origin_size + num_levels); - size_t pos = 0; + size_t pos = origin_size; for (size_t i = 0; i < num_levels; ++i) { // skip the levels affect its ancestor or its descendants if (def_levels[i] < field->repeated_parent_def_level || @@ -53,7 +53,7 @@ static void fill_struct_null_map(FieldSchema* field, NullMap& null_map, null_map[pos++] = 1; } } - null_map.resize(origin_size + pos); + null_map.resize(pos + 1); } static void fill_array_offset(FieldSchema* field, ColumnArray::Offsets64& offsets_data, @@ -88,9 +88,9 @@ static void fill_array_offset(FieldSchema* field, ColumnArray::Offsets64& offset (*null_map_ptr)[offset_pos] = 1; } } - offsets_data.resize(origin_size + offset_pos + 1); + offsets_data.resize(offset_pos + 1); if (null_map_ptr != nullptr) { - null_map_ptr->resize(origin_size + offset_pos + 1); + null_map_ptr->resize(offset_pos + 1); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java index 94370f3129..53b492082f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java @@ -676,13 +676,18 @@ public class HiveMetaStoreClientHelper { */ private static int findNextNestedField(String commaSplitFields) { int numLess = 0; + int numBracket = 0; for (int i = 0; i < commaSplitFields.length(); i++) { char c = commaSplitFields.charAt(i); if (c == '<') { numLess++; } else if (c == '>') { numLess--; - } else if (c == ',' && numLess == 0) { + } else if (c == '(') { + numBracket++; + } else if (c == ')') { + numBracket--; + } else if (c == ',' && numLess == 0 && numBracket == 0) { return i; } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org