xiaokang commented on code in PR #16444: URL: https://github.com/apache/doris/pull/16444#discussion_r1098219931
########## gensrc/thrift/Exprs.thrift: ########## @@ -44,6 +44,7 @@ enum TExprNodeType { INFO_FUNC, FUNCTION_CALL, ARRAY_LITERAL, + STRUCT_LITERAL, Review Comment: should we move it to the end to keep maximum compatability? ########## fe/fe-core/src/main/jflex/sql_scanner.flex: ########## @@ -666,6 +670,8 @@ EndOfLineComment = "--" !({HintContent}|{ContainsLineTerminator}) {LineTerminato "!" { return newToken(SqlParserSymbols.NOT, null); } "<" { return newToken(SqlParserSymbols.LESSTHAN, null); } ">" { return newToken(SqlParserSymbols.GREATERTHAN, null); } +"{" { return newToken(SqlParserSymbols.LBRACE, null); } Review Comment: duplicated with line 658,659 ########## README.md: ########## @@ -231,3 +231,4 @@ some Doris features to be complied with Apache 2.0 License. For details, refer t Review Comment: useless blank lines? ########## fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java: ########## @@ -1354,7 +1354,7 @@ public void checkReturnsBool(String name, boolean printExpr) throws AnalysisExce } public Expr checkTypeCompatibility(Type targetType) throws AnalysisException { - if (targetType.getPrimitiveType() != PrimitiveType.ARRAY + if (targetType.getPrimitiveType() != PrimitiveType.ARRAY && targetType.getPrimitiveType() != PrimitiveType.MAP Review Comment: do we need to check Struct ########## fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java: ########## @@ -319,9 +319,22 @@ public void analyze() throws AnalysisException { type, Function.NullableMode.ALWAYS_NULLABLE, Lists.newArrayList(Type.VARCHAR), false, "doris::CastFunctions::cast_to_array_val", null, null, true); + } else if (type.isMapType()) { + fn = ScalarFunction.createBuiltin(getFnName(Type.MAP), + type, Function.NullableMode.ALWAYS_NULLABLE, + Lists.newArrayList(Type.VARCHAR), false, + "doris::CastFunctions::cast_to_map_val", null, null, true); + } else if (type.isStructType()) { + fn = ScalarFunction.createBuiltin(getFnName(Type.STRUCT), + type, Function.NullableMode.ALWAYS_NULLABLE, + Lists.newArrayList(Type.VARCHAR), false, + "doris::CastFunctions::cast_to_struct_val", null, null, true); } if (fn == null) { + if (type.isStructType() && childType.isStringType()) { Review Comment: do we need to check array and map type? ########## be/src/olap/tablet_meta.cpp: ########## @@ -297,11 +297,21 @@ void TabletMeta::init_column_from_tcolumn(uint32_t unique_id, const TColumn& tco if (tcolumn.__isset.is_bloom_filter_column) { column->set_is_bf_column(tcolumn.is_bloom_filter_column); } - - if (tcolumn.column_type.type == TPrimitiveType::ARRAY) { + if (tcolumn.column_type.type == TPrimitiveType::STRUCT) { + for (size_t i = 0; i < tcolumn.children_column.size(); i++) { + ColumnPB* children_column = column->add_children_columns(); + init_column_from_tcolumn(i, tcolumn.children_column[i], children_column); + } + } else if (tcolumn.column_type.type == TPrimitiveType::ARRAY) { ColumnPB* children_column = column->add_children_columns(); init_column_from_tcolumn(0, tcolumn.children_column[0], children_column); } + if (tcolumn.column_type.type == TPrimitiveType::MAP) { Review Comment: else if ########## be/src/olap/types.h: ########## @@ -322,6 +324,240 @@ class ArrayTypeInfo : public TypeInfo { TypeInfoPtr _item_type_info; const size_t _item_size; }; +///====================== MapType Info ==========================/// +class MapTypeInfo : public TypeInfo { Review Comment: need to refine refer to Array and Struct ########## be/src/runtime/types.cpp: ########## @@ -78,15 +93,17 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& types, int* idx) // ++(*idx); // children.push_back(TypeDescriptor(types, idx)); // break; - // case TTypeNodeType::MAP: - // DCHECK(!node.__isset.scalar_type); - // DCHECK_LT(*idx, types.size() - 2); - // type = TYPE_MAP; - // ++(*idx); - // children.push_back(TypeDescriptor(types, idx)); - // ++(*idx); - // children.push_back(TypeDescriptor(types, idx)); - // break; + case TTypeNodeType::MAP: { + DCHECK(!node.__isset.scalar_type); + DCHECK_LT(*idx, types.size() - 2); + DCHECK(!node.__isset.contains_null); Review Comment: TODO, handle contains_null[0] for key and 1 for value ########## be/src/vec/core/field.h: ########## @@ -334,6 +336,8 @@ class Field { return "Array"; case Tuple: Review Comment: Do we need to define Struct in Field::Types or just use Tuple? ########## be/src/olap/rowset/segment_v2/column_writer.cpp: ########## @@ -173,6 +245,79 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* *writer = std::move(writer_local); return Status::OK(); } + case FieldType::OLAP_FIELD_TYPE_MAP: { + DCHECK(column->get_subtype_count() == 2); + ScalarColumnWriter* null_writer = nullptr; + // create null writer + if (opts.meta->is_nullable()) { + FieldType null_type = FieldType::OLAP_FIELD_TYPE_TINYINT; + ColumnWriterOptions null_options; + null_options.meta = opts.meta->add_children_columns(); + null_options.meta->set_column_id(3); + null_options.meta->set_unique_id(3); + null_options.meta->set_type(null_type); + null_options.meta->set_is_nullable(false); + null_options.meta->set_length( + get_scalar_type_info<OLAP_FIELD_TYPE_TINYINT>()->size()); + null_options.meta->set_encoding(DEFAULT_ENCODING); + null_options.meta->set_compression(opts.meta->compression()); + + null_options.need_zone_map = false; + null_options.need_bloom_filter = false; + null_options.need_bitmap_index = false; + + TabletColumn null_column = + TabletColumn(OLAP_FIELD_AGGREGATION_NONE, null_type, false, + null_options.meta->unique_id(), null_options.meta->length()); + null_column.set_name("nullable"); + null_column.set_index_length(-1); // no short key index + std::unique_ptr<Field> null_field(FieldFactory::create(null_column)); + null_writer = + new ScalarColumnWriter(null_options, std::move(null_field), file_writer); + } + + // create key & value writer + std::vector<std::unique_ptr<ColumnWriter>> inner_writer_list; + for (int i = 0; i < 2; ++i) { + std::unique_ptr<ColumnWriter> inner_array_writer; + ColumnWriterOptions arr_opts; + TabletColumn array_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY); + + array_column.set_index_length(-1); + arr_opts.meta = opts.meta->mutable_children_columns(i); + ColumnMetaPB* child_meta = arr_opts.meta->add_children_columns(); + // inner column meta from actual opts meta + const TabletColumn& inner_column = + column->get_sub_column(i); // field_type is true key and value + array_column.add_sub_column(const_cast<TabletColumn&>(inner_column)); + array_column.set_name("map.arr"); Review Comment: map.keys map.vals ########## fe/fe-core/src/main/cup/sql_parser.cup: ########## @@ -5948,22 +5951,56 @@ array_expr ::= :} ; +kv_list ::= + expr:k COLON expr:v + {: + ArrayList<Expr> list = new ArrayList<Expr>(); + list.add(k); + list.add(v); + RESULT = list ; + :} + |kv_list:list COMMA expr:k COLON expr:v + {: + list.add(k); + list.add(v); + RESULT = list; + :} + ; + +map_literal ::= + LBRACE RBRACE + {: + RESULT = new MapLiteral(); + :} + | LBRACE kv_list:list RBRACE + {: + RESULT = new MapLiteral(list.toArray(new LiteralExpr[0])); + :} + ; + struct_field ::= ident:name COLON type:type {: RESULT = new StructField(name, type); :} ; struct_field_list ::= - struct_field:field - {: - RESULT = Lists.newArrayList(field); - :} - | struct_field_list:fields COMMA struct_field:field - {: - fields.add(field); - RESULT = fields; - :} - ; + struct_field:field + {: + RESULT = Lists.newArrayList(field); + :} + | struct_field_list:fields COMMA struct_field:field + {: + fields.add(field); + RESULT = fields; + :} + ; + +struct_literal ::= + LBRACE expr_list:list RBRACE Review Comment: User may get confused if map literal and struct literal both use brace{}. ########## be/src/vec/exprs/vexpr.cpp: ########## @@ -255,24 +265,6 @@ Status VExpr::open(const std::vector<VExprContext*>& ctxs, RuntimeState* state) return Status::OK(); } -Status VExpr::clone_if_not_exists(const std::vector<VExprContext*>& ctxs, RuntimeState* state, Review Comment: do we need to move this function? ########## fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java: ########## @@ -88,6 +119,16 @@ protected String prettyPrint(int lpad) { return String.format("%sMAP<%s,%s>", leftPadding, keyType.toSql(), structStr); } + public static boolean canCastTo(MapType type, MapType targetType) { + return Type.canCastTo(type.getKeyType(), targetType.getKeyType()) + && Type.canCastTo(type.getValueType(), targetType.getValueType()); + } + + @Override + public boolean supportSubType(Type subType) { + return true; Review Comment: should limit key type to scalar type ########## fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java: ########## @@ -311,6 +311,10 @@ public void analyze(boolean isOlap) throws AnalysisException { } if (type.getPrimitiveType() == PrimitiveType.STRUCT) { + if (isKey()) { Review Comment: add isKey() check for MAP ########## fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java: ########## @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import org.apache.doris.catalog.StructField; +import org.apache.doris.catalog.StructType; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.thrift.TExprNode; +import org.apache.doris.thrift.TExprNodeType; + +import org.apache.commons.lang.StringUtils; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class StructLiteral extends LiteralExpr { + // only for persist + public StructLiteral() { + type = new StructType(); + children = new ArrayList<>(); + } + + public StructLiteral(LiteralExpr... exprs) throws AnalysisException { + type = new StructType(); + children = new ArrayList<>(); + for (LiteralExpr expr : exprs) { + if (!type.supportSubType(expr.getType())) { + throw new AnalysisException("Invalid element type in STRUCT."); + } + ((StructType) type).addField(new StructField(expr.getType())); + children.add(expr); + } + } + + protected StructLiteral(StructLiteral other) { + super(other); + } + + @Override + protected String toSqlImpl() { + List<String> list = new ArrayList<>(children.size()); + children.forEach(v -> list.add(v.toSqlImpl())); + return "STRUCT(" + StringUtils.join(list, ", ") + ")"; + } + + @Override + public String toDigestImpl() { + List<String> list = new ArrayList<>(children.size()); + children.forEach(v -> list.add(v.toDigestImpl())); + return "STRUCT(" + StringUtils.join(list, ", ") + ")"; + } + + @Override + public String getStringValue() { + List<String> list = new ArrayList<>(children.size()); + children.forEach(v -> list.add(v.getStringValue())); + return "{" + StringUtils.join(list, ", ") + "}"; Review Comment: suggest using '(' ')' to keep consistency. ########## fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java: ########## @@ -87,6 +87,7 @@ public class Util { TYPE_STRING_MAP.put(PrimitiveType.BITMAP, "bitmap"); TYPE_STRING_MAP.put(PrimitiveType.QUANTILE_STATE, "quantile_state"); TYPE_STRING_MAP.put(PrimitiveType.ARRAY, "Array<%s>"); + TYPE_STRING_MAP.put(PrimitiveType.MAP, "Map<%s,%s>"); Review Comment: struct ########## fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java: ########## @@ -1275,10 +1275,9 @@ public static boolean isCastMatchAllowed(Function desc, Function candicate) { final Type[] candicateArgTypes = candicate.getArgs(); if (!(descArgTypes[0] instanceof ScalarType) || !(candicateArgTypes[0] instanceof ScalarType)) { - if (candicateArgTypes[0] instanceof ArrayType) { + if (candicateArgTypes[0] instanceof ArrayType || candicateArgTypes[0] instanceof MapType) { Review Comment: do we need to check struct type ########## be/src/olap/tablet_schema.cpp: ########## @@ -311,8 +311,14 @@ uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint3 return string_length + sizeof(OLAP_STRING_MAX_LENGTH); case TPrimitiveType::JSONB: return string_length + sizeof(OLAP_JSONB_MAX_LENGTH); + case TPrimitiveType::STRUCT: + // Note that(xy): this is the length of struct type itself, + // the length of its subtypes are not included. + return OLAP_STRUCT_MAX_LENGTH; case TPrimitiveType::ARRAY: return OLAP_ARRAY_MAX_LENGTH; + case TPrimitiveType::MAP: + return OLAP_ARRAY_MAX_LENGTH; Review Comment: OLAP_MAP_MAX_LENGTH ########## be/src/runtime/primitive_type.h: ########## @@ -54,6 +54,7 @@ constexpr bool is_enumeration_type(PrimitiveType type) { case TYPE_DECIMAL128I: case TYPE_BOOLEAN: case TYPE_ARRAY: + case TYPE_STRUCT: Review Comment: add TYPE_MAP ########## be/src/runtime/struct_value.h: ########## @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <type_traits> + +#include "runtime/primitive_type.h" + +namespace doris { + +class StructValue { +public: + StructValue() = default; + + explicit StructValue(uint32_t size) : _values(nullptr), _size(size), _has_null(false) {} + StructValue(void** values, uint32_t size) : _values(values), _size(size), _has_null(false) {} + StructValue(void** values, uint32_t size, bool has_null) + : _values(values), _size(size), _has_null(has_null) {} + + //void to_struct_val(StructVal* val) const; + //static StructValue from_struct_val(const StructVal& val); + + uint32_t size() const { return _size; } + void set_size(uint32_t size) { _size = size; } + bool has_null() const { return _has_null; } + void set_has_null(bool has_null) { _has_null = has_null; } + bool is_null_at(uint32_t index) const { + return this->_has_null && this->_values[index] == nullptr; + } + + void shallow_copy(const StructValue* other); + + // size_t get_byte_size(const TypeDescriptor& type) const; + + const void** values() const { return const_cast<const void**>(_values); } + void** mutable_values() { return _values; } + void set_values(void** values) { _values = values; } + const void* child_value(uint32_t index) const { return _values[index]; } + void* mutable_child_value(uint32_t index) { return _values[index]; } + void set_child_value(void* value, uint32_t index) { _values[index] = value; } + +private: + // pointer to the start of the vector of children pointers. These pointers are + // point to children values where a null pointer means that this child is NULL. + void** _values; + // the number of values in this struct value. + uint32_t _size; + // child has no null value if has_null is false. + // child may has null value if has_null is true. + bool _has_null; Review Comment: should _has_null be an array as it in StructTypeInfo? ########## be/src/olap/page_cache.cpp: ########## @@ -65,7 +65,6 @@ bool StoragePageCache::lookup(const CacheKey& key, PageCacheHandle* handle, void StoragePageCache::insert(const CacheKey& key, const Slice& data, PageCacheHandle* handle, segment_v2::PageTypePB page_type, bool in_memory) { auto deleter = [](const doris::CacheKey& key, void* value) { delete[] (uint8_t*)value; }; - Review Comment: useless delete ########## be/src/olap/field.h: ########## @@ -256,6 +260,40 @@ class Field { int32_t _unique_id; }; +class MapField : public Field { +public: + explicit MapField(const TabletColumn& column) : Field(column) {} + + // make variable_ptr memory allocate to cell_ptr as MapValue + char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { + return variable_ptr + _length; Review Comment: should do some real work like Struct and Array. ########## be/src/runtime/types.cpp: ########## @@ -98,15 +115,17 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const { if (is_complex_type()) { if (type == TYPE_ARRAY) { node.type = TTypeNodeType::ARRAY; + node.contains_null = contains_nulls[0]; } else if (type == TYPE_MAP) { node.type = TTypeNodeType::MAP; Review Comment: need to process children for map ########## be/src/vec/data_types/data_type_factory.hpp: ########## @@ -95,7 +97,10 @@ class DataTypeFactory { }); Review Comment: register Map to _invert_data_type_map like Array ########## be/src/vec/data_types/data_type_factory.cpp: ########## @@ -368,6 +444,12 @@ DataTypePtr DataTypeFactory::create_data_type(const arrow::DataType* type, bool nested = std::make_shared<vectorized::DataTypeArray>( create_data_type(type->field(0)->type().get(), true)); break; + case ::arrow::Type::MAP: Review Comment: add processing for STRUCT ########## be/src/vec/data_types/data_type_struct.cpp: ########## @@ -0,0 +1,480 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeTuple.cpp +// and modified by Doris + +#include "vec/data_types/data_type_struct.h" + +namespace doris::vectorized { + +DataTypeStruct::DataTypeStruct(const DataTypes& elems_) + : elems(elems_), have_explicit_names(false) { + /// Automatically assigned names in form of '1', '2', ... + size_t size = elems.size(); + names.resize(size); + for (size_t i = 0; i < size; ++i) { + names[i] = std::to_string(i + 1); + } +} + +static Status check_tuple_names(const Strings& names) { + std::unordered_set<String> names_set; + for (const auto& name : names) { + if (name.empty()) { + return Status::InvalidArgument("Names of tuple elements cannot be empty"); + } + + if (!names_set.insert(name).second) { + return Status::InvalidArgument("Names of tuple elements must be unique"); + } + } + + return {}; +} + +DataTypeStruct::DataTypeStruct(const DataTypes& elems_, const Strings& names_) + : elems(elems_), names(names_), have_explicit_names(true) { + size_t size = elems.size(); + if (names.size() != size) { + LOG(FATAL) << "Wrong number of names passed to constructor of DataTypeStruct"; + } + + Status st = check_tuple_names(names); + //if (!st.ok()) { + //} +} + +std::string DataTypeStruct::do_get_name() const { + size_t size = elems.size(); + std::stringstream s; + + s << "Struct("; + for (size_t i = 0; i < size; ++i) { + if (i != 0) { + s << ", "; + } + + // if (have_explicit_names) { + // s << back_quote_if_need(names[i]) << ' '; + // } + + s << elems[i]->get_name(); + } + s << ")"; + + return s.str(); +} + +Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const { + DCHECK(!rb.eof()); + auto* struct_column = assert_cast<ColumnStruct*>(column); + + if (*rb.position() != '{') { + return Status::InvalidArgument("Struct does not start with '{' character, found '{}'", + *rb.position()); + } + if (rb.count() < 2 || *(rb.end() - 1) != '}') { + return Status::InvalidArgument("Struct does not end with '}' character, found '{}'", + *(rb.end() - 1)); + } + + // here need handle the empty struct '{}' + if (rb.count() == 2) { + return Status::OK(); + } + + ++rb.position(); + std::vector<ReadBuffer> field_rbs; + field_rbs.reserve(elems.size()); + + // here get the value "jack" and 20 from {"name":"jack","age":20} + while (!rb.eof()) { + size_t field_len = 0; + auto start = rb.position(); + while (!rb.eof() && *start != ',' && *start != '}') { + field_len++; + start++; + } + if (field_len >= rb.count()) { + return Status::InvalidArgument("Invalid Length"); + } + ReadBuffer field_rb(rb.position(), field_len); + + size_t len = 0; + auto start_rb = field_rb.position(); + while (!field_rb.eof() && *start_rb != ':') { + len++; + start_rb++; + } + ReadBuffer field(field_rb.position() + len + 1, field_rb.count() - len - 1); + + if (field.count() >= 2 && ((*field.position() == '"' && *(field.end() - 1) == '"') || + (*field.position() == '\'' && *(field.end() - 1) == '\''))) { + ReadBuffer field_no_quote(field.position() + 1, field.count() - 2); + field_rbs.push_back(field_no_quote); + } else { + field_rbs.push_back(field); + } + + rb.position() += field_len + 1; + } + + for (size_t idx = 0; idx < elems.size(); idx++) { + elems[idx]->from_string(field_rbs[idx], &struct_column->get_column(idx)); + } + + return Status::OK(); +} + +std::string DataTypeStruct::to_string(const IColumn& column, size_t row_num) const { + auto ptr = column.convert_to_full_column_if_const(); + auto& struct_column = assert_cast<const ColumnStruct&>(*ptr.get()); + + std::stringstream ss; + ss << "<"; Review Comment: using consistent brace? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org