This is an automated email from the ASF dual-hosted git repository. xuyang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 7229751bd9 [Improve](map-type) Add contains_null for map (#16948) 7229751bd9 is described below commit 7229751bd9c55b0325b384a1508ba6cbd2b4c258 Author: amory <wangqian...@selectdb.com> AuthorDate: Thu Feb 23 20:47:26 2023 +0800 [Improve](map-type) Add contains_null for map (#16948) Add contains_null for map type. --- be/src/olap/types.h | 36 +++++++++++-- be/src/runtime/types.cpp | 34 +++++------- be/src/vec/data_types/data_type_factory.cpp | 6 +-- be/src/vec/exprs/vmap_literal.cpp | 20 +++---- be/src/vec/functions/function_cast.h | 1 + be/test/vec/exec/parquet/parquet_thrift_test.cpp | 5 +- .../java/org/apache/doris/catalog/ArrayType.java | 3 +- .../java/org/apache/doris/catalog/MapType.java | 21 ++++++++ .../main/java/org/apache/doris/catalog/Type.java | 3 +- .../java/org/apache/doris/analysis/CastExpr.java | 2 +- .../java/org/apache/doris/analysis/MapLiteral.java | 61 +++++++++++----------- .../main/java/org/apache/doris/catalog/Column.java | 2 + gensrc/thrift/Types.thrift | 2 +- 13 files changed, 124 insertions(+), 72 deletions(-) diff --git a/be/src/olap/types.h b/be/src/olap/types.h index 58ec1eb469..b26a89ba6f 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -339,7 +339,18 @@ public: } else if (l_size > r_size) { return 1; } else { - return 0; + // now we use collection value in array to pack map k-v + auto l_k = reinterpret_cast<const CollectionValue*>(l_value->key_data()); + auto l_v = reinterpret_cast<const CollectionValue*>(l_value->value_data()); + auto r_k = reinterpret_cast<const CollectionValue*>(r_value->key_data()); + auto r_v = reinterpret_cast<const CollectionValue*>(r_value->value_data()); + auto key_arr = new ArrayTypeInfo(create_static_type_info_ptr(_key_type_info.get())); + auto val_arr = new ArrayTypeInfo(create_static_type_info_ptr(_value_type_info.get())); + if (int kc = key_arr->cmp(l_k, r_k) != 0) { + return kc; + } else { + return val_arr->cmp(l_v, r_v); + } } } @@ -356,7 +367,27 @@ public: return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>(); } - std::string to_string(const void* src) const override { return "{}"; } + std::string to_string(const void* src) const override { + auto src_ = reinterpret_cast<const MapValue*>(src); + auto src_key = reinterpret_cast<const CollectionValue*>(src_->key_data()); + auto src_val = reinterpret_cast<const CollectionValue*>(src_->value_data()); + size_t key_slot_size = _key_type_info->size(); + size_t val_slot_size = _value_type_info->size(); + std::string result = "{"; + + for (size_t i = 0; i < src_key->length(); ++i) { + std::string k_s = + _key_type_info->to_string((uint8_t*)(src_key->data()) + key_slot_size); + std::string v_s = + _key_type_info->to_string((uint8_t*)(src_val->data()) + val_slot_size); + result += k_s + ":" + v_s; + if (i != src_key->length() - 1) { + result += ", "; + } + } + result += "}"; + return result; + } void set_to_max(void* buf) const override { DCHECK(false) << "set_to_max of list is not implemented."; @@ -366,7 +397,6 @@ public: DCHECK(false) << "set_to_min of list is not implemented."; } - // todo . is here only to need return 16 for two ptr? size_t size() const override { return sizeof(MapValue); } FieldType type() const override { return OLAP_FIELD_TYPE_MAP; } diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index e6366a1681..fca1c86954 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -55,9 +55,10 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& types, int* idx) case TTypeNodeType::ARRAY: { DCHECK(!node.__isset.scalar_type); DCHECK_LT(*idx, types.size() - 1); + DCHECK_EQ(node.contains_nulls.size(), 1); type = TYPE_ARRAY; contains_nulls.reserve(1); - contains_nulls.push_back(node.contains_null); + contains_nulls.push_back(node.contains_nulls[0]); ++(*idx); children.push_back(TypeDescriptor(types, idx)); break; @@ -65,7 +66,7 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& types, int* idx) case TTypeNodeType::STRUCT: { DCHECK(!node.__isset.scalar_type); DCHECK_LT(*idx, types.size() - 1); - DCHECK(!node.__isset.contains_null); + DCHECK(!node.__isset.contains_nulls); DCHECK(node.__isset.struct_fields); DCHECK_GE(node.struct_fields.size(), 1); type = TYPE_STRUCT; @@ -85,31 +86,19 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& types, int* idx) type = TYPE_VARIANT; break; } - // case TTypeNodeType::STRUCT: - // type = TYPE_STRUCT; - // for (int i = 0; i < node.struct_fields.size(); ++i) { - // ++(*idx); - // children.push_back(TypeDescriptor(types, idx)); - // field_names.push_back(node.struct_fields[i].name); - // } - // break; - // case TTypeNodeType::ARRAY: - // DCHECK(!node.__isset.scalar_type); - // DCHECK_LT(*idx, types.size() - 1); - // type = TYPE_ARRAY; - // ++(*idx); - // children.push_back(TypeDescriptor(types, idx)); - // break; case TTypeNodeType::MAP: { //TODO(xy): handle contains_null[0] for key and [1] for value DCHECK(!node.__isset.scalar_type); DCHECK_LT(*idx, types.size() - 2); - DCHECK(!node.__isset.contains_null); + DCHECK_EQ(node.contains_nulls.size(), 2); + contains_nulls.reserve(2); type = TYPE_MAP; ++(*idx); children.push_back(TypeDescriptor(types, idx)); + contains_nulls.push_back(node.contains_nulls[0]); ++(*idx); children.push_back(TypeDescriptor(types, idx)); + contains_nulls.push_back(node.contains_nulls[1]); break; } default: @@ -122,11 +111,16 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const { TTypeNode& node = thrift_type->types.back(); if (is_complex_type()) { if (type == TYPE_ARRAY) { + DCHECK_EQ(contains_nulls.size(), 1); node.type = TTypeNodeType::ARRAY; - node.contains_null = contains_nulls[0]; + node.contains_nulls.reserve(1); + node.contains_nulls.push_back(contains_nulls[0]); } else if (type == TYPE_MAP) { - //TODO(xy): need to process children for map + DCHECK_EQ(contains_nulls.size(), 2); node.type = TTypeNodeType::MAP; + node.contains_nulls.reserve(2); + node.contains_nulls.push_back(contains_nulls[0]); + node.contains_nulls.push_back(contains_nulls[1]); } else if (type == TYPE_VARIANT) { node.type = TTypeNodeType::VARIANT; } else { diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 91fc51187b..aaf9a395f1 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -169,10 +169,10 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo break; case TYPE_MAP: DCHECK(col_desc.children.size() == 2); - // todo. (Amory) Support Map contains_nulls in FE MapType.java Later PR + DCHECK_EQ(col_desc.contains_nulls.size(), 2); nested = std::make_shared<vectorized::DataTypeMap>( - create_data_type(col_desc.children[0], true), - create_data_type(col_desc.children[1], true)); + create_data_type(col_desc.children[0], col_desc.contains_nulls[0]), + create_data_type(col_desc.children[1], col_desc.contains_nulls[1])); break; case TYPE_STRUCT: { DCHECK(col_desc.children.size() >= 1); diff --git a/be/src/vec/exprs/vmap_literal.cpp b/be/src/vec/exprs/vmap_literal.cpp index 954142f04d..60415dda69 100644 --- a/be/src/vec/exprs/vmap_literal.cpp +++ b/be/src/vec/exprs/vmap_literal.cpp @@ -30,17 +30,17 @@ Status VMapLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc, Field keys = Array(); Field values = Array(); // each child is slot with key1, value1, key2, value2... - for (int idx = 0; idx < _children.size(); ++idx) { - Field item; - ColumnPtrWrapper* const_col_wrapper = nullptr; - RETURN_IF_ERROR(_children[idx]->get_const_col(context, &const_col_wrapper)); - const_col_wrapper->column_ptr->get(0, item); + for (int idx = 0; idx < _children.size() && idx + 1 < _children.size(); idx += 2) { + Field kf, vf; + ColumnPtrWrapper* const_key_col_wrapper = nullptr; + ColumnPtrWrapper* const_val_col_wrapper = nullptr; + RETURN_IF_ERROR(_children[idx]->get_const_col(context, &const_key_col_wrapper)); + RETURN_IF_ERROR(_children[idx + 1]->get_const_col(context, &const_val_col_wrapper)); + const_key_col_wrapper->column_ptr->get(0, kf); + const_val_col_wrapper->column_ptr->get(0, vf); - if ((idx & 1) == 0) { - keys.get<Array>().push_back(item); - } else { - values.get<Array>().push_back(item); - } + keys.get<Array>().push_back(kf); + values.get<Array>().push_back(vf); } map.get<Map>().push_back(keys); map.get<Map>().push_back(values); diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 7ac3316fdb..f3e5d6caf1 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -1542,6 +1542,7 @@ private: } } + //TODO(Amory) . Need support more cast for key , value for map WrapperType create_map_wrapper(const DataTypePtr& from_type, const DataTypeMap& to_type) const { switch (from_type->get_type_id()) { case TypeIndex::String: diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp b/be/test/vec/exec/parquet/parquet_thrift_test.cpp index b6d635aa44..c7e4894d20 100644 --- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp +++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp @@ -375,13 +375,14 @@ TEST_F(ParquetThriftReaderTest, group_reader) { { TTypeNode node; node.__set_type(TTypeNodeType::ARRAY); - node.contains_null = true; + std::vector<bool> contains_nulls {true}; + node.__set_contains_nulls(contains_nulls); TTypeNode inner; inner.__set_type(TTypeNodeType::SCALAR); TScalarType scalar_type; scalar_type.__set_type(TPrimitiveType::STRING); inner.__set_scalar_type(scalar_type); - inner.contains_null = true; + inner.__set_contains_nulls(contains_nulls); type.types.push_back(node); type.types.push_back(inner); } diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java index 8aa6acb31a..6cc9162dc6 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java @@ -24,6 +24,7 @@ import org.apache.doris.thrift.TTypeNodeType; import com.google.common.base.Preconditions; import com.google.common.base.Strings; +import com.google.common.collect.Lists; import com.google.gson.annotations.SerializedName; import java.util.Objects; @@ -138,7 +139,7 @@ public class ArrayType extends Type { container.types.add(node); Preconditions.checkNotNull(itemType); node.setType(TTypeNodeType.ARRAY); - node.setContainsNull(containsNull); + node.setContainsNulls(Lists.newArrayList(containsNull)); itemType.toThrift(container); } diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java index 06167cc9c9..6fd8da9c24 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java @@ -24,6 +24,7 @@ import org.apache.doris.thrift.TTypeNodeType; import com.google.common.base.Preconditions; import com.google.common.base.Strings; +import com.google.common.collect.Lists; import com.google.gson.annotations.SerializedName; import java.util.Objects; @@ -35,19 +36,30 @@ public class MapType extends Type { @SerializedName(value = "keyType") private final Type keyType; + + @SerializedName(value = "isKeyContainsNull") + private final boolean isKeyContainsNull; // Now always true + @SerializedName(value = "valueType") private final Type valueType; + @SerializedName(value = "isValueContainsNull") + private final boolean isValueContainsNull; // Now always true + public MapType() { this.keyType = NULL; + this.isKeyContainsNull = true; this.valueType = NULL; + this.isValueContainsNull = true; } public MapType(Type keyType, Type valueType) { Preconditions.checkNotNull(keyType); Preconditions.checkNotNull(valueType); this.keyType = keyType; + this.isKeyContainsNull = true; this.valueType = valueType; + this.isValueContainsNull = true; } @Override @@ -59,6 +71,14 @@ public class MapType extends Type { return keyType; } + public Boolean getIsKeyContainsNull() { + return isKeyContainsNull; + } + + public Boolean getIsValueContainsNull() { + return isValueContainsNull; + } + public Type getValueType() { return valueType; } @@ -141,6 +161,7 @@ public class MapType extends Type { Preconditions.checkNotNull(keyType); Preconditions.checkNotNull(valueType); node.setType(TTypeNodeType.MAP); + node.setContainsNulls(Lists.newArrayList(isKeyContainsNull, isValueContainsNull)); keyType.toThrift(container); valueType.toThrift(container); } diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java index 06f00d04e5..ca4ef88bd1 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java @@ -194,6 +194,7 @@ public abstract class Type { mapSubTypes.add(CHAR); mapSubTypes.add(VARCHAR); mapSubTypes.add(STRING); + mapSubTypes.add(NULL); structSubTypes = Lists.newArrayList(); structSubTypes.add(BOOLEAN); @@ -606,7 +607,7 @@ public abstract class Type { && !sourceType.isNull()) { // TODO: current not support cast any non-array type(except for null) to nested array type. return false; - } else if (targetType.isStructType() && sourceType.isStringType()) { + } else if ((targetType.isStructType() || targetType.isMapType()) && sourceType.isStringType()) { return true; } else if (sourceType.isStructType() && targetType.isStructType()) { return StructType.canCastTo((StructType) sourceType, (StructType) targetType); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java index 2ff6579d82..22252d1d5d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java @@ -333,7 +333,7 @@ public class CastExpr extends Expr { if (fn == null) { //TODO(xy): check map type - if (type.isStructType() && childType.isStringType()) { + if ((type.isMapType() || type.isStructType()) && childType.isStringType()) { return; } if (childType.isNull() && Type.canCastTo(childType, type)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java index fe8815a4d2..182fb40f33 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java @@ -47,30 +47,35 @@ public class MapLiteral extends LiteralExpr { Type keyType = Type.NULL; Type valueType = Type.NULL; children = new ArrayList<>(); - int idx = 0; - // TODO(xy): limit key type to scalar type - for (LiteralExpr expr : exprs) { - if (idx % 2 == 0) { - if (keyType == Type.NULL) { - keyType = expr.getType(); - } else { - keyType = Type.getAssignmentCompatibleType(keyType, expr.getType(), true); - } - if (keyType == Type.INVALID) { - throw new AnalysisException("Invalid element type in Map"); - } + // check types here + // 1. limit key type with map-key support + // 2. check type can be assigment for cast + for (int idx = 0; idx < exprs.length && idx + 1 < exprs.length; idx += 2) { + if (!MapType.MAP.supportSubType(exprs[idx].getType())) { + throw new AnalysisException("Invalid key type in Map, not support " + exprs[idx].getType()); + } + keyType = Type.getAssignmentCompatibleType(keyType, exprs[idx].getType(), true); + valueType = Type.getAssignmentCompatibleType(valueType, exprs[idx + 1].getType(), true); + } + + if (keyType == Type.INVALID) { + throw new AnalysisException("Invalid key type in Map."); + } + if (valueType == Type.INVALID) { + throw new AnalysisException("Invalid value type in Map."); + } + + for (int idx = 0; idx < exprs.length && idx + 1 < exprs.length; idx += 2) { + if (exprs[idx].getType().equals(keyType)) { + children.add(exprs[idx]); + } else { + children.add(exprs[idx].castTo(keyType)); + } + if (exprs[idx + 1].getType().equals(valueType)) { + children.add(exprs[idx + 1]); } else { - if (valueType == Type.NULL) { - valueType = expr.getType(); - } else { - valueType = Type.getAssignmentCompatibleType(valueType, expr.getType(), true); - } - if (valueType == Type.INVALID) { - throw new AnalysisException("Invalid element type in Map"); - } + children.add(exprs[idx + 1].castTo(valueType)); } - children.add(expr); - ++ idx; } type = new MapType(keyType, valueType); @@ -89,13 +94,9 @@ public class MapLiteral extends LiteralExpr { Type keyType = ((MapType) targetType).getKeyType(); Type valueType = ((MapType) targetType).getValueType(); - for (int i = 0; i < children.size(); ++ i) { - Expr child = children.get(i); - if ((i % 2) == 0) { - literal.children.set(i, child.uncheckedCastTo(keyType)); - } else { - literal.children.set(i, child.uncheckedCastTo(valueType)); - } + for (int i = 0; i < children.size() && i + 1 < children.size(); i += 2) { + literal.children.set(i, children.get(i).uncheckedCastTo(keyType)); + literal.children.set(i + 1, children.get(i + 1).uncheckedCastTo(valueType)); } literal.setType(targetType); return literal; @@ -111,7 +112,7 @@ public class MapLiteral extends LiteralExpr { @Override protected String toSqlImpl() { List<String> list = new ArrayList<>(children.size()); - for (int i = 0; i < children.size(); i += 2) { + for (int i = 0; i < children.size() && i + 1 < children.size(); i += 2) { list.add(children.get(i).toSqlImpl() + ":" + children.get(i + 1).toSqlImpl()); } return "MAP{" + StringUtils.join(list, ", ") + "}"; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index f6ae293f54..72bb65a20a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -218,6 +218,8 @@ public class Column implements Writable, GsonPostProcessable { } else if (type.isMapType()) { Column k = new Column(COLUMN_MAP_KEY, ((MapType) type).getKeyType()); Column v = new Column(COLUMN_MAP_VALUE, ((MapType) type).getValueType()); + k.setIsAllowNull(((MapType) type).getIsKeyContainsNull()); + v.setIsAllowNull(((MapType) type).getIsValueContainsNull()); column.addChildrenColumn(k); column.addChildrenColumn(v); } else if (type.isStructType()) { diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index 641e54f780..cec723aada 100644 --- a/gensrc/thrift/Types.thrift +++ b/gensrc/thrift/Types.thrift @@ -141,7 +141,7 @@ struct TTypeNode { 3: optional list<TStructField> struct_fields // only used for complex types, such as array, map and etc. - 4: optional bool contains_null + 4: optional list<bool> contains_nulls } // A flattened representation of a tree of column types obtained by depth-first --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org