This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7229751bd9 [Improve](map-type) Add contains_null for map  (#16948)
7229751bd9 is described below

commit 7229751bd9c55b0325b384a1508ba6cbd2b4c258
Author: amory <wangqian...@selectdb.com>
AuthorDate: Thu Feb 23 20:47:26 2023 +0800

    [Improve](map-type) Add contains_null for map  (#16948)
    
    Add contains_null for map type.
---
 be/src/olap/types.h                                | 36 +++++++++++--
 be/src/runtime/types.cpp                           | 34 +++++-------
 be/src/vec/data_types/data_type_factory.cpp        |  6 +--
 be/src/vec/exprs/vmap_literal.cpp                  | 20 +++----
 be/src/vec/functions/function_cast.h               |  1 +
 be/test/vec/exec/parquet/parquet_thrift_test.cpp   |  5 +-
 .../java/org/apache/doris/catalog/ArrayType.java   |  3 +-
 .../java/org/apache/doris/catalog/MapType.java     | 21 ++++++++
 .../main/java/org/apache/doris/catalog/Type.java   |  3 +-
 .../java/org/apache/doris/analysis/CastExpr.java   |  2 +-
 .../java/org/apache/doris/analysis/MapLiteral.java | 61 +++++++++++-----------
 .../main/java/org/apache/doris/catalog/Column.java |  2 +
 gensrc/thrift/Types.thrift                         |  2 +-
 13 files changed, 124 insertions(+), 72 deletions(-)

diff --git a/be/src/olap/types.h b/be/src/olap/types.h
index 58ec1eb469..b26a89ba6f 100644
--- a/be/src/olap/types.h
+++ b/be/src/olap/types.h
@@ -339,7 +339,18 @@ public:
         } else if (l_size > r_size) {
             return 1;
         } else {
-            return 0;
+            // now we use collection value in array to pack map k-v
+            auto l_k = reinterpret_cast<const 
CollectionValue*>(l_value->key_data());
+            auto l_v = reinterpret_cast<const 
CollectionValue*>(l_value->value_data());
+            auto r_k = reinterpret_cast<const 
CollectionValue*>(r_value->key_data());
+            auto r_v = reinterpret_cast<const 
CollectionValue*>(r_value->value_data());
+            auto key_arr = new 
ArrayTypeInfo(create_static_type_info_ptr(_key_type_info.get()));
+            auto val_arr = new 
ArrayTypeInfo(create_static_type_info_ptr(_value_type_info.get()));
+            if (int kc = key_arr->cmp(l_k, r_k) != 0) {
+                return kc;
+            } else {
+                return val_arr->cmp(l_v, r_v);
+            }
         }
     }
 
@@ -356,7 +367,27 @@ public:
         return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
     }
 
-    std::string to_string(const void* src) const override { return "{}"; }
+    std::string to_string(const void* src) const override {
+        auto src_ = reinterpret_cast<const MapValue*>(src);
+        auto src_key = reinterpret_cast<const 
CollectionValue*>(src_->key_data());
+        auto src_val = reinterpret_cast<const 
CollectionValue*>(src_->value_data());
+        size_t key_slot_size = _key_type_info->size();
+        size_t val_slot_size = _value_type_info->size();
+        std::string result = "{";
+
+        for (size_t i = 0; i < src_key->length(); ++i) {
+            std::string k_s =
+                    _key_type_info->to_string((uint8_t*)(src_key->data()) + 
key_slot_size);
+            std::string v_s =
+                    _key_type_info->to_string((uint8_t*)(src_val->data()) + 
val_slot_size);
+            result += k_s + ":" + v_s;
+            if (i != src_key->length() - 1) {
+                result += ", ";
+            }
+        }
+        result += "}";
+        return result;
+    }
 
     void set_to_max(void* buf) const override {
         DCHECK(false) << "set_to_max of list is not implemented.";
@@ -366,7 +397,6 @@ public:
         DCHECK(false) << "set_to_min of list is not implemented.";
     }
 
-    // todo . is here only to need return 16 for two ptr?
     size_t size() const override { return sizeof(MapValue); }
 
     FieldType type() const override { return OLAP_FIELD_TYPE_MAP; }
diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp
index e6366a1681..fca1c86954 100644
--- a/be/src/runtime/types.cpp
+++ b/be/src/runtime/types.cpp
@@ -55,9 +55,10 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& 
types, int* idx)
     case TTypeNodeType::ARRAY: {
         DCHECK(!node.__isset.scalar_type);
         DCHECK_LT(*idx, types.size() - 1);
+        DCHECK_EQ(node.contains_nulls.size(), 1);
         type = TYPE_ARRAY;
         contains_nulls.reserve(1);
-        contains_nulls.push_back(node.contains_null);
+        contains_nulls.push_back(node.contains_nulls[0]);
         ++(*idx);
         children.push_back(TypeDescriptor(types, idx));
         break;
@@ -65,7 +66,7 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& 
types, int* idx)
     case TTypeNodeType::STRUCT: {
         DCHECK(!node.__isset.scalar_type);
         DCHECK_LT(*idx, types.size() - 1);
-        DCHECK(!node.__isset.contains_null);
+        DCHECK(!node.__isset.contains_nulls);
         DCHECK(node.__isset.struct_fields);
         DCHECK_GE(node.struct_fields.size(), 1);
         type = TYPE_STRUCT;
@@ -85,31 +86,19 @@ TypeDescriptor::TypeDescriptor(const 
std::vector<TTypeNode>& types, int* idx)
         type = TYPE_VARIANT;
         break;
     }
-    // case TTypeNodeType::STRUCT:
-    //     type = TYPE_STRUCT;
-    //     for (int i = 0; i < node.struct_fields.size(); ++i) {
-    //         ++(*idx);
-    //         children.push_back(TypeDescriptor(types, idx));
-    //         field_names.push_back(node.struct_fields[i].name);
-    //     }
-    //     break;
-    // case TTypeNodeType::ARRAY:
-    //     DCHECK(!node.__isset.scalar_type);
-    //     DCHECK_LT(*idx, types.size() - 1);
-    //     type = TYPE_ARRAY;
-    //     ++(*idx);
-    //     children.push_back(TypeDescriptor(types, idx));
-    //     break;
     case TTypeNodeType::MAP: {
         //TODO(xy): handle contains_null[0] for key and [1] for value
         DCHECK(!node.__isset.scalar_type);
         DCHECK_LT(*idx, types.size() - 2);
-        DCHECK(!node.__isset.contains_null);
+        DCHECK_EQ(node.contains_nulls.size(), 2);
+        contains_nulls.reserve(2);
         type = TYPE_MAP;
         ++(*idx);
         children.push_back(TypeDescriptor(types, idx));
+        contains_nulls.push_back(node.contains_nulls[0]);
         ++(*idx);
         children.push_back(TypeDescriptor(types, idx));
+        contains_nulls.push_back(node.contains_nulls[1]);
         break;
     }
     default:
@@ -122,11 +111,16 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) 
const {
     TTypeNode& node = thrift_type->types.back();
     if (is_complex_type()) {
         if (type == TYPE_ARRAY) {
+            DCHECK_EQ(contains_nulls.size(), 1);
             node.type = TTypeNodeType::ARRAY;
-            node.contains_null = contains_nulls[0];
+            node.contains_nulls.reserve(1);
+            node.contains_nulls.push_back(contains_nulls[0]);
         } else if (type == TYPE_MAP) {
-            //TODO(xy): need to process children for map
+            DCHECK_EQ(contains_nulls.size(), 2);
             node.type = TTypeNodeType::MAP;
+            node.contains_nulls.reserve(2);
+            node.contains_nulls.push_back(contains_nulls[0]);
+            node.contains_nulls.push_back(contains_nulls[1]);
         } else if (type == TYPE_VARIANT) {
             node.type = TTypeNodeType::VARIANT;
         } else {
diff --git a/be/src/vec/data_types/data_type_factory.cpp 
b/be/src/vec/data_types/data_type_factory.cpp
index 91fc51187b..aaf9a395f1 100644
--- a/be/src/vec/data_types/data_type_factory.cpp
+++ b/be/src/vec/data_types/data_type_factory.cpp
@@ -169,10 +169,10 @@ DataTypePtr DataTypeFactory::create_data_type(const 
TypeDescriptor& col_desc, bo
         break;
     case TYPE_MAP:
         DCHECK(col_desc.children.size() == 2);
-        // todo. (Amory) Support Map contains_nulls in FE MapType.java Later PR
+        DCHECK_EQ(col_desc.contains_nulls.size(), 2);
         nested = std::make_shared<vectorized::DataTypeMap>(
-                create_data_type(col_desc.children[0], true),
-                create_data_type(col_desc.children[1], true));
+                create_data_type(col_desc.children[0], 
col_desc.contains_nulls[0]),
+                create_data_type(col_desc.children[1], 
col_desc.contains_nulls[1]));
         break;
     case TYPE_STRUCT: {
         DCHECK(col_desc.children.size() >= 1);
diff --git a/be/src/vec/exprs/vmap_literal.cpp 
b/be/src/vec/exprs/vmap_literal.cpp
index 954142f04d..60415dda69 100644
--- a/be/src/vec/exprs/vmap_literal.cpp
+++ b/be/src/vec/exprs/vmap_literal.cpp
@@ -30,17 +30,17 @@ Status VMapLiteral::prepare(RuntimeState* state, const 
RowDescriptor& row_desc,
     Field keys = Array();
     Field values = Array();
     // each child is slot with key1, value1, key2, value2...
-    for (int idx = 0; idx < _children.size(); ++idx) {
-        Field item;
-        ColumnPtrWrapper* const_col_wrapper = nullptr;
-        RETURN_IF_ERROR(_children[idx]->get_const_col(context, 
&const_col_wrapper));
-        const_col_wrapper->column_ptr->get(0, item);
+    for (int idx = 0; idx < _children.size() && idx + 1 < _children.size(); 
idx += 2) {
+        Field kf, vf;
+        ColumnPtrWrapper* const_key_col_wrapper = nullptr;
+        ColumnPtrWrapper* const_val_col_wrapper = nullptr;
+        RETURN_IF_ERROR(_children[idx]->get_const_col(context, 
&const_key_col_wrapper));
+        RETURN_IF_ERROR(_children[idx + 1]->get_const_col(context, 
&const_val_col_wrapper));
+        const_key_col_wrapper->column_ptr->get(0, kf);
+        const_val_col_wrapper->column_ptr->get(0, vf);
 
-        if ((idx & 1) == 0) {
-            keys.get<Array>().push_back(item);
-        } else {
-            values.get<Array>().push_back(item);
-        }
+        keys.get<Array>().push_back(kf);
+        values.get<Array>().push_back(vf);
     }
     map.get<Map>().push_back(keys);
     map.get<Map>().push_back(values);
diff --git a/be/src/vec/functions/function_cast.h 
b/be/src/vec/functions/function_cast.h
index 7ac3316fdb..f3e5d6caf1 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -1542,6 +1542,7 @@ private:
         }
     }
 
+    //TODO(Amory) . Need support more cast for key , value for map
     WrapperType create_map_wrapper(const DataTypePtr& from_type, const 
DataTypeMap& to_type) const {
         switch (from_type->get_type_id()) {
         case TypeIndex::String:
diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp 
b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
index b6d635aa44..c7e4894d20 100644
--- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp
+++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
@@ -375,13 +375,14 @@ TEST_F(ParquetThriftReaderTest, group_reader) {
         {
             TTypeNode node;
             node.__set_type(TTypeNodeType::ARRAY);
-            node.contains_null = true;
+            std::vector<bool> contains_nulls {true};
+            node.__set_contains_nulls(contains_nulls);
             TTypeNode inner;
             inner.__set_type(TTypeNodeType::SCALAR);
             TScalarType scalar_type;
             scalar_type.__set_type(TPrimitiveType::STRING);
             inner.__set_scalar_type(scalar_type);
-            inner.contains_null = true;
+            inner.__set_contains_nulls(contains_nulls);
             type.types.push_back(node);
             type.types.push_back(inner);
         }
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java 
b/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java
index 8aa6acb31a..6cc9162dc6 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/ArrayType.java
@@ -24,6 +24,7 @@ import org.apache.doris.thrift.TTypeNodeType;
 
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
 import com.google.gson.annotations.SerializedName;
 
 import java.util.Objects;
@@ -138,7 +139,7 @@ public class ArrayType extends Type {
         container.types.add(node);
         Preconditions.checkNotNull(itemType);
         node.setType(TTypeNodeType.ARRAY);
-        node.setContainsNull(containsNull);
+        node.setContainsNulls(Lists.newArrayList(containsNull));
         itemType.toThrift(container);
     }
 
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java 
b/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java
index 06167cc9c9..6fd8da9c24 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/MapType.java
@@ -24,6 +24,7 @@ import org.apache.doris.thrift.TTypeNodeType;
 
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
 import com.google.gson.annotations.SerializedName;
 
 import java.util.Objects;
@@ -35,19 +36,30 @@ public class MapType extends Type {
 
     @SerializedName(value = "keyType")
     private final Type keyType;
+
+    @SerializedName(value = "isKeyContainsNull")
+    private final boolean isKeyContainsNull; // Now always true
+
     @SerializedName(value = "valueType")
     private final Type valueType;
 
+    @SerializedName(value = "isValueContainsNull")
+    private final boolean isValueContainsNull; // Now always true
+
     public MapType() {
         this.keyType = NULL;
+        this.isKeyContainsNull = true;
         this.valueType = NULL;
+        this.isValueContainsNull = true;
     }
 
     public MapType(Type keyType, Type valueType) {
         Preconditions.checkNotNull(keyType);
         Preconditions.checkNotNull(valueType);
         this.keyType = keyType;
+        this.isKeyContainsNull = true;
         this.valueType = valueType;
+        this.isValueContainsNull = true;
     }
 
     @Override
@@ -59,6 +71,14 @@ public class MapType extends Type {
         return keyType;
     }
 
+    public Boolean getIsKeyContainsNull() {
+        return isKeyContainsNull;
+    }
+
+    public Boolean getIsValueContainsNull() {
+        return isValueContainsNull;
+    }
+
     public Type getValueType() {
         return valueType;
     }
@@ -141,6 +161,7 @@ public class MapType extends Type {
         Preconditions.checkNotNull(keyType);
         Preconditions.checkNotNull(valueType);
         node.setType(TTypeNodeType.MAP);
+        node.setContainsNulls(Lists.newArrayList(isKeyContainsNull, 
isValueContainsNull));
         keyType.toThrift(container);
         valueType.toThrift(container);
     }
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java 
b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
index 06f00d04e5..ca4ef88bd1 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
@@ -194,6 +194,7 @@ public abstract class Type {
         mapSubTypes.add(CHAR);
         mapSubTypes.add(VARCHAR);
         mapSubTypes.add(STRING);
+        mapSubTypes.add(NULL);
 
         structSubTypes = Lists.newArrayList();
         structSubTypes.add(BOOLEAN);
@@ -606,7 +607,7 @@ public abstract class Type {
                 && !sourceType.isNull()) {
             // TODO: current not support cast any non-array type(except for 
null) to nested array type.
             return false;
-        } else if (targetType.isStructType() && sourceType.isStringType()) {
+        } else if ((targetType.isStructType() || targetType.isMapType()) && 
sourceType.isStringType()) {
             return true;
         } else if (sourceType.isStructType() && targetType.isStructType()) {
             return StructType.canCastTo((StructType) sourceType, (StructType) 
targetType);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
index 2ff6579d82..22252d1d5d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
@@ -333,7 +333,7 @@ public class CastExpr extends Expr {
 
         if (fn == null) {
             //TODO(xy): check map type
-            if (type.isStructType() && childType.isStringType()) {
+            if ((type.isMapType() || type.isStructType()) && 
childType.isStringType()) {
                 return;
             }
             if (childType.isNull() && Type.canCastTo(childType, type)) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java
index fe8815a4d2..182fb40f33 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java
@@ -47,30 +47,35 @@ public class MapLiteral extends LiteralExpr {
         Type keyType = Type.NULL;
         Type valueType = Type.NULL;
         children = new ArrayList<>();
-        int idx = 0;
-        // TODO(xy): limit key type to scalar type
-        for (LiteralExpr expr : exprs) {
-            if (idx % 2 == 0) {
-                if (keyType == Type.NULL) {
-                    keyType = expr.getType();
-                } else {
-                    keyType = Type.getAssignmentCompatibleType(keyType, 
expr.getType(), true);
-                }
-                if (keyType == Type.INVALID) {
-                    throw new AnalysisException("Invalid element type in Map");
-                }
+        // check types here
+        // 1. limit key type with map-key support
+        // 2. check type can be assigment for cast
+        for (int idx = 0; idx < exprs.length && idx + 1 < exprs.length; idx += 
2) {
+            if (!MapType.MAP.supportSubType(exprs[idx].getType())) {
+                throw new AnalysisException("Invalid key type in Map, not 
support " + exprs[idx].getType());
+            }
+            keyType = Type.getAssignmentCompatibleType(keyType, 
exprs[idx].getType(), true);
+            valueType = Type.getAssignmentCompatibleType(valueType, exprs[idx 
+ 1].getType(), true);
+        }
+
+        if (keyType == Type.INVALID) {
+            throw new AnalysisException("Invalid key type in Map.");
+        }
+        if (valueType == Type.INVALID) {
+            throw new AnalysisException("Invalid value type in Map.");
+        }
+
+        for (int idx = 0; idx < exprs.length && idx + 1 < exprs.length; idx += 
2) {
+            if (exprs[idx].getType().equals(keyType)) {
+                children.add(exprs[idx]);
+            } else {
+                children.add(exprs[idx].castTo(keyType));
+            }
+            if (exprs[idx + 1].getType().equals(valueType)) {
+                children.add(exprs[idx + 1]);
             } else {
-                if (valueType == Type.NULL) {
-                    valueType = expr.getType();
-                } else {
-                    valueType = Type.getAssignmentCompatibleType(valueType, 
expr.getType(), true);
-                }
-                if (valueType == Type.INVALID) {
-                    throw new AnalysisException("Invalid element type in Map");
-                }
+                children.add(exprs[idx + 1].castTo(valueType));
             }
-            children.add(expr);
-            ++ idx;
         }
 
         type = new MapType(keyType, valueType);
@@ -89,13 +94,9 @@ public class MapLiteral extends LiteralExpr {
         Type keyType = ((MapType) targetType).getKeyType();
         Type valueType = ((MapType) targetType).getValueType();
 
-        for (int i = 0; i < children.size(); ++ i) {
-            Expr child = children.get(i);
-            if ((i % 2) == 0) {
-                literal.children.set(i, child.uncheckedCastTo(keyType));
-            } else {
-                literal.children.set(i, child.uncheckedCastTo(valueType));
-            }
+        for (int i = 0; i < children.size() &&  i + 1 < children.size(); i += 
2) {
+            literal.children.set(i, children.get(i).uncheckedCastTo(keyType));
+            literal.children.set(i + 1, children.get(i + 
1).uncheckedCastTo(valueType));
         }
         literal.setType(targetType);
         return literal;
@@ -111,7 +112,7 @@ public class MapLiteral extends LiteralExpr {
     @Override
     protected String toSqlImpl() {
         List<String> list = new ArrayList<>(children.size());
-        for (int i = 0; i < children.size(); i += 2) {
+        for (int i = 0; i < children.size() && i + 1 < children.size(); i += 
2) {
             list.add(children.get(i).toSqlImpl() + ":" + children.get(i + 
1).toSqlImpl());
         }
         return "MAP{" + StringUtils.join(list, ", ") + "}";
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
index f6ae293f54..72bb65a20a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
@@ -218,6 +218,8 @@ public class Column implements Writable, 
GsonPostProcessable {
         } else if (type.isMapType()) {
             Column k = new Column(COLUMN_MAP_KEY, ((MapType) 
type).getKeyType());
             Column v = new Column(COLUMN_MAP_VALUE, ((MapType) 
type).getValueType());
+            k.setIsAllowNull(((MapType) type).getIsKeyContainsNull());
+            v.setIsAllowNull(((MapType) type).getIsValueContainsNull());
             column.addChildrenColumn(k);
             column.addChildrenColumn(v);
         } else if (type.isStructType()) {
diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift
index 641e54f780..cec723aada 100644
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@@ -141,7 +141,7 @@ struct TTypeNode {
     3: optional list<TStructField> struct_fields
 
     // only used for complex types, such as array, map and etc.
-    4: optional bool contains_null
+    4: optional list<bool> contains_nulls
 }
 
 // A flattened representation of a tree of column types obtained by depth-first


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to