This is an automated email from the ASF dual-hosted git repository. xuyang pushed a commit to branch struct-type in repository https://gitbox.apache.org/repos/asf/doris.git
commit 0d4da4f5f7797c53c9a2389519d5a81073baf82c Author: carlvinhust2012 <huchengha...@126.com> AuthorDate: Fri Jan 13 17:14:11 2023 +0800 [feature](struct-type) adjust the vexpr for struct type (#15669) --- be/src/vec/columns/column_struct.cpp | 7 -- be/src/vec/columns/column_struct.h | 1 - be/src/vec/data_types/data_type_factory.hpp | 8 ++ be/src/vec/data_types/data_type_struct.cpp | 99 +++++++++++++++++++--- be/src/vec/data_types/data_type_struct.h | 3 + be/src/vec/functions/function_cast.h | 13 ++- .../java/org/apache/doris/analysis/CastExpr.java | 8 ++ .../java/org/apache/doris/analysis/TypeDef.java | 2 +- .../main/java/org/apache/doris/catalog/Column.java | 1 + .../main/java/org/apache/doris/catalog/Type.java | 14 ++- 10 files changed, 134 insertions(+), 22 deletions(-) diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index d51bebb536..afef00c314 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -22,13 +22,6 @@ namespace doris::vectorized { -namespace ErrorCodes { -extern const int ILLEGAL_COLUMN; -extern const int NOT_IMPLEMENTED; -extern const int CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE; -extern const int LOGICAL_ERROR; -} // namespace ErrorCodes - std::string ColumnStruct::get_name() const { std::stringstream res; res << "Struct("; diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index a66d91f3e0..393b2c275f 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -103,7 +103,6 @@ public: bool can_be_inside_nullable() const override { return true; } MutableColumnPtr clone_empty() const override; MutableColumnPtr clone_resized(size_t size) const override; - size_t size() const override { return columns.at(0)->size(); } Field operator[](size_t n) const override; diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp index d46dd3435d..9bc5e20d5c 100644 --- a/be/src/vec/data_types/data_type_factory.hpp +++ b/be/src/vec/data_types/data_type_factory.hpp @@ -112,6 +112,14 @@ public: return entity.second; } } + if (type_ptr->get_type_id() == TypeIndex::Struct) { + DataTypeFactory::instance().register_data_type(type_ptr->get_name(), type_ptr); + for (const auto& entity : _invert_data_type_map) { + if (entity.first->equals(*type_ptr)) { + return entity.second; + } + } + } return _empty_string; } diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp index 4364f5c34f..87b5c4f110 100644 --- a/be/src/vec/data_types/data_type_struct.cpp +++ b/be/src/vec/data_types/data_type_struct.cpp @@ -22,17 +22,6 @@ namespace doris::vectorized { -namespace ErrorCodes { -extern const int BAD_ARGUMENTS; -extern const int DUPLICATE_COLUMN; -extern const int EMPTY_DATA_PASSED; -extern const int NOT_FOUND_COLUMN_IN_BLOCK; -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; -extern const int ILLEGAL_INDEX; -extern const int LOGICAL_ERROR; -} // namespace ErrorCodes - DataTypeStruct::DataTypeStruct(const DataTypes& elems_) : elems(elems_), have_explicit_names(false) { /// Automatically assigned names in form of '1', '2', ... @@ -91,6 +80,94 @@ std::string DataTypeStruct::do_get_name() const { return s.str(); } +Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const { + DCHECK(!rb.eof()); + auto* struct_column = assert_cast<ColumnStruct*>(column); + + if (*rb.position() != '{') { + return Status::InvalidArgument("Struct does not start with '{' character, found '{}'", + *rb.position()); + } + if (rb.count() < 2 || *(rb.end() - 1) != '}') { + return Status::InvalidArgument("Struct does not end with '}' character, found '{}'", + *(rb.end() - 1)); + } + + // here need handle the empty struct '{}' + if (rb.count() == 2) { + return Status::OK(); + } + + ++rb.position(); + std::vector<ReadBuffer> field_rbs; + field_rbs.reserve(elems.size()); + + // here get the value "jack" and 20 from {"name":"jack","age":20} + while (!rb.eof()) { + size_t field_len = 0; + auto start = rb.position(); + while (!rb.eof() && *start != ',' && *start != '}') { + field_len++; + start++; + } + if (field_len >= rb.count()) { + return Status::InvalidArgument("Invalid Length"); + } + ReadBuffer field_rb(rb.position(), field_len); + size_t len = 0; + auto start_rb = field_rb.position(); + while (!field_rb.eof() && *start_rb != ':') { + len++; + start_rb++; + } + ReadBuffer field(field_rb.position() + len + 1, field_rb.count() - len - 1); + + if (field.count() < 2 || *field.position() != '"' || *field.end() != '"') { + field_rbs.push_back(field); + } else { + ReadBuffer field_has_quote(field.position() + 1, field.count() - 2); + field_rbs.push_back(field_has_quote); + } + + rb.position() += field_len + 1; + } + + for (size_t idx = 0; idx < elems.size(); idx++) { + elems[idx]->from_string(field_rbs[idx], &struct_column->get_column(idx)); + } + + return Status::OK(); +} + +std::string DataTypeStruct::to_string(const IColumn& column, size_t row_num) const { + auto ptr = column.convert_to_full_column_if_const(); + auto& struct_column = assert_cast<const ColumnStruct&>(*ptr.get()); + + std::stringstream ss; + ss << "<"; + for (size_t idx = 0; idx < elems.size(); idx++) { + if (idx != 0) { + ss << ", "; + } + ss << elems[idx]->to_string(struct_column.get_column(idx), row_num); + } + ss << ">"; + return ss.str(); +} + +void DataTypeStruct::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const { + auto ptr = column.convert_to_full_column_if_const(); + auto& struct_column = assert_cast<const ColumnStruct&>(*ptr.get()); + ostr.write("<", 1); + for (size_t idx = 0; idx < elems.size(); idx++) { + if (idx != 0) { + ostr.write(", ", 2); + } + elems[idx]->to_string(struct_column.get_column(idx), row_num, ostr); + } + ostr.write(">", 1); +} + static inline IColumn& extract_element_column(IColumn& column, size_t idx) { return assert_cast<ColumnStruct&>(column).get_column(idx); } diff --git a/be/src/vec/data_types/data_type_struct.h b/be/src/vec/data_types/data_type_struct.h index 88255de38c..47cfaab1a3 100644 --- a/be/src/vec/data_types/data_type_struct.h +++ b/be/src/vec/data_types/data_type_struct.h @@ -96,6 +96,9 @@ public: const char* deserialize(const char* buf, IColumn* column, int be_exec_version) const override; void to_pb_column_meta(PColumnMeta* col_meta) const override; + Status from_string(ReadBuffer& rb, IColumn* column) const override; + std::string to_string(const IColumn& column, size_t row_num) const override; + void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; // bool is_parametric() const { return true; } // SerializationPtr do_get_default_serialization() const override; // SerializationPtr get_serialization(const SerializationInfo& info) const override; diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index d300c09a76..ec4608c115 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -1541,6 +1541,16 @@ private: return &ConvertImplGenericToJsonb::execute; } } + // check struct value type and get to_type value + // TODO: need handle another type to cast struct + WrapperType create_struct_wrapper(const DataTypePtr& from_type, + const DataTypeStruct& to_type) const { + switch (from_type->get_type_id()) { + case TypeIndex::String: + default: + return &ConvertImplGenericFromString<ColumnString>::execute; + } + } WrapperType prepare_unpack_dictionaries(FunctionContext* context, const DataTypePtr& from_type, const DataTypePtr& to_type) const { @@ -1712,6 +1722,8 @@ private: case TypeIndex::Array: return create_array_wrapper(context, from_type, static_cast<const DataTypeArray&>(*to_type)); + case TypeIndex::Struct: + return create_struct_wrapper(from_type, static_cast<const DataTypeStruct&>(*to_type)); default: break; } @@ -1757,7 +1769,6 @@ protected: // TODO(xy): support return struct type for factory auto type = DataTypeFactory::instance().get(type_col->get_value<String>()); DCHECK(type != nullptr); - bool need_to_be_nullable = false; // 1. from_type is nullable need_to_be_nullable |= arguments[0].type->is_nullable(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java index ab5bc992f8..77975c1eb9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java @@ -318,9 +318,17 @@ public class CastExpr extends Expr { type, Function.NullableMode.ALWAYS_NULLABLE, Lists.newArrayList(Type.VARCHAR), false, "doris::CastFunctions::cast_to_array_val", null, null, true); + } else if (type.isStructType()) { + fn = ScalarFunction.createBuiltin(getFnName(Type.STRUCT), + type, Function.NullableMode.ALWAYS_NULLABLE, + Lists.newArrayList(Type.VARCHAR), false, + "doris::CastFunctions::cast_to_struct_val", null, null, true); } if (fn == null) { + if (type.isStructType() && childType.isStringType()) { + return; + } if (childType.isNull() && Type.canCastTo(childType, type)) { return; } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java index 5690c115c1..c89c659207 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java @@ -129,7 +129,7 @@ public class TypeDef implements ParseNode { // check whether the sub-type is supported if (!parent.supportSubType(child)) { throw new AnalysisException( - parent.getPrimitiveType() + "unsupported sub-type: " + child.toSql()); + parent.getPrimitiveType() + " unsupported sub-type: " + child.toSql()); } if (child.getPrimitiveType().isStringType() && !child.isLengthSet()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index 9efe901907..7b0de9b7e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -59,6 +59,7 @@ public class Column implements Writable, GsonPostProcessable { public static final String DELETE_SIGN = "__DORIS_DELETE_SIGN__"; public static final String SEQUENCE_COL = "__DORIS_SEQUENCE_COL__"; private static final String COLUMN_ARRAY_CHILDREN = "item"; + private static final String COLUMN_STRUCT_CHILDREN = "field"; public static final int COLUMN_UNIQUE_ID_INIT_VALUE = -1; @SerializedName(value = "name") diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index 45d88897a9..587266926c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -176,8 +176,20 @@ public abstract class Type { arraySubTypes.add(STRING); structSubTypes = Lists.newArrayList(); - structSubTypes.add(INT); + structSubTypes.addAll(numericTypes); + structSubTypes.add(BOOLEAN); + structSubTypes.add(VARCHAR); structSubTypes.add(STRING); + structSubTypes.add(CHAR); + structSubTypes.add(DATE); + structSubTypes.add(DATETIME); + structSubTypes.add(DATEV2); + structSubTypes.add(DATETIMEV2); + structSubTypes.add(TIME); + structSubTypes.add(TIMEV2); + structSubTypes.add(DECIMAL32); + structSubTypes.add(DECIMAL64); + structSubTypes.add(DECIMAL128); } public static ArrayList<ScalarType> getIntegerTypes() { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org