eldenmoon commented on code in PR #45487: URL: https://github.com/apache/doris/pull/45487#discussion_r1886811855
########## be/src/vec/columns/column_object.cpp: ########## @@ -1020,6 +1036,110 @@ void ColumnObject::Subcolumn::serialize_to_sparse_column(ColumnString* key, std: "Index ({}) for serialize to sparse column is out of range", row); } +const char* parse_binary_from_sparse_column(TypeIndex type, const char* data, Field& res, + FieldInfo& info_res) { + const char* end = data; + switch (type) { + case TypeIndex::String: { + const size_t size = *reinterpret_cast<const size_t*>(data); + data += sizeof(size_t); + res = Field(String(data, size)); + end = data + size; + break; + } + case TypeIndex::Int8: { + res = *reinterpret_cast<const Int8*>(data); + end = data + sizeof(Int8); + break; + } + case TypeIndex::Int16: { + res = *reinterpret_cast<const Int16*>(data); + end = data + sizeof(Int16); + break; + } + case TypeIndex::Int32: { + res = *reinterpret_cast<const Int32*>(data); + end = data + sizeof(Int32); + break; + } + case TypeIndex::Int64: { + res = *reinterpret_cast<const Int64*>(data); + end = data + sizeof(Int64); + break; + } + case TypeIndex::Float32: { + res = *reinterpret_cast<const Float32*>(data); + end = data + sizeof(Float32); + break; + } + case TypeIndex::Float64: { + res = *reinterpret_cast<const Float64*>(data); + end = data + sizeof(Float64); + break; + } + case TypeIndex::JSONB: { + size_t size = *reinterpret_cast<const size_t*>(data); + data += sizeof(size_t); + res = JsonbField(data, size); + end = data + size; + break; + } + case TypeIndex::Array: { + const size_t size = *reinterpret_cast<const size_t*>(data); + data += sizeof(size_t); + res = Array(size); + vectorized::Array& array = res.get<Array>(); + info_res.num_dimensions++; + for (size_t i = 0; i < size; ++i) { + const uint8_t is_null = *reinterpret_cast<const uint8_t*>(data++); + if (is_null) { + array.emplace_back(Null()); + continue; + } + Field nested_field; + const TypeIndex nested_type = + assert_cast<const TypeIndex>(*reinterpret_cast<const uint8_t*>(data++)); + data = 
parse_binary_from_sparse_column(nested_type, data, nested_field, info_res); + array.emplace_back(std::move(nested_field)); + } + end = data; + break; + } + default: + throw doris::Exception(ErrorCode::OUT_OF_BOUND, + "Type ({}) for deserialize_from_sparse_column is invalid", type); + } + return end; +} + +std::pair<Field, FieldInfo> ColumnObject::deserialize_from_sparse_column(const ColumnString* value, + size_t row) const { + const auto& data_ref = value->get_data_at(row); + const char* data = data_ref.data; + DCHECK(data_ref.size > 0); + + FieldInfo info_res = { + .scalar_type_id = TypeIndex::Nothing, + .have_nulls = false, + .need_convert = false, + .num_dimensions = 1, + }; + // 0 is null + const uint8_t is_null = *reinterpret_cast<const uint8_t*>(data++); Review Comment: Maybe we could avoid this extra 1 byte used to represent null, since null values are never written to the sparse column. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org