eldenmoon commented on code in PR #45487:
URL: https://github.com/apache/doris/pull/45487#discussion_r1886811855


##########
be/src/vec/columns/column_object.cpp:
##########
@@ -1020,6 +1036,110 @@ void 
ColumnObject::Subcolumn::serialize_to_sparse_column(ColumnString* key, std:
                            "Index ({}) for serialize to sparse column is out 
of range", row);
 }
 
+const char* parse_binary_from_sparse_column(TypeIndex type, const char* data, 
Field& res,
+                                            FieldInfo& info_res) {
+    const char* end = data;
+    switch (type) {
+    case TypeIndex::String: {
+        const size_t size = *reinterpret_cast<const size_t*>(data);
+        data += sizeof(size_t);
+        res = Field(String(data, size));
+        end = data + size;
+        break;
+    }
+    case TypeIndex::Int8: {
+        res = *reinterpret_cast<const Int8*>(data);
+        end = data + sizeof(Int8);
+        break;
+    }
+    case TypeIndex::Int16: {
+        res = *reinterpret_cast<const Int16*>(data);
+        end = data + sizeof(Int16);
+        break;
+    }
+    case TypeIndex::Int32: {
+        res = *reinterpret_cast<const Int32*>(data);
+        end = data + sizeof(Int32);
+        break;
+    }
+    case TypeIndex::Int64: {
+        res = *reinterpret_cast<const Int64*>(data);
+        end = data + sizeof(Int64);
+        break;
+    }
+    case TypeIndex::Float32: {
+        res = *reinterpret_cast<const Float32*>(data);
+        end = data + sizeof(Float32);
+        break;
+    }
+    case TypeIndex::Float64: {
+        res = *reinterpret_cast<const Float64*>(data);
+        end = data + sizeof(Float64);
+        break;
+    }
+    case TypeIndex::JSONB: {
+        size_t size = *reinterpret_cast<const size_t*>(data);
+        data += sizeof(size_t);
+        res = JsonbField(data, size);
+        end = data + size;
+        break;
+    }
+    case TypeIndex::Array: {
+        const size_t size = *reinterpret_cast<const size_t*>(data);
+        data += sizeof(size_t);
+        res = Array(size);
+        vectorized::Array& array = res.get<Array>();
+        info_res.num_dimensions++;
+        for (size_t i = 0; i < size; ++i) {
+            const uint8_t is_null = *reinterpret_cast<const uint8_t*>(data++);
+            if (is_null) {
+                array.emplace_back(Null());
+                continue;
+            }
+            Field nested_field;
+            const TypeIndex nested_type =
+                    assert_cast<const TypeIndex>(*reinterpret_cast<const 
uint8_t*>(data++));
+            data = parse_binary_from_sparse_column(nested_type, data, 
nested_field, info_res);
+            array.emplace_back(std::move(nested_field));
+        }
+        end = data;
+        break;
+    }
+    default:
+        throw doris::Exception(ErrorCode::OUT_OF_BOUND,
+                               "Type ({}) for deserialize_from_sparse_column 
is invalid", type);
+    }
+    return end;
+}
+
+std::pair<Field, FieldInfo> ColumnObject::deserialize_from_sparse_column(const 
ColumnString* value,
+                                                                         
size_t row) const {
+    const auto& data_ref = value->get_data_at(row);
+    const char* data = data_ref.data;
+    DCHECK(data_ref.size > 0);
+
+    FieldInfo info_res = {
+            .scalar_type_id = TypeIndex::Nothing,
+            .have_nulls = false,
+            .need_convert = false,
+            .num_dimensions = 1,
+    };
+    // 0 is null
+    const uint8_t is_null = *reinterpret_cast<const uint8_t*>(data++);

Review Comment:
   Maybe we could avoid this extra byte used to represent null, since null
   values are never written to the sparse column.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to