This is an automated email from the ASF dual-hosted git repository.
jerryshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 81a2edd9a4 [8619] feat(client-python): add statistic value serdes
(#10877)
81a2edd9a4 is described below
commit 81a2edd9a41b435a11b60c2668b09a1f0a87e23a
Author: George T. C. Lai <[email protected]>
AuthorDate: Wed May 6 17:14:12 2026 +0800
[8619] feat(client-python): add statistic value serdes (#10877)
### What changes were proposed in this pull request?
This PR adds a Python client implementation of the statistic value serdes,
mirroring the following Java classes:
`JsonUtils.java`
- StatisticValueSerializer
- StatisticValueDeserializer
`StatisticValues.java`
- StatisticValues
### Why are the changes needed?
These classes are prerequisites for implementing
`MetadataObjectStatisticsOperations`.
Fix: #8619
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests
---------
Signed-off-by: George T. C. Lai <[email protected]>
Co-authored-by: Copilot <[email protected]>
---
.../gravitino/api/rel/types/json_serdes/base.py | 10 +-
.../gravitino/api/stats/json_serdes/__init__.py | 16 ++
.../stats/json_serdes/statistic_value_serdes.py | 106 ++++++++
.../gravitino/api/stats/statistic_values.py | 267 +++++++++++++++++++++
.../api/stats/test_statistic_value_serdes.py | 174 ++++++++++++++
.../unittests/api/stats/test_statistic_values.py | 202 ++++++++++++++++
6 files changed, 774 insertions(+), 1 deletion(-)
diff --git a/clients/client-python/gravitino/api/rel/types/json_serdes/base.py b/clients/client-python/gravitino/api/rel/types/json_serdes/base.py
index caba4fbe10..d837d0bc65 100644
--- a/clients/client-python/gravitino/api/rel/types/json_serdes/base.py
+++ b/clients/client-python/gravitino/api/rel/types/json_serdes/base.py
@@ -25,13 +25,21 @@ from gravitino.api.rel.expressions.expression import Expression
from gravitino.api.rel.expressions.sorts.sort_order import SortOrder
from gravitino.api.rel.indexes.index import Index
from gravitino.api.rel.types.types import Type
+from gravitino.api.stats.statistic_value import StatisticValue
from gravitino.dto.rel.partitioning.partitioning import Partitioning
from gravitino.dto.rel.partitions.partition_dto import PartitionDTO
_GravitinoTypeT = TypeVar(
"_GravitinoTypeT",
bound=Union[
- Expression, Type, Partitioning, PartitionDTO, Distribution, Index,
SortOrder
+ Expression,
+ Type,
+ Partitioning,
+ PartitionDTO,
+ Distribution,
+ Index,
+ SortOrder,
+ StatisticValue,
],
)
diff --git a/clients/client-python/gravitino/api/stats/json_serdes/__init__.py b/clients/client-python/gravitino/api/stats/json_serdes/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/clients/client-python/gravitino/api/stats/json_serdes/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/clients/client-python/gravitino/api/stats/json_serdes/statistic_value_serdes.py b/clients/client-python/gravitino/api/stats/json_serdes/statistic_value_serdes.py
new file mode 100644
index 0000000000..23449993f3
--- /dev/null
+++ b/clients/client-python/gravitino/api/stats/json_serdes/statistic_value_serdes.py
@@ -0,0 +1,106 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any
+
+from dataclasses_json.core import Json
+
+from gravitino.api.rel.types.json_serdes.base import JsonSerializable
+from gravitino.api.rel.types.type import Name
+from gravitino.api.stats.statistic_value import StatisticValue
+from gravitino.api.stats.statistic_values import StatisticValues
+from gravitino.utils.precondition import Precondition
+
+
+class StatisticValueSerdes(JsonSerializable[StatisticValue[Any]]):
+ """Customized JSON Serializer and Deserializer for StatisticValue."""
+
+ @classmethod
+ def serialize(cls, value: StatisticValue[Any]) -> Json:
+ """Serialize the given StatisticValue.
+
+ Args:
+ value (StatisticValue): The StatisticValue to be serialized.
+
+ Returns:
+ Json: The serialized data corresponding to the given Gravitino
Type.
+ """
+
+ match value.data_type().name():
+ case Name.BOOLEAN | Name.STRING | Name.DOUBLE | Name.LONG:
+ return value.value()
+ case Name.LIST:
+ return [cls.serialize(item) for item in value.value()]
+ case Name.STRUCT:
+ return {k: cls.serialize(v) for k, v in value.value().items()}
+ case _:
+ raise ValueError(
+ f"Unsupported statistic value type: {value.data_type()}"
+ )
+
+ @classmethod
+ def deserialize(cls, data: Json) -> StatisticValue[Any]:
+ """Deserialize the given data to a StatisticValue.
+
+ Args:
+ data (Json): The data to be deserialized.
+
+ Returns:
+ StatisticValue[Any]: The deserialized StatisticValue.
+ """
+
+ return cls._get_statistic_value(data)
+
+ @classmethod
+ def _get_statistic_value(cls, data: Json) -> StatisticValue[Any]:
+ """Get the StatisticValue from the given data.
+
+ Args:
+ data (Json): The data to get the StatisticValue from.
+
+ Returns:
+ StatisticValue: The StatisticValue corresponding to the given data.
+ """
+
+ Precondition.check_argument(
+ data is not None, f"Cannot parse statistic value from invalid
JSON: {data}"
+ )
+
+ match data:
+ case bool():
+ return StatisticValues.boolean_value(data)
+ case int():
+ return StatisticValues.long_value(data)
+ case float():
+ return StatisticValues.double_value(data)
+ case str():
+ return StatisticValues.string_value(data)
+ case list():
+ return StatisticValues.list_value(
+ [cls._get_statistic_value(item) for item in data]
+ )
+ case dict():
+ return StatisticValues.object_value(
+ {
+ key: cls._get_statistic_value(value)
+ for key, value in data.items()
+ }
+ )
+ case _:
+ raise ValueError(
+ f"Unsupported data type for statistic value: {type(data)}"
+ )
diff --git a/clients/client-python/gravitino/api/stats/statistic_values.py b/clients/client-python/gravitino/api/stats/statistic_values.py
new file mode 100644
index 0000000000..b140122e6e
--- /dev/null
+++ b/clients/client-python/gravitino/api/stats/statistic_values.py
@@ -0,0 +1,267 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from typing import Any, TypeVar
+
+from gravitino.api.rel.types.type import Type
+from gravitino.api.rel.types.types import Types
+from gravitino.api.stats.statistic_value import StatisticValue
+from gravitino.utils.precondition import Precondition
+
+T = TypeVar("T")
+
+
+class StatisticValues:
+ """A class representing a collection of statistic values."""
+
+ @staticmethod
+ def boolean_value(value: bool) -> "BooleanValue":
+ """Creates a statistic value that holds a boolean value.
+
+ Args:
+ value: the boolean value to be held by this statistic value
+
+ Returns:
+ A BooleanValue instance containing the provided boolean value
+ """
+ return StatisticValues.BooleanValue(value)
+
+ @staticmethod
+ def long_value(value: int) -> "LongValue":
+ """Creates a statistic value that holds a long value.
+
+ Args:
+ value: the long value to be held by this statistic value
+
+ Returns:
+ A LongValue instance containing the provided long value
+ """
+ return StatisticValues.LongValue(value)
+
+ @staticmethod
+ def double_value(value: float) -> "DoubleValue":
+ """Creates a statistic value that holds a double value.
+
+ Args:
+ value: the double value to be held by this statistic value
+
+ Returns:
+ A DoubleValue instance containing the provided double value
+ """
+ return StatisticValues.DoubleValue(value)
+
+ @staticmethod
+ def string_value(value: str) -> "StringValue":
+ """Creates a statistic value that holds a string value.
+
+ Args:
+ value: the string value to be held by this statistic value
+
+ Returns:
+ A StringValue instance containing the provided string value
+ """
+ return StatisticValues.StringValue(value)
+
+ @staticmethod
+ def list_value(value: list[StatisticValue[T]]) -> "ListValue[T]":
+ """Creates a statistic value that holds a list of other statistic
values.
+
+ Args:
+ value: the list of statistic values to be held by this statistic
value
+
+ Returns:
+ A ListValue instance containing the provided list of statistic
values
+ """
+ return StatisticValues.ListValue(value)
+
+ @staticmethod
+ def object_value(value: dict[str, StatisticValue[Any]]) ->
"ObjectValue[Any]":
+ """Creates a statistic value that holds a map of string keys to other
statistic values.
+
+ Args:
+ value: the map of string keys to statistic values to be held by
this statistic value
+
+ Returns:
+ An ObjectValue instance containing the provided map of statistic
values
+ """
+ return StatisticValues.ObjectValue(value)
+
+ @staticmethod
+ def _make_hash(value: StatisticValue[Any]) -> int:
+ """Recursively compute hash for any StatisticValue.
+
+ Args:
+ value: the StatisticValue to hash
+
+ Returns:
+ Hash code for the StatisticValue
+ """
+ match value:
+ case StatisticValues.ListValue():
+ return hash(tuple(StatisticValues._make_hash(v) for v in
value.value()))
+ case StatisticValues.ObjectValue():
+ return hash(
+ tuple(
+ sorted(
+ (k, StatisticValues._make_hash(v))
+ for k, v in value.value().items()
+ )
+ )
+ )
+ case _:
+ return hash(value.value())
+
+ class BooleanValue(StatisticValue[bool]):
+ """A statistic value that holds a Boolean value."""
+
+ def __init__(self, value: bool) -> None:
+ self._value = value
+
+ def value(self) -> bool:
+ return self._value
+
+ def data_type(self) -> Type:
+ return Types.BooleanType.get()
+
+ def __hash__(self) -> int:
+ return hash(self._value)
+
+ def __eq__(self, other) -> bool:
+ if not isinstance(other, StatisticValues.BooleanValue):
+ return False
+ return self._value == other._value
+
+ class LongValue(StatisticValue[int]):
+ """A statistic value that holds a Long value."""
+
+ def __init__(self, value: int) -> None:
+ self._value = value
+
+ def value(self) -> int:
+ return self._value
+
+ def data_type(self) -> Type:
+ return Types.LongType.get()
+
+ def __hash__(self) -> int:
+ return hash(self._value)
+
+ def __eq__(self, other) -> bool:
+ if not isinstance(other, StatisticValues.LongValue):
+ return False
+ return self._value == other._value
+
+ class DoubleValue(StatisticValue[float]):
+ """A statistic value that holds a Double value."""
+
+ def __init__(self, value: float) -> None:
+ self._value = value
+
+ def value(self) -> float:
+ return self._value
+
+ def data_type(self) -> Type:
+ return Types.DoubleType.get()
+
+ def __hash__(self) -> int:
+ return hash(self._value)
+
+ def __eq__(self, other) -> bool:
+ if not isinstance(other, StatisticValues.DoubleValue):
+ return False
+ return self._value == other._value
+
+ class StringValue(StatisticValue[str]):
+ """A statistic value that holds a String value."""
+
+ def __init__(self, value: str) -> None:
+ self._value = value
+
+ def value(self) -> str:
+ return self._value
+
+ def data_type(self) -> Type:
+ return Types.StringType.get()
+
+ def __hash__(self) -> int:
+ return hash(self._value)
+
+ def __eq__(self, other) -> bool:
+ if not isinstance(other, StatisticValues.StringValue):
+ return False
+ return self._value == other._value
+
+ class ListValue(StatisticValue[list[StatisticValue[T]]]):
+ """A statistic value that holds a List of other statistic values."""
+
+ def __init__(self, value_list: list[StatisticValue[T]]) -> None:
+ Precondition.check_argument(
+ value_list is not None and len(value_list) > 0,
+ "Values cannot be null or empty",
+ )
+ data_type = value_list[0].data_type()
+ Precondition.check_argument(
+ all(value.data_type() == data_type for value in value_list),
+ "All values in the list must have the same data type",
+ )
+ self._value_list = value_list
+
+ def value(self) -> list[StatisticValue[T]]:
+ return self._value_list
+
+ def data_type(self) -> Type:
+ return Types.ListType.nullable(self._value_list[0].data_type())
+
+ def __hash__(self) -> int:
+ return StatisticValues._make_hash(self)
+
+ def __eq__(self, other) -> bool:
+ if not isinstance(other, StatisticValues.ListValue):
+ return False
+ return self._value_list == other._value_list
+
+ class ObjectValue(StatisticValue[dict[str, StatisticValue[T]]]):
+ """A statistic value that holds a Map of String keys to other
statistic values."""
+
+ def __init__(self, value_map: dict[str, StatisticValue[T]]) -> None:
+ Precondition.check_argument(
+ value_map is not None and len(value_map) > 0,
+ "Values cannot be null or empty",
+ )
+ self._value_map = value_map
+
+ def value(self) -> dict[str, StatisticValue[T]]:
+ return self._value_map
+
+ def data_type(self) -> Type:
+ return Types.StructType.of(
+ *[
+ Types.StructType.Field.nullable_field(
+ key, self._value_map[key].data_type()
+ )
+ for key in sorted(self._value_map)
+ ]
+ )
+
+ def __hash__(self) -> int:
+ return StatisticValues._make_hash(self)
+
+ def __eq__(self, other) -> bool:
+ if not isinstance(other, StatisticValues.ObjectValue):
+ return False
+ return self._value_map == other._value_map
diff --git a/clients/client-python/tests/unittests/api/stats/test_statistic_value_serdes.py b/clients/client-python/tests/unittests/api/stats/test_statistic_value_serdes.py
new file mode 100644
index 0000000000..3cf60a617a
--- /dev/null
+++ b/clients/client-python/tests/unittests/api/stats/test_statistic_value_serdes.py
@@ -0,0 +1,174 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import random
+import unittest
+
+from gravitino.api.stats.json_serdes.statistic_value_serdes import (
+ StatisticValueSerdes,
+)
+from gravitino.api.stats.statistic_values import StatisticValues
+from gravitino.exceptions.base import IllegalArgumentException
+
+
+class TestStatisticValueJsonSerdes(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls) -> None:
+ super().setUpClass()
+ cls._rand_int = random.randint(0, 500)
+ cls._rand_float = random.uniform(0, 500)
+ cls._rand_str = f"str-{cls._rand_int}"
+
+ def test_deserialize_naive_types(self):
+ self.assertEqual(
+ StatisticValueSerdes.deserialize(self._rand_int),
+ StatisticValues.long_value(self._rand_int),
+ )
+ self.assertEqual(
+ StatisticValueSerdes.deserialize(self._rand_float),
+ StatisticValues.double_value(self._rand_float),
+ )
+ self.assertEqual(
+ StatisticValueSerdes.deserialize(self._rand_str),
+ StatisticValues.string_value(self._rand_str),
+ )
+ self.assertEqual(
+ StatisticValueSerdes.deserialize(True),
StatisticValues.boolean_value(True)
+ )
+
+ def test_deserialize_object_type(self):
+ data = {
+ "boolean": True,
+ "long": self._rand_int,
+ "double": self._rand_float,
+ "string": self._rand_str,
+ "list": [self._rand_int, self._rand_int + 1, self._rand_int + 2],
+ "struct": {
+ "nested_boolean": False,
+ "nested_string": "nested",
+ "nested_list": [self._rand_str, self._rand_str + "_another"],
+ },
+ }
+
+ deserialized_value = StatisticValueSerdes.deserialize(data)
+
+ expected_value = StatisticValues.object_value(
+ {
+ "boolean": StatisticValues.boolean_value(True),
+ "long": StatisticValues.long_value(self._rand_int),
+ "double": StatisticValues.double_value(self._rand_float),
+ "string": StatisticValues.string_value(self._rand_str),
+ "list": StatisticValues.list_value(
+ [
+ StatisticValues.long_value(self._rand_int),
+ StatisticValues.long_value(self._rand_int + 1),
+ StatisticValues.long_value(self._rand_int + 2),
+ ]
+ ),
+ "struct": StatisticValues.object_value(
+ {
+ "nested_boolean": StatisticValues.boolean_value(False),
+ "nested_string":
StatisticValues.string_value("nested"),
+ "nested_list": StatisticValues.list_value(
+ [
+ StatisticValues.string_value(self._rand_str),
+ StatisticValues.string_value(
+ self._rand_str + "_another"
+ ),
+ ]
+ ),
+ }
+ ),
+ }
+ )
+
+ self.assertEqual(deserialized_value, expected_value)
+
+ def test_deserialize_unsupported_type(self):
+ with self.assertRaises(IllegalArgumentException):
+ StatisticValueSerdes.deserialize(None)
+
+ def test_serialize_naive_types(self):
+ self.assertEqual(
+
StatisticValueSerdes.serialize(StatisticValues.long_value(self._rand_int)),
+ self._rand_int,
+ )
+ self.assertEqual(
+ StatisticValueSerdes.serialize(
+ StatisticValues.double_value(self._rand_float)
+ ),
+ self._rand_float,
+ )
+ self.assertEqual(
+ StatisticValueSerdes.serialize(
+ StatisticValues.string_value(self._rand_str)
+ ),
+ self._rand_str,
+ )
+ self.assertEqual(
+
StatisticValueSerdes.serialize(StatisticValues.boolean_value(True)), True
+ )
+
+ def test_serialize_object_type(self):
+ value = StatisticValues.object_value(
+ {
+ "boolean": StatisticValues.boolean_value(True),
+ "long": StatisticValues.long_value(self._rand_int),
+ "double": StatisticValues.double_value(self._rand_float),
+ "string": StatisticValues.string_value(self._rand_str),
+ "list": StatisticValues.list_value(
+ [
+ StatisticValues.long_value(self._rand_int),
+ StatisticValues.long_value(self._rand_int + 1),
+ StatisticValues.long_value(self._rand_int + 2),
+ ]
+ ),
+ "struct": StatisticValues.object_value(
+ {
+ "nested_boolean": StatisticValues.boolean_value(False),
+ "nested_string":
StatisticValues.string_value("nested"),
+ "nested_list": StatisticValues.list_value(
+ [
+ StatisticValues.string_value(self._rand_str),
+ StatisticValues.string_value(
+ self._rand_str + "_another"
+ ),
+ ]
+ ),
+ }
+ ),
+ }
+ )
+
+ serialized_data = StatisticValueSerdes.serialize(value)
+
+ expected_data = {
+ "boolean": True,
+ "long": self._rand_int,
+ "double": self._rand_float,
+ "string": self._rand_str,
+ "list": [self._rand_int, self._rand_int + 1, self._rand_int + 2],
+ "struct": {
+ "nested_boolean": False,
+ "nested_string": "nested",
+ "nested_list": [self._rand_str, self._rand_str + "_another"],
+ },
+ }
+
+ self.assertIsInstance(serialized_data, dict)
+ self.assertDictEqual(serialized_data, expected_data)
diff --git a/clients/client-python/tests/unittests/api/stats/test_statistic_values.py b/clients/client-python/tests/unittests/api/stats/test_statistic_values.py
new file mode 100644
index 0000000000..97ee66b589
--- /dev/null
+++ b/clients/client-python/tests/unittests/api/stats/test_statistic_values.py
@@ -0,0 +1,202 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import random
+import unittest
+
+from gravitino.api.rel.types.types import Types
+from gravitino.api.stats.statistic_value import StatisticValue
+from gravitino.api.stats.statistic_values import StatisticValues
+from gravitino.exceptions.base import IllegalArgumentException
+
+
+class TestStatisticValues(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls) -> None:
+ super().setUpClass()
+ cls._rand_int = random.randint(0, 500)
+ cls._rand_int_another = random.randint(cls._rand_int + 1, 1000)
+ cls._rand_float = random.uniform(0, 500)
+ cls._rand_float_another = random.uniform(cls._rand_float + 1, 1000)
+ cls._rand_str = f"str-{cls._rand_int}"
+ cls._rand_str_another = f"str-{cls._rand_int_another}"
+
+ def test_long_value(self):
+ value = StatisticValues.LongValue(self._rand_int)
+ twin_value = StatisticValues.long_value(self._rand_int)
+ another_value = StatisticValues.LongValue(self._rand_int_another)
+
+ self.assertEqual(value.value(), self._rand_int)
+ self.assertEqual(value.data_type().name(), Types.LongType.get().name())
+ self.assertEqual(hash(value), hash(self._rand_int))
+ self.assertEqual(value, twin_value)
+ self.assertNotEqual(value, another_value)
+ self.assertNotEqual(value,
StatisticValues.DoubleValue(self._rand_float))
+
+ def test_double_value(self):
+ value = StatisticValues.DoubleValue(float(self._rand_float))
+ twin_value = StatisticValues.double_value(float(self._rand_float))
+ another_value =
StatisticValues.DoubleValue(float(self._rand_float_another))
+
+ self.assertEqual(value.value(), float(self._rand_float))
+ self.assertEqual(value.data_type().name(),
Types.DoubleType.get().name())
+ self.assertEqual(hash(value), hash(float(self._rand_float)))
+ self.assertEqual(value, twin_value)
+ self.assertNotEqual(value, another_value)
+ self.assertNotEqual(value, StatisticValues.LongValue(self._rand_int))
+
+ def test_string_value(self):
+ value = StatisticValues.StringValue(self._rand_str)
+ twin_value = StatisticValues.string_value(self._rand_str)
+ another_value = StatisticValues.StringValue(self._rand_str_another)
+
+ self.assertEqual(value.value(), self._rand_str)
+ self.assertEqual(value.data_type().name(),
Types.StringType.get().name())
+ self.assertEqual(hash(value), hash(self._rand_str))
+ self.assertEqual(value, twin_value)
+ self.assertNotEqual(value, another_value)
+ self.assertNotEqual(value, StatisticValues.LongValue(self._rand_int))
+
+ def test_list_value(self):
+ value_list: list[StatisticValue[int]] = [
+ StatisticValues.LongValue(random.randint(0, 100)) for i in
range(10)
+ ]
+ another_value_list: list[StatisticValue[int]] = [
+ StatisticValues.LongValue(random.randint(0, 100)) for i in
range(10)
+ ]
+ value = StatisticValues.ListValue(value_list)
+ twin_value: StatisticValues.ListValue[int] =
StatisticValues.list_value(
+ value_list
+ )
+ another_value = StatisticValues.ListValue(another_value_list)
+
+ self.assertEqual(value.value(), value_list)
+ self.assertEqual(
+ value.data_type().name(),
+ Types.ListType.nullable(Types.LongType.get()).name(),
+ )
+ self.assertEqual(hash(value), hash(tuple(v.value() for v in
value_list)))
+ self.assertEqual(value, twin_value)
+ self.assertNotEqual(value, another_value)
+ self.assertNotEqual(value, StatisticValues.LongValue(self._rand_int))
+
+ def test_object_value(self):
+ value_dict: dict[str, StatisticValue[int]] = {
+ f"key_{i}": StatisticValues.LongValue(random.randint(0, 100))
+ for i in range(10)
+ }
+ another_value_dict: dict[str, StatisticValue[int]] = {
+ f"key_{i}": StatisticValues.LongValue(random.randint(0, 100))
+ for i in range(10)
+ }
+ value = StatisticValues.ObjectValue(value_dict)
+ twin_value: StatisticValues.ObjectValue[int] =
StatisticValues.object_value(
+ value_dict
+ )
+ another_value = StatisticValues.ObjectValue(another_value_dict)
+
+ expected_data_type = Types.StructType.of(
+ *[
+ Types.StructType.Field.nullable_field(key,
statistic_value.data_type())
+ for key, statistic_value in value_dict.items()
+ ]
+ )
+ self.assertEqual(value.value(), value_dict)
+ self.assertEqual(
+ value.data_type().simple_string(),
expected_data_type.simple_string()
+ )
+ self.assertEqual(hash(value), hash(twin_value))
+ self.assertEqual(value, twin_value)
+ self.assertNotEqual(value, another_value)
+ self.assertNotEqual(value, StatisticValues.LongValue(self._rand_int))
+
+ def test_boolean_value(self):
+ value = StatisticValues.BooleanValue(True)
+ twin_value = StatisticValues.boolean_value(True)
+ another_value = StatisticValues.BooleanValue(False)
+
+ self.assertEqual(value.value(), True)
+ self.assertEqual(value.data_type().name(),
Types.BooleanType.get().name())
+ self.assertEqual(hash(value), hash(True))
+ self.assertEqual(value, twin_value)
+ self.assertNotEqual(value, another_value)
+ self.assertNotEqual(value, StatisticValues.LongValue(False))
+
+ def test_object_value_empty_dict(self):
+ with self.assertRaisesRegex(
+ IllegalArgumentException, "Values cannot be null or empty"
+ ):
+ StatisticValues.ObjectValue({})
+
+ def test_list_value_empty_list(self):
+ with self.assertRaisesRegex(
+ IllegalArgumentException, "Values cannot be null or empty"
+ ):
+ StatisticValues.ListValue([])
+
+ def test_list_value_mismatched_types(self):
+ with self.assertRaisesRegex(
+ IllegalArgumentException,
+ "All values in the list must have the same data type",
+ ):
+ StatisticValues.ListValue(
+ [
+ StatisticValues.LongValue(self._rand_int),
+ StatisticValues.DoubleValue(self._rand_float),
+ ]
+ )
+
+ def test_list_value_nested_hash(self):
+ inner_list1 = StatisticValues.list_value(
+ [StatisticValues.long_value(1), StatisticValues.long_value(2)]
+ )
+ inner_list2 = StatisticValues.list_value(
+ [StatisticValues.long_value(3), StatisticValues.long_value(4)]
+ )
+ nested_list = StatisticValues.list_value([inner_list1, inner_list2])
+ twin_nested_list = StatisticValues.list_value([inner_list1,
inner_list2])
+
+ self.assertEqual(len(nested_list.value()), 2)
+ self.assertEqual(nested_list, twin_nested_list)
+ self.assertEqual(hash(nested_list), hash(twin_nested_list))
+
+ def test_object_value_nested_hash(self):
+ inner_list = StatisticValues.list_value(
+ [StatisticValues.long_value(10), StatisticValues.long_value(20)]
+ )
+ inner_obj = StatisticValues.object_value(
+ {"x": StatisticValues.long_value(100), "y":
StatisticValues.long_value(200)}
+ )
+ nested_obj = StatisticValues.object_value(
+ {
+ "simple": StatisticValues.long_value(42),
+ "list": inner_list,
+ "object": inner_obj,
+ }
+ )
+ twin_nested_obj = StatisticValues.object_value(
+ {
+ "simple": StatisticValues.long_value(42),
+ "list": inner_list,
+ "object": inner_obj,
+ }
+ )
+
+ self.assertEqual(len(nested_obj.value()), 3)
+ self.assertEqual(nested_obj, twin_nested_obj)
+ self.assertEqual(hash(nested_obj), hash(twin_nested_obj))