This is an automated email from the ASF dual-hosted git repository.
jerryshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new c3f9721773 [#8609] feat(client-python): add supports statistics (#8692)
c3f9721773 is described below
commit c3f97217734010c3386d82f124833121b4e55548
Author: George T. C. Lai <[email protected]>
AuthorDate: Mon Apr 27 16:50:03 2026 +0800
[#8609] feat(client-python): add supports statistics (#8692)
### What changes were proposed in this pull request?
This PR implements Python counterparts of the following Java classes:
- Statistic
- StatisticValue
- SupportsStatistics
This PR also includes a refactor that moves the `expressions` package into
`gravitino.api.rel`.
### Why are the changes needed?
We need to support relational table operations, which depend on
`SupportsStatistics`.
#8609
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests
---------
Signed-off-by: George T. C. Lai <[email protected]>
Co-authored-by: Jerry Shao <[email protected]>
Co-authored-by: Yuhui <[email protected]>
Co-authored-by: Mark Hoerth <[email protected]>
Co-authored-by: Mark Hoerth <[email protected]>
Co-authored-by: Jerry Shao <[email protected]>
Co-authored-by: Claude Sonnet 4.6 <[email protected]>
Co-authored-by: mchades <[email protected]>
Co-authored-by: Copilot <[email protected]>
Co-authored-by: Chisom Uma <[email protected]>
Co-authored-by: Sun Yuhan <[email protected]>
Co-authored-by: Sun Yuhan <[email protected]>
---
.../client-python/gravitino/api/stats/__init__.py | 16 +++
.../client-python/gravitino/api/stats/statistic.py | 74 +++++++++++++
.../gravitino/api/stats/statistic_value.py | 48 +++++++++
.../gravitino/api/stats/supports_statistics.py | 74 +++++++++++++
clients/client-python/gravitino/exceptions/base.py | 8 ++
.../tests/unittests/api/stats/__init__.py | 16 +++
.../tests/unittests/api/stats/test_statistic.py | 87 ++++++++++++++++
.../unittests/api/stats/test_statistic_value.py | 47 +++++++++
.../api/stats/test_supports_statistics.py | 114 +++++++++++++++++++++
9 files changed, 484 insertions(+)
diff --git a/clients/client-python/gravitino/api/stats/__init__.py b/clients/client-python/gravitino/api/stats/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/clients/client-python/gravitino/api/stats/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/clients/client-python/gravitino/api/stats/statistic.py b/clients/client-python/gravitino/api/stats/statistic.py
new file mode 100644
index 0000000000..1d6db3ef49
--- /dev/null
+++ b/clients/client-python/gravitino/api/stats/statistic.py
@@ -0,0 +1,74 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from abc import ABC, abstractmethod
+from typing import Any, Final
+
+from gravitino.api.auditable import Auditable
+from gravitino.api.stats.statistic_value import StatisticValue
+
+
+class Statistic(Auditable, ABC):
+ """Statistic interface represents a statistic that can be associated with a metadata object.
+
+ It can be used to store various types of statistics, for example, table statistics, partition
+ statistics, fileset statistics, etc.
+ """
+
+ CUSTOM_PREFIX: Final[str] = "custom-"
+ """The prefix for custom statistics. Custom statistics are user-defined statistics."""
+
+ @abstractmethod
+ def name(self) -> str:
+ """Get the name of the statistic.
+
+ Returns:
+ str: the name of the statistic
+ """
+
+ @abstractmethod
+ def value(self) -> StatisticValue[Any] | None:
+ """Get the value of the statistic.
+
+ The value is optional. If the statistic is not set, this method returns `None`.
+
+ Returns:
+ StatisticValue[Any] | None:
+ The statistic value, or `None` if it is unset.
+ """
+
+ @abstractmethod
+ def reserved(self) -> bool:
+ """The statistic is predefined by Gravitino if the value is true.
+
+ The statistic is defined by users if the value is false. For example, the statistic
+ "row_count" is a reserved statistic. A custom statistic name must start with "custom-"
+ prefix to avoid name conflict with reserved statistics, because Gravitino may add more
+ reserved statistics in the future.
+
+ Returns:
+ bool: The type of the statistic. `True` if the statistic is reserved, `False` otherwise
+ """
+
+ @abstractmethod
+ def modifiable(self) -> bool:
+ """Whether the statistic is modifiable.
+
+ Returns:
+ bool: If the statistic is modifiable, return `True`, otherwise `False`.
+ """
diff --git a/clients/client-python/gravitino/api/stats/statistic_value.py b/clients/client-python/gravitino/api/stats/statistic_value.py
new file mode 100644
index 0000000000..2130f93009
--- /dev/null
+++ b/clients/client-python/gravitino/api/stats/statistic_value.py
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from abc import ABC, abstractmethod
+from typing import Generic, TypeVar
+
+from gravitino.api.rel.types.type import Type
+
+_StatisticValueT = TypeVar("_StatisticValueT")
+
+
+class StatisticValue(Generic[_StatisticValueT], ABC):
+ """An interface representing a statistic value.
+
+ Type Parameters:
+ The type of the statistic value.
+ """
+
+ @abstractmethod
+ def value(self) -> _StatisticValueT:
+ """Returns the value of the statistic.
+
+ Returns:
+ The value of the statistic
+ """
+
+ @abstractmethod
+ def data_type(self) -> Type:
+ """Returns the data type of the statistic value.
+
+ Returns:
+ Type: the data type of the statistic value
+ """
diff --git a/clients/client-python/gravitino/api/stats/supports_statistics.py b/clients/client-python/gravitino/api/stats/supports_statistics.py
new file mode 100644
index 0000000000..a0fcc28a02
--- /dev/null
+++ b/clients/client-python/gravitino/api/stats/supports_statistics.py
@@ -0,0 +1,74 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from gravitino.api.stats.statistic import Statistic
+from gravitino.api.stats.statistic_value import StatisticValue
+
+
+class SupportsStatistics(ABC):
+ """SupportsStatistics provides methods to list and update statistics.
+
+ A table, a partition or a fileset can implement this interface to manage its statistics.
+ """
+
+ @abstractmethod
+ def list_statistics(self) -> list[Statistic]:
+ """Lists all statistics.
+
+ Returns:
+ A list of statistics
+ """
+
+ @abstractmethod
+ def update_statistics(self, statistics: dict[str, StatisticValue[Any]]) -> None:
+ """Updates statistics with the provided values.
+
+ If the statistic exists, it will be updated with the new value. If the statistic does
+ not exist, it will be created. If the statistic is unmodifiable, it will throw an
+ `UnmodifiableStatisticException`. If the statistic name is illegal, it will throw an
+ `IllegalStatisticNameException`.
+
+ Args:
+ statistics: a map of statistic names to their values
+
+ Raises:
+ IllegalStatisticNameException: If the statistic name is illegal
+ UnmodifiableStatisticException: If the statistic is unmodifiable
+ """
+
+ @abstractmethod
+ def drop_statistics(self, statistics: list[str]) -> bool:
+ """Drop statistics by their names.
+
+ If the statistic is unmodifiable, it will throw an `UnmodifiableStatisticException`.
+
+ Args:
+ statistics: a list of statistic names to be dropped
+
+ Returns:
+ bool:
+ `True` if the statistics were successfully dropped,
+ `False` if no statistics were dropped
+
+ Raises:
+ UnmodifiableStatisticException:
+ if any of the statistics to be dropped are unmodifiable
+ """
diff --git a/clients/client-python/gravitino/exceptions/base.py b/clients/client-python/gravitino/exceptions/base.py
index 499a5d19a6..ced91b9fc9 100644
--- a/clients/client-python/gravitino/exceptions/base.py
+++ b/clients/client-python/gravitino/exceptions/base.py
@@ -205,6 +205,14 @@ class ForbiddenException(GravitinoRuntimeException):
"""An exception thrown when a user is forbidden to perform an action."""
+class IllegalStatisticNameException(IllegalArgumentException):
+ """An exception thrown when a statistic has an illegal name."""
+
+
+class UnmodifiableStatisticException(UnsupportedOperationException):
+ """An exception thrown when attempting to modify an unmodifiable statistic."""
+
+
class NoSuchTableException(NotFoundException):
"""An exception thrown when a table with specified name is not existed."""
diff --git a/clients/client-python/tests/unittests/api/stats/__init__.py b/clients/client-python/tests/unittests/api/stats/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/clients/client-python/tests/unittests/api/stats/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/clients/client-python/tests/unittests/api/stats/test_statistic.py b/clients/client-python/tests/unittests/api/stats/test_statistic.py
new file mode 100644
index 0000000000..f9a277dca2
--- /dev/null
+++ b/clients/client-python/tests/unittests/api/stats/test_statistic.py
@@ -0,0 +1,87 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import unittest
+from datetime import datetime
+
+from gravitino.api.audit import Audit
+from gravitino.api.rel.types.types import Types
+from gravitino.api.stats.statistic import Statistic
+from gravitino.api.stats.statistic_value import StatisticValue
+
+
+class TestStatistic(unittest.TestCase):
+ def test_importability(self):
+ """Test that Statistic can be imported."""
+ self.assertIsNotNone(Statistic)
+
+ def test_custom_prefix_constant(self):
+ """Test that CUSTOM_PREFIX constant is defined correctly."""
+ self.assertEqual("custom-", Statistic.CUSTOM_PREFIX)
+
+ def test_is_abstract(self):
+ """Test that Statistic is an abstract base class."""
+ with self.assertRaises(TypeError):
+ Statistic() # pylint: disable=abstract-class-instantiated
+
+ def test_concrete_implementation(self):
+ """Test that a concrete implementation can be created."""
+
+ class ConcreteStatisticValue(StatisticValue[int]):
+ def value(self) -> int:
+ return 100
+
+ def data_type(self) -> Types.IntegerType:
+ return Types.IntegerType.get()
+
+ class ConcreteAudit(Audit):
+ def creator(self) -> str:
+ return "test"
+
+ def create_time(self) -> datetime:
+ return datetime(2024, 1, 1)
+
+ def last_modifier(self) -> str:
+ return "test"
+
+ def last_modified_time(self) -> datetime:
+ return datetime(2024, 1, 1)
+
+ class ConcreteStatistic(Statistic):
+ def name(self) -> str:
+ return "row_count"
+
+ def value(self) -> StatisticValue[int] | None:
+ return ConcreteStatisticValue()
+
+ def reserved(self) -> bool:
+ return True
+
+ def modifiable(self) -> bool:
+ return False
+
+ def audit_info(self) -> Audit:
+ return ConcreteAudit()
+
+ instance = ConcreteStatistic()
+ self.assertEqual("row_count", instance.name())
+ self.assertIsNotNone(instance.value())
+ self.assertEqual(100, instance.value().value())
+ self.assertTrue(instance.reserved())
+ self.assertFalse(instance.modifiable())
+ self.assertIsNotNone(instance.audit_info())
diff --git a/clients/client-python/tests/unittests/api/stats/test_statistic_value.py b/clients/client-python/tests/unittests/api/stats/test_statistic_value.py
new file mode 100644
index 0000000000..41db2763e2
--- /dev/null
+++ b/clients/client-python/tests/unittests/api/stats/test_statistic_value.py
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import unittest
+
+from gravitino.api.rel.types.types import Types
+from gravitino.api.stats.statistic_value import StatisticValue
+
+
+class TestStatisticValue(unittest.TestCase):
+ def test_importability(self):
+ """Test that StatisticValue can be imported."""
+ self.assertIsNotNone(StatisticValue)
+
+ def test_is_abstract(self):
+ """Test that StatisticValue is an abstract base class."""
+ with self.assertRaises(TypeError):
+ StatisticValue() # pylint: disable=abstract-class-instantiated
+
+ def test_concrete_implementation(self):
+ """Test that a concrete implementation can be created."""
+
+ class ConcreteStatisticValue(StatisticValue[int]):
+ def value(self) -> int:
+ return 42
+
+ def data_type(self) -> Types.IntegerType:
+ return Types.IntegerType.get()
+
+ instance = ConcreteStatisticValue()
+ self.assertEqual(42, instance.value())
+ self.assertEqual(Types.IntegerType.get(), instance.data_type())
diff --git a/clients/client-python/tests/unittests/api/stats/test_supports_statistics.py b/clients/client-python/tests/unittests/api/stats/test_supports_statistics.py
new file mode 100644
index 0000000000..1dd731088b
--- /dev/null
+++ b/clients/client-python/tests/unittests/api/stats/test_supports_statistics.py
@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import unittest
+from datetime import datetime
+from typing import Any
+
+from gravitino.api.audit import Audit
+from gravitino.api.rel.types.types import Types
+from gravitino.api.stats.statistic import Statistic
+from gravitino.api.stats.statistic_value import StatisticValue
+from gravitino.api.stats.supports_statistics import SupportsStatistics
+
+
+class TestSupportsStatistics(unittest.TestCase):
+ def test_importability(self):
+ """Test that SupportsStatistics can be imported."""
+ self.assertIsNotNone(SupportsStatistics)
+
+ def test_is_abstract(self):
+ """Test that SupportsStatistics is an abstract base class."""
+ with self.assertRaises(TypeError):
+ SupportsStatistics() # pylint: disable=abstract-class-instantiated
+
+ def test_concrete_implementation(self):
+ """Test that a concrete implementation can be created."""
+
+ class ConcreteStatisticValue(StatisticValue[int]):
+ def value(self) -> int:
+ return 50
+
+ def data_type(self) -> Types.IntegerType:
+ return Types.IntegerType.get()
+
+ class ConcreteAudit(Audit):
+ def creator(self) -> str:
+ return "test"
+
+ def create_time(self) -> datetime:
+ return datetime(2024, 1, 1)
+
+ def last_modifier(self) -> str:
+ return "test"
+
+ def last_modified_time(self) -> datetime:
+ return datetime(2024, 1, 1)
+
+ class ConcreteStatistic(Statistic):
+ def __init__(self, stat_name: str, stat_value: int):
+ self._name = stat_name
+ self._value = stat_value
+
+ def name(self) -> str:
+ return self._name
+
+ def value(self) -> StatisticValue[int] | None:
+ val = ConcreteStatisticValue()
+ val._val = self._value # pylint: disable=protected-access
+ return val
+
+ def reserved(self) -> bool:
+ return False
+
+ def modifiable(self) -> bool:
+ return True
+
+ def audit_info(self) -> Audit:
+ return ConcreteAudit()
+
+ class ConcreteSupportsStatistics(SupportsStatistics):
+ def __init__(self):
+ self._stats = {}
+
+ def list_statistics(self) -> list[Statistic]:
+ return [ConcreteStatistic(k, v.value()) for k, v in self._stats.items()]
+
+ def update_statistics(
+ self, statistics: dict[str, StatisticValue[Any]]
+ ) -> None:
+ self._stats.update(statistics)
+
+ def drop_statistics(self, statistics: list[str]) -> bool:
+ dropped = False
+ for stat in statistics:
+ if stat in self._stats:
+ del self._stats[stat]
+ dropped = True
+ return dropped
+
+ instance = ConcreteSupportsStatistics()
+ self.assertEqual([], instance.list_statistics())
+
+ stat_val = ConcreteStatisticValue()
+ instance.update_statistics({"test_stat": stat_val})
+ self.assertEqual(1, len(instance.list_statistics()))
+
+ result = instance.drop_statistics(["test_stat"])
+ self.assertTrue(result)
+ self.assertEqual([], instance.list_statistics())