kevinjqliu commented on code in PR #1629:
URL: https://github.com/apache/iceberg-python/pull/1629#discussion_r1957158183
##########
pyiceberg/catalog/sql.py:
##########
@@ -610,15 +610,26 @@ def list_namespaces(self, namespace: Union[str,
Identifier] = ()) -> List[Identi
table_stmt =
select(IcebergTables.table_namespace).where(IcebergTables.catalog_name ==
self.name)
namespace_stmt =
select(IcebergNamespaceProperties.namespace).where(IcebergNamespaceProperties.catalog_name
== self.name)
if namespace:
- namespace_str = Catalog.namespace_to_string(namespace,
NoSuchNamespaceError)
- table_stmt =
table_stmt.where(IcebergTables.table_namespace.like(namespace_str))
- namespace_stmt =
namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_str))
+ namespace_like = Catalog.namespace_to_string(namespace,
NoSuchNamespaceError) + "%"
Review Comment:
oops, youre right. i should have read the docs
##########
tests/catalog/test_sql.py:
##########
@@ -1117,17 +1117,30 @@ def
test_create_namespace_with_empty_identifier(catalog: SqlCatalog, empty_names
lazy_fixture("catalog_sqlite"),
],
)
[email protected]("namespace_list", [lazy_fixture("database_list"),
lazy_fixture("hierarchical_namespace_list")])
-def test_list_namespaces(catalog: SqlCatalog, namespace_list: List[str]) ->
None:
+def test_list_namespaces(catalog: SqlCatalog) -> None:
+ namespace_list = ["db", "db.ns1", "db.ns1.ns2", "db.ns2", "db2",
"db2.ns1", "db%"]
for namespace in namespace_list:
catalog.create_namespace(namespace)
- # Test global list
+
ns_list = catalog.list_namespaces()
- for namespace in namespace_list:
- assert Catalog.identifier_to_tuple(namespace) in ns_list
- # Test individual namespace list
- assert len(one_namespace := catalog.list_namespaces(namespace)) == 1
- assert Catalog.identifier_to_tuple(namespace) == one_namespace[0]
+ expected_list: list[tuple[str, ...]] = [("db",), ("db2",), ("db%",)]
+ for ns in expected_list:
+ assert ns in ns_list
Review Comment:
this worked for me
```
assert sorted(catalog.list_namespaces()) == [("db",), ("db%",), ("db2",)]
assert sorted(catalog.list_namespaces("db")) == [("db", "ns1"), ("db",
"ns2")]
assert catalog.list_namespaces("db.ns1") == [("db", "ns1", "ns2")]
assert catalog.list_namespaces("db.ns1.ns2") == []
```
##########
pyiceberg/catalog/sql.py:
##########
@@ -610,15 +610,26 @@ def list_namespaces(self, namespace: Union[str,
Identifier] = ()) -> List[Identi
table_stmt =
select(IcebergTables.table_namespace).where(IcebergTables.catalog_name ==
self.name)
namespace_stmt =
select(IcebergNamespaceProperties.namespace).where(IcebergNamespaceProperties.catalog_name
== self.name)
if namespace:
- namespace_str = Catalog.namespace_to_string(namespace,
NoSuchNamespaceError)
- table_stmt =
table_stmt.where(IcebergTables.table_namespace.like(namespace_str))
- namespace_stmt =
namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_str))
+ namespace_like = Catalog.namespace_to_string(namespace,
NoSuchNamespaceError) + "%"
+ table_stmt =
table_stmt.where(IcebergTables.table_namespace.like(namespace_like))
+ namespace_stmt =
namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_like))
stmt = union(
table_stmt,
namespace_stmt,
)
with Session(self.engine) as session:
- return [Catalog.identifier_to_tuple(namespace_col) for
namespace_col in session.execute(stmt).scalars()]
+ namespaces = [Catalog.identifier_to_tuple(namespace_col) for
namespace_col in session.execute(stmt).scalars()]
Review Comment:
oops sorry for the ambiguous statement. i meant to filter the results
inline.
Something like this, filters the level and fuzzy match together
```
with Session(self.engine) as session:
namespace_tuple = Catalog.identifier_to_tuple(namespace)
sub_namespaces_level_length = len(namespace_tuple) + 1 if
namespace else 1
namespaces = [
ns[:sub_namespaces_level_length] # truncate to the required
level
for ns in {Catalog.identifier_to_tuple(ns) for ns in
session.execute(stmt).scalars()}
if len(ns) >= sub_namespaces_level_length # only get sub
namespaces/children
and ns[: len(namespace_tuple)] == namespace_tuple # exclude
fuzzy matches when `namespace` contains `%` or `_`
]
return namespaces
```
##########
tests/catalog/test_base.py:
##########
@@ -312,7 +312,7 @@ def test_rename_table(catalog: InMemoryCatalog) -> None:
assert table._identifier == Catalog.identifier_to_tuple(new_table)
# And
- assert ("new", "namespace") in catalog.list_namespaces()
+ assert ("new",) in catalog.list_namespaces()
Review Comment:
although looking at the context of the test, i think its testing for
existence of `("new", "namespace")`
so maybe something like this
```suggestion
assert ("new", "namespace") in catalog.list_namespaces("new")
```
##########
tests/catalog/test_sql.py:
##########
@@ -1158,13 +1171,13 @@ def test_list_non_existing_namespaces(catalog:
SqlCatalog) -> None:
def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema,
table_identifier: Identifier) -> None:
namespace = Catalog.namespace_from(table_identifier)
catalog.create_namespace(namespace)
- assert namespace in catalog.list_namespaces()
+ assert namespace[:1] in catalog.list_namespaces()
Review Comment:
i see, thanks for the explanation! i think the assert here is testing that
the newly created namespace exists.
so perhaps something like this matches its behavior more
```
assert namespace in catalog.list_namespaces(namespace[:-1])
```
##########
tests/catalog/test_base.py:
##########
@@ -312,7 +312,7 @@ def test_rename_table(catalog: InMemoryCatalog) -> None:
assert table._identifier == Catalog.identifier_to_tuple(new_table)
# And
- assert ("new", "namespace") in catalog.list_namespaces()
+ assert ("new",) in catalog.list_namespaces()
Review Comment:
👍 this is the right behavior, only return the top level namespace
https://github.com/apache/iceberg/blob/41b458b7022c7b0cd78eeca9102392db7889d3c9/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java#L770-L772
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]