Re: [PR] Fix: `SqlCatalog` list_namespaces() should return only sub-namespaces [iceberg-python]

via GitHub Sun, 16 Feb 2025 12:22:52 -0800


kevinjqliu commented on code in PR #1629:
URL: https://github.com/apache/iceberg-python/pull/1629#discussion_r1957158183



##########
pyiceberg/catalog/sql.py:
##########
@@ -610,15 +610,26 @@ def list_namespaces(self, namespace: Union[str, 
Identifier] = ()) -> List[Identi
         table_stmt = 
select(IcebergTables.table_namespace).where(IcebergTables.catalog_name == 
self.name)
         namespace_stmt = 
select(IcebergNamespaceProperties.namespace).where(IcebergNamespaceProperties.catalog_name
 == self.name)
         if namespace:
-            namespace_str = Catalog.namespace_to_string(namespace, 
NoSuchNamespaceError)
-            table_stmt = 
table_stmt.where(IcebergTables.table_namespace.like(namespace_str))
-            namespace_stmt = 
namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_str))
+            namespace_like = Catalog.namespace_to_string(namespace, 
NoSuchNamespaceError) + "%"

Review Comment:
   oops, youre right. i should have read the docs 



##########
tests/catalog/test_sql.py:
##########
@@ -1117,17 +1117,30 @@ def 
test_create_namespace_with_empty_identifier(catalog: SqlCatalog, empty_names
         lazy_fixture("catalog_sqlite"),
     ],
 )
-@pytest.mark.parametrize("namespace_list", [lazy_fixture("database_list"), 
lazy_fixture("hierarchical_namespace_list")])
-def test_list_namespaces(catalog: SqlCatalog, namespace_list: List[str]) -> 
None:
+def test_list_namespaces(catalog: SqlCatalog) -> None:
+    namespace_list = ["db", "db.ns1", "db.ns1.ns2", "db.ns2", "db2", 
"db2.ns1", "db%"]
     for namespace in namespace_list:
         catalog.create_namespace(namespace)
-    # Test global list
+
     ns_list = catalog.list_namespaces()
-    for namespace in namespace_list:
-        assert Catalog.identifier_to_tuple(namespace) in ns_list
-        # Test individual namespace list
-        assert len(one_namespace := catalog.list_namespaces(namespace)) == 1
-        assert Catalog.identifier_to_tuple(namespace) == one_namespace[0]
+    expected_list: list[tuple[str, ...]] = [("db",), ("db2",), ("db%",)]
+    for ns in expected_list:
+        assert ns in ns_list

Review Comment:
   this worked for me
   ```
       assert sorted(catalog.list_namespaces()) == [("db",), ("db%",), ("db2",)]
       assert sorted(catalog.list_namespaces("db")) == [("db", "ns1"), ("db", 
"ns2")]
       assert catalog.list_namespaces("db.ns1") == [("db", "ns1", "ns2")]
       assert catalog.list_namespaces("db.ns1.ns2") == []
   ```



##########
pyiceberg/catalog/sql.py:
##########
@@ -610,15 +610,26 @@ def list_namespaces(self, namespace: Union[str, 
Identifier] = ()) -> List[Identi
         table_stmt = 
select(IcebergTables.table_namespace).where(IcebergTables.catalog_name == 
self.name)
         namespace_stmt = 
select(IcebergNamespaceProperties.namespace).where(IcebergNamespaceProperties.catalog_name
 == self.name)
         if namespace:
-            namespace_str = Catalog.namespace_to_string(namespace, 
NoSuchNamespaceError)
-            table_stmt = 
table_stmt.where(IcebergTables.table_namespace.like(namespace_str))
-            namespace_stmt = 
namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_str))
+            namespace_like = Catalog.namespace_to_string(namespace, 
NoSuchNamespaceError) + "%"
+            table_stmt = 
table_stmt.where(IcebergTables.table_namespace.like(namespace_like))
+            namespace_stmt = 
namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_like))
         stmt = union(
             table_stmt,
             namespace_stmt,
         )
         with Session(self.engine) as session:
-            return [Catalog.identifier_to_tuple(namespace_col) for 
namespace_col in session.execute(stmt).scalars()]
+            namespaces = [Catalog.identifier_to_tuple(namespace_col) for 
namespace_col in session.execute(stmt).scalars()]

Review Comment:
   oops sorry for the ambiguous statement. i meant to filter the results 
inline. 
   
   Something like this, filters the level and fuzzy match together
   
   ```
           with Session(self.engine) as session:
               namespace_tuple = Catalog.identifier_to_tuple(namespace)
               sub_namespaces_level_length = len(namespace_tuple) + 1 if 
namespace else 1
               namespaces = [
                   ns[:sub_namespaces_level_length]  # truncate to the required 
level
                   for ns in {Catalog.identifier_to_tuple(ns) for ns in 
session.execute(stmt).scalars()}
                   if len(ns) >= sub_namespaces_level_length  # only get sub 
namespaces/children
                   and ns[: len(namespace_tuple)] == namespace_tuple  # exclude 
fuzzy matches when `namespace` contains `%` or `_`
               ]
   
               return namespaces
   ```



##########
tests/catalog/test_base.py:
##########
@@ -312,7 +312,7 @@ def test_rename_table(catalog: InMemoryCatalog) -> None:
     assert table._identifier == Catalog.identifier_to_tuple(new_table)
 
     # And
-    assert ("new", "namespace") in catalog.list_namespaces()
+    assert ("new",) in catalog.list_namespaces()

Review Comment:
   although looking at the context of the test, i think its testing for 
existence of `("new", "namespace")`
   
   so maybe something like this
   ```suggestion
       assert ("new", "namespace") in catalog.list_namespaces("new")
   ```



##########
tests/catalog/test_sql.py:
##########
@@ -1158,13 +1171,13 @@ def test_list_non_existing_namespaces(catalog: 
SqlCatalog) -> None:
 def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema, 
table_identifier: Identifier) -> None:
     namespace = Catalog.namespace_from(table_identifier)
     catalog.create_namespace(namespace)
-    assert namespace in catalog.list_namespaces()
+    assert namespace[:1] in catalog.list_namespaces()

Review Comment:
   i see, thanks for the explanation! i think the assert here is testing that 
the newly created namespace exists. 
   so perhaps something like this matches its behavior more
   
   ```
   assert namespace in catalog.list_namespaces(namespace[:-1])
   ```



##########
tests/catalog/test_base.py:
##########
@@ -312,7 +312,7 @@ def test_rename_table(catalog: InMemoryCatalog) -> None:
     assert table._identifier == Catalog.identifier_to_tuple(new_table)
 
     # And
-    assert ("new", "namespace") in catalog.list_namespaces()
+    assert ("new",) in catalog.list_namespaces()

Review Comment:
   👍 this is the right behavior, only return the top level namespace
   
https://github.com/apache/iceberg/blob/41b458b7022c7b0cd78eeca9102392db7889d3c9/core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java#L770-L772



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Re: [PR] Fix: `SqlCatalog` list_namespaces() should return only sub-namespaces [iceberg-python]

Reply via email to