Re: [PR] Add array data type support for Python [fluss-rust]

via GitHub Fri, 03 Apr 2026 06:52:55 -0700


qzyu999 commented on code in PR #474:
URL: https://github.com/apache/fluss-rust/pull/474#discussion_r3032944038



##########
bindings/python/test/test_log_table.py:
##########
@@ -755,3 +756,98 @@ def _poll_arrow_ids(scanner, expected_count, timeout_s=10):
         if arrow_table.num_rows > 0:
             all_ids.extend(arrow_table.column("id").to_pylist())
     return all_ids
+
+
+async def test_append_and_scan_with_array(connection, admin):
+    """Test appending and scanning with array columns."""
+    table_path = fluss.TablePath("fluss", "py_test_append_and_scan_with_array")
+    await admin.drop_table(table_path, ignore_if_not_exists=True)
+
+    pa_schema = pa.schema(
+        [
+            pa.field("id", pa.int32()),
+            pa.field("tags", pa.list_(pa.string())),
+            pa.field("scores", pa.list_(pa.int32())),
+        ]
+    )
+    schema = fluss.Schema(pa_schema)
+    table_descriptor = fluss.TableDescriptor(schema)
+    await admin.create_table(table_path, table_descriptor, ignore_if_exists=False)
+
+    table = await connection.get_table(table_path)
+    append_writer = table.new_append().create_writer()
+
+    # Batch 1: Testing both standard and large lists
+    batch1 = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, 2], type=pa.int32()),
+            pa.array([["a", "b"], ["c"]], type=pa.list_(pa.string())),
+            pa.array([[10, 20], [30]], type=pa.list_(pa.int32())),
+        ],
+        schema=pa_schema,
+    )
+    append_writer.write_arrow_batch(batch1)
+    await append_writer.flush()
+
+    # Verify via LogScanner (record-by-record)
+    scanner = await table.new_scan().create_log_scanner()
+    scanner.subscribe_buckets({0: fluss.EARLIEST_OFFSET})
+    records = _poll_records(scanner, expected_count=2)
+
+    assert len(records) == 2
+    records.sort(key=lambda r: r.row["id"])
+
+    assert records[0].row["tags"] == ["a", "b"]
+    assert records[0].row["scores"] == [10, 20]
+    assert records[1].row["tags"] == ["c"]
+    assert records[1].row["scores"] == [30]
+
+    # Verify via to_arrow (batch-based)
+    scanner2 = await table.new_scan().create_record_batch_log_scanner()
+    scanner2.subscribe_buckets({0: fluss.EARLIEST_OFFSET})
+    result_table = scanner2.to_arrow()
+
+    assert result_table.num_rows == 2
+    assert result_table.column("tags").to_pylist() == [["a", "b"], ["c"]]
+    assert result_table.column("scores").to_pylist() == [[10, 20], [30]]
+
+@pytest.mark.skip(reason="FixedSizeList support requires server-side updates (≥0.9.1). "

Review Comment:
   Hi @fresh-borzoni, thanks for asking. I removed the `pytest.mark.skip()` in 
9071cf82df02ad647a6075ede40ace2c22c74ca4. It was there because I ran into some 
issues while testing locally and wasn't sure what to do. My understanding was 
that the test would fail in GitHub's CI because of a server-side limitation 
around `FixedSizeList`, as you mentioned. I thought I would include the test 
anyway, since I had confirmed locally that it worked.
   
   Edit: Would you actually prefer that we skip this test?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Add array data type support for Python [fluss-rust]

Reply via email to