qzyu999 commented on code in PR #474:
URL: https://github.com/apache/fluss-rust/pull/474#discussion_r3037603801


##########
bindings/python/test/test_log_table.py:
##########
@@ -755,3 +754,244 @@ def _poll_arrow_ids(scanner, expected_count, 
timeout_s=10):
         if arrow_table.num_rows > 0:
             all_ids.extend(arrow_table.column("id").to_pylist())
     return all_ids
+
+
async def test_append_and_scan_with_array(connection, admin):
    """Append rows with list-typed columns and read them back.

    Exercises nested nulls (``None`` inside a list), null lists, and
    empty lists, checking both the record-by-record log scanner and
    the batch-based ``to_arrow`` path.
    """
    table_path = fluss.TablePath("fluss", "py_test_append_and_scan_with_array")
    await admin.drop_table(table_path, ignore_if_not_exists=True)

    # Schema: an int32 id plus two list columns (strings and ints).
    arrow_schema = pa.schema(
        [
            pa.field("id", pa.int32()),
            pa.field("tags", pa.list_(pa.string())),
            pa.field("scores", pa.list_(pa.int32())),
        ]
    )
    descriptor = fluss.TableDescriptor(fluss.Schema(arrow_schema))
    await admin.create_table(table_path, descriptor, ignore_if_exists=False)

    table = await connection.get_table(table_path)
    writer = table.new_append().create_writer()

    # First batch: plain, fully populated lists.
    ids_a = pa.array([1, 2], type=pa.int32())
    tags_a = pa.array([["a", "b"], ["c"]], type=pa.list_(pa.string()))
    scores_a = pa.array([[10, 20], [30]], type=pa.list_(pa.int32()))
    writer.write_arrow_batch(
        pa.RecordBatch.from_arrays([ids_a, tags_a, scores_a], schema=arrow_schema)
    )

    # Second batch: null elements inside lists, null lists, empty lists.
    ids_b = pa.array([3, 4, 5, 6], type=pa.int32())
    tags_b = pa.array([["d", None], None, [], [None]], type=pa.list_(pa.string()))
    scores_b = pa.array([[40, 50], [60], None, []], type=pa.list_(pa.int32()))
    writer.write_arrow_batch(
        pa.RecordBatch.from_arrays([ids_b, tags_b, scores_b], schema=arrow_schema)
    )
    await writer.flush()

    # Record-by-record verification via the log scanner.
    log_scanner = await table.new_scan().create_log_scanner()
    log_scanner.subscribe_buckets({0: fluss.EARLIEST_OFFSET})
    records = _poll_records(log_scanner, expected_count=6)

    assert len(records) == 6
    records.sort(key=lambda rec: rec.row["id"])

    # Expected (tags, scores) per row, ordered by id; rows 1-2 come
    # from batch 1 and rows 3-6 from batch 2.
    expected_rows = [
        (["a", "b"], [10, 20]),
        (["c"], [30]),
        (["d", None], [40, 50]),
        (None, [60]),
        ([], None),
        ([None], []),
    ]
    for rec, (want_tags, want_scores) in zip(records, expected_rows):
        row = rec.row
        # Null lists are checked with ``is None`` to mirror the strict
        # identity expectation; everything else compares by value.
        if want_tags is None:
            assert row["tags"] is None
        else:
            assert row["tags"] == want_tags
        if want_scores is None:
            assert row["scores"] is None
        else:
            assert row["scores"] == want_scores

    # Batch-level verification via to_arrow.
    batch_scanner = await table.new_scan().create_record_batch_log_scanner()
    batch_scanner.subscribe_buckets({0: fluss.EARLIEST_OFFSET})
    result = batch_scanner.to_arrow()

    assert result.num_rows == 6
    assert result.column("tags").to_pylist() == [
        ["a", "b"],
        ["c"],
        ["d", None],
        None,
        [],
        [None],
    ]
    assert result.column("scores").to_pylist() == [
        [10, 20],
        [30],
        [40, 50],
        [60],
        None,
        [],
    ]
+
+
[email protected](reason="Server currently only accepts ListVector. 
FixedSizeList causes IPC mismatch until server supports it.")

Review Comment:
   Hi @leekeiabstraction, thanks for catching this — I added a test for 
FixedSizeList in a2fdfb5a6113a7db25e0c289e25544647d71951f, but it still needs 
to be skipped via `pytest` for now.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to