This is an automated email from the ASF dual-hosted git repository.
rok pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 5647eda53f GH-49349: [Doc][Python] Simplify doctests in table.pxi and
types.pxi (#49350)
5647eda53f is described below
commit 5647eda53ff0a895ebe061d99e92a5ea3dd652ed
Author: tadeja <[email protected]>
AuthorDate: Thu Mar 5 16:03:09 2026 +0100
GH-49349: [Doc][Python] Simplify doctests in table.pxi and types.pxi
(#49350)
### Rationale for this change
Closes #49349
### What changes are included in this PR?
a) Change docstring examples from pandas-version-agnostic output (valid for
both pandas 2.3.3 and 3) to pandas 3-specific output (e.g. `...string` becomes `large_string`)
b) Replace `pa.Table.from_arrays([...], names=[...])` with `pa.table({...})`
### Are these changes tested?
Yes, tests pass locally.
### Are there any user-facing changes?
No.
* GitHub Issue: #49349
Authored-by: Tadeja Kadunc <[email protected]>
Signed-off-by: Rok Mihevc <[email protected]>
---
python/pyarrow/table.pxi | 135 ++++++++++++++++++++---------------------------
python/pyarrow/types.pxi | 2 +-
2 files changed, 57 insertions(+), 80 deletions(-)
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 361ba145c8..5ca7762dec 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -1809,9 +1809,8 @@ cdef class _Tabular(_PandasConvertible):
Table (works similarly for RecordBatch)
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays([[2, 4, 5, 100],
- ... ["Flamingo", "Horse", "Brittle
stars", "Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
>>> table.column_names
['n_legs', 'animals']
"""
@@ -1873,13 +1872,9 @@ cdef class _Tabular(_PandasConvertible):
Table (works similarly for RecordBatch)
>>> import pyarrow as pa
- >>> import pandas as pd
- >>> df = pd.DataFrame({'year': [None, 2022, 2019, 2021],
+ >>> table = pa.table({'year': [None, 2022, 2019, 2021],
... 'n_legs': [2, 4, 5, 100],
... 'animals': ["Flamingo", "Horse", None,
"Centipede"]})
- >>> table = pa.Table.from_arrays(
- ... [[None, 2022, 2019, 2021], [2, 4, 5, 100], ["Flamingo",
"Horse", None, "Centipede"]],
- ... names=['year', 'n_legs', 'animals'])
>>> table.drop_null()
pyarrow.Table
year: int64
@@ -1911,9 +1906,8 @@ cdef class _Tabular(_PandasConvertible):
Table (works similarly for RecordBatch)
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars",
"Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
>>> table.field(0)
pyarrow.Field<n_legs: int64>
>>> table.field(1)
@@ -2065,9 +2059,8 @@ cdef class _Tabular(_PandasConvertible):
Table (works similarly for RecordBatch)
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[None, 4, 5, None], ["Flamingo", "Horse", None, "Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [None, 4, 5, None],
+ ... 'animals': ["Flamingo", "Horse", None,
"Centipede"]})
>>> for i in table.itercolumns():
... print(i.null_count)
...
@@ -2134,11 +2127,10 @@ cdef class _Tabular(_PandasConvertible):
Table (works similarly for RecordBatch)
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2020, 2022, 2021, 2022, 2019, 2021],
- ... [2, 2, 4, 4, 5, 100],
- ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars",
"Centipede"]],
- ... names=['year', 'n_legs', 'animal'])
+ >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021],
+ ... 'n_legs': [2, 2, 4, 4, 5, 100],
+ ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse",
+ ... "Brittle stars", "Centipede"]})
>>> table.sort_by('animal')
pyarrow.Table
year: int64
@@ -2180,10 +2172,9 @@ cdef class _Tabular(_PandasConvertible):
Table (works similarly for RecordBatch)
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2020, 2022, 2019, 2021], [2, 4, 5, 100],
- ... ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
- ... names=['year', 'n_legs', 'animals'])
+ >>> table = pa.table({'year': [2020, 2022, 2019, 2021],
+ ... 'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
>>> table.take([1,3])
pyarrow.Table
year: int64
@@ -2471,9 +2462,8 @@ cdef class _Tabular(_PandasConvertible):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars",
"Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
Append column at the end:
@@ -2542,7 +2532,7 @@ cdef class RecordBatch(_Tabular):
month: int64
day: int64
n_legs: int64
- animals: ...string
+ animals: large_string
----
year: [2020,2022,2021,2022]
month: [3,5,7,9]
@@ -2582,7 +2572,7 @@ cdef class RecordBatch(_Tabular):
month: int64
day: int64
n_legs: int64
- animals: ...string
+ animals: large_string
----
year: [2020,2022,2021,2022]
month: [3,5,7,9]
@@ -3406,7 +3396,7 @@ cdef class RecordBatch(_Tabular):
month: int64
day: int64
n_legs: int64
- animals: ...string
+ animals: large_string
----
year: [2020,2022,2021,2022]
month: [3,5,7,9]
@@ -4146,7 +4136,7 @@ cdef class Table(_Tabular):
pyarrow.Table
year: int64
n_legs: int64
- animals: ...string
+ animals: large_string
----
year: [[2020,2022,2019,2021]]
n_legs: [[2,4,5,100]]
@@ -4272,10 +4262,9 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2020, 2022, 2019, 2021], [2, 4, 5, 100],
- ... ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
- ... names=['year', 'n_legs', 'animals'])
+ >>> table = pa.table({'year': [2020, 2022, 2019, 2021],
+ ... 'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
>>> table.slice(length=3)
pyarrow.Table
year: int64
@@ -4336,10 +4325,9 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2020, 2022, 2019, 2021], [2, 4, 5, 100],
- ... ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
- ... names=['year', 'n_legs', 'animals'])
+ >>> table = pa.table({'year': [2020, 2022, 2019, 2021],
+ ... 'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
>>> table.select([0,1])
pyarrow.Table
year: int64
@@ -4675,9 +4663,8 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars",
"Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
>>> table.schema
n_legs: int64
animals: string
@@ -4772,7 +4759,7 @@ cdef class Table(_Tabular):
>>> pa.Table.from_pandas(df)
pyarrow.Table
n_legs: int64
- animals: ...string
+ animals: large_string
----
n_legs: [[2,4,5,100]]
animals: [["Flamingo","Horse","Brittle stars","Centipede"]]
@@ -5117,9 +5104,8 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars",
"Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
Convert a Table to a RecordBatchReader:
@@ -5175,9 +5161,8 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars",
"Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
>>> table.schema
n_legs: int64
animals: string
@@ -5267,9 +5252,8 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[None, 4, 5, None], ["Flamingo", "Horse", None, "Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [None, 4, 5, None],
+ ... 'animals': ["Flamingo", "Horse", None,
"Centipede"]})
>>> table.nbytes
72
"""
@@ -5296,9 +5280,8 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[None, 4, 5, None], ["Flamingo", "Horse", None, "Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [None, 4, 5, None],
+ ... 'animals': ["Flamingo", "Horse", None,
"Centipede"]})
>>> table.get_total_buffer_size()
76
"""
@@ -5337,9 +5320,8 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars",
"Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
Add column:
@@ -5402,9 +5384,8 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars",
"Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
>>> table.remove_column(1)
pyarrow.Table
n_legs: int64
@@ -5440,9 +5421,8 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars",
"Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
Replace a column:
@@ -5501,9 +5481,8 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars",
"Centipede"]],
- ... names=['n_legs', 'animals'])
+ >>> table = pa.table({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle
stars", "Centipede"]})
>>> new_names = ["n", "name"]
>>> table.rename_columns(new_names)
pyarrow.Table
@@ -5593,11 +5572,10 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> table = pa.Table.from_arrays(
- ... [[2020, 2022, 2021, 2022, 2019, 2021],
- ... [2, 2, 4, 4, 5, 100],
- ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars",
"Centipede"]],
- ... names=['year', 'n_legs', 'animal'])
+ >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021],
+ ... 'n_legs': [2, 2, 4, 4, 5, 100],
+ ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse",
+ ... "Brittle stars", "Centipede"]})
>>> table.group_by('year').aggregate([('n_legs', 'sum')])
pyarrow.Table
year: int64
@@ -5656,12 +5634,11 @@ cdef class Table(_Tabular):
--------
>>> import pyarrow as pa
>>> import pyarrow.compute as pc
- >>> t1 = pa.Table.from_arrays(
- ... [[1, 2, 3], [2020, 2022, 2019]],
- ... names=['id', 'year'])
- >>> t2 = pa.Table.from_arrays(
- ... [[3, 4], [5, 100], ["Brittle stars", "Centipede"]],
- ... names=['id', 'n_legs', 'animal'])
+ >>> t1 = pa.table({'id': [1, 2, 3],
+ ... 'year': [2020, 2022, 2019]})
+ >>> t2 = pa.table({'id': [3, 4],
+ ... 'n_legs': [5, 100],
+ ... 'animal': ["Brittle stars", "Centipede"]})
Left outer join:
@@ -5973,7 +5950,7 @@ def record_batch(data, names=None, schema=None,
metadata=None):
month: int64
day: int64
n_legs: int64
- animals: ...string
+ animals: large_string
----
year: [2020,2022,2021,2022]
month: [3,5,7,9]
@@ -6134,7 +6111,7 @@ def table(data, names=None, schema=None, metadata=None,
nthreads=None):
pyarrow.Table
year: int64
n_legs: int64
- animals: ...string
+ animals: large_string
----
year: [[2020,2022,2019,2021]]
n_legs: [[2,4,5,100]]
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index e84f1b073f..e9eef89651 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -3140,7 +3140,7 @@ cdef class Schema(_Weakrefable):
>>> pa.Schema.from_pandas(df)
int: int64
- str: ...string
+ str: large_string
-- schema metadata --
pandas: '{"index_columns": [{"kind": "range", "name": null, ...
"""