AlenkaF commented on code in PR #49247:
URL: https://github.com/apache/arrow/pull/49247#discussion_r3020923611
##########
python/pyarrow/tests/test_pandas.py:
##########
@@ -3047,6 +3049,10 @@ def test_all_none_category(self):
def test_empty_arrays(self):
for dtype_str, pa_type in self.type_pairs:
+ if (Version(pd.__version__) >= Version("3.0.0") and
+ pa_type == pa.string()):
+ # PyArrow backed string dtype are set by default
+ dtype_str = 'str'
Review Comment:
Yes, correct. But later in the test we create a pyarrow array and then
convert it to pandas, to check the conversion from numpy to pandas, as far as
I understand. With the lines added in this change commented out, I get this
error when running the test with pandas 3.0:
```python
> tm.assert_series_equal(pd.Series(result), expected,
check_names=False)
E AssertionError: Attributes of Series are different
E
E Attribute "dtype" are different
E [left]: <StringDtype(na_value=nan)>
E [right]: object
```
<details>
<summary>Full output</summary>
______________________________________________________________________________________
TestConvertMisc.test_empty_arrays
______________________________________________________________________________________
self = <pyarrow.tests.test_pandas.TestConvertMisc object at 0x115db68b0>
def test_empty_arrays(self):
for dtype_str, pa_type in self.type_pairs:
# if (Version(pd.__version__) >= Version("3.0.0") and
# pa_type == pa.string()):
# # PyArrow backed string dtype are set by default
# dtype_str = 'str'
arr = np.array([], dtype=np.dtype(dtype_str))
> _check_array_roundtrip(arr, type=pa_type)
pyarrow/tests/test_pandas.py:3057:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
values = array([], dtype=object), expected = Series([], dtype: object), mask
= None, type = DataType(string)
def _check_array_roundtrip(values, expected=None, mask=None,
type=None):
arr = pa.array(values, from_pandas=True, mask=mask, type=type)
result = arr.to_pandas()
values_nulls = pd.isnull(values)
if mask is None:
assert arr.null_count == values_nulls.sum()
else:
assert arr.null_count == (mask | values_nulls).sum()
if expected is None:
if mask is None:
expected = pd.Series(values)
else:
expected = pd.Series(values).copy()
expected[mask.copy()] = None
if expected.dtype == 'object':
expected = expected.replace({np.nan: None})
> tm.assert_series_equal(pd.Series(result), expected,
check_names=False)
E AssertionError: Attributes of Series are different
E
E Attribute "dtype" are different
E [left]: <StringDtype(na_value=nan)>
E [right]: object
pyarrow/tests/test_pandas.py:164: AssertionError
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]