Bug#950924: python-feather-format: FTBFS with pandas 1.0: test_boolean_object_nulls / test_sparse_dataframe fail

Rebecca N. Palmer Sat, 08 Feb 2020 05:48:55 -0800

Package: python3-feather-format
Version: 0.3.1+dfsg1-3
Control: tags -1 patch
Control: block 950430 by -1


Two tests fail with pandas 1.0 (from experimental):

======================================================================

ERROR: test_boolean_object_nulls (feather.tests.test_reader.TestFeatherReader)

----------------------------------------------------------------------
Traceback (most recent call last):

  File "/build/python-feather-format-0.3.1+dfsg1/feather/tests/test_reader.py", line 248, in test_boolean_object_nulls

    self._check_pandas_roundtrip(df, null_counts=[1 * repeats])

  File "/build/python-feather-format-0.3.1+dfsg1/feather/tests/test_reader.py", line 70, in _check_pandas_roundtrip

    feather.write_dataframe(df, path)

  File "/build/python-feather-format-0.3.1+dfsg1/feather/api.py", line 57, in write_dataframe

    raise ValueError(msg)
ValueError: cannot serialize column 0 named bools with dtype boolean

======================================================================
ERROR: test_sparse_dataframe (feather.tests.test_reader.TestFeatherReader)
----------------------------------------------------------------------
Traceback (most recent call last):

  File "/build/python-feather-format-0.3.1+dfsg1/feather/tests/test_reader.py", line 346, in test_sparse_dataframe

    df = pd.DataFrame(data).to_sparse(fill_value=1)

  File "/usr/lib/python3/dist-packages/pandas/core/generic.py", line 5273, in __getattr__

    return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'to_sparse'

----------------------------------------------------------------------
Ran 25 tests in 0.255s

FAILED (errors=2)

Fix:

--- python-feather-format-0.3.1+dfsg1.orig/feather/api.py
+++ python-feather-format-0.3.1+dfsg1/feather/api.py
@@ -39,9 +39,11 @@ def write_dataframe(df, path):
     # TODO(wesm): pipeline conversion to Arrow memory layout
     for i, name in enumerate(df.columns):
         col = df.iloc[:, i]
+        if pandas.api.types.is_sparse(col):
+            col = col.sparse.to_dense()

         if pdapi.is_object_dtype(col):
-            inferred_type = pandas.api.types.infer_dtype(col)
+            inferred_type = pandas.api.types.infer_dtype(col, skipna=False)
             msg = ("cannot serialize column {n} "
                    "named {name} with dtype {dtype}".format(
                        n=i, name=name, dtype=inferred_type))
--- python-feather-format-0.3.1+dfsg1.orig/feather/tests/test_reader.py
+++ python-feather-format-0.3.1+dfsg1/feather/tests/test_reader.py
@@ -343,8 +343,8 @@ class TestFeatherReader(unittest.TestCas
         # GH #221
         data = {'A': [0,1,2],
                 'B': [1,0,1]}
-        df = pd.DataFrame(data).to_sparse(fill_value=1)
-        expected = df.to_dense()
+        df = pd.DataFrame(data).astype(pd.SparseDtype(int, fill_value=1))
+        expected = df.sparse.to_dense()
         self._check_pandas_roundtrip(df, expected)

     def test_duplicate_columns(self):

Bug#950924: python-feather-format: FTBFS with pandas 1.0: test_boolean_object_nulls / test_sparse_dataframe fail

Reply via email to