=================================== FAILURES ===================================
_____________________________ test_query_with_meta _____________________________
db = 'sqlite:////tmp/tmpmr8qr16b.'

    def test_query_with_meta(db):
        from sqlalchemy import sql
        data = {
            "name": pd.Series([], name="name", dtype="str"),
            "age": pd.Series([], name="age", dtype="int"),
        }
        index = pd.Index([], name="number", dtype="int")
        meta = pd.DataFrame(data, index=index)
        s1 = sql.select(
            [sql.column("number"), sql.column("name"), sql.column("age")]
        ).select_from(sql.table("test"))
        out = read_sql_query(s1, db, npartitions=2, index_col="number", meta=meta)
        # Don't check dtype for windows https://github.com/dask/dask/issues/8620
>       assert_eq(out, df[["name", "age"]], check_dtype=sys.platform != "win32")

/usr/lib/python3/dist-packages/dask/dataframe/io/tests/test_sql.py:443: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
a =            name  age
number               
0         Alice   33
1           Bob   40
2         Chris   22
3          Dora   16
4         Edith   53
5       Francis   30
6       Garreth   20
b =            name  age
number               
0         Alice   33
1           Bob   40
2         Chris   22
3          Dora   16
4         Edith   53
5       Francis   30
6       Garreth   20
check_names = True, check_dtype = True, check_divisions = True
check_index = True, scheduler = 'sync', kwargs = {}

    def assert_eq(
        a,
        b,
        check_names=True,
        check_dtype=True,
        check_divisions=True,
        check_index=True,
        scheduler="sync",
        **kwargs,
    ):
        if check_divisions:
            assert_divisions(a, scheduler=scheduler)
            assert_divisions(b, scheduler=scheduler)
            if hasattr(a, "divisions") and hasattr(b, "divisions"):
                at = type(np.asarray(a.divisions).tolist()[0])  # numpy to python
                bt = type(np.asarray(b.divisions).tolist()[0])  # scalar conversion
                assert at == bt, (at, bt)
        assert_sane_keynames(a)
        assert_sane_keynames(b)
        a = _check_dask(
            a, check_names=check_names, check_dtypes=check_dtype, scheduler=scheduler
        )
        b = _check_dask(
            b, check_names=check_names, check_dtypes=check_dtype, scheduler=scheduler
        )
        if hasattr(a, "to_pandas"):
            a = a.to_pandas()
        if hasattr(b, "to_pandas"):
            b = b.to_pandas()
        if isinstance(a, (pd.DataFrame, pd.Series)):
            a = _maybe_sort(a, check_index)
            b = _maybe_sort(b, check_index)
        if not check_index:
            a = a.reset_index(drop=True)
            b = b.reset_index(drop=True)
        if isinstance(a, pd.DataFrame):
>           tm.assert_frame_equal(
                a, b, check_names=check_names, check_dtype=check_dtype, **kwargs
E           AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="age") are different
E           
E           Attribute "dtype" are different
E           [left]:  int32
E           [right]: int64

/usr/lib/python3/dist-packages/dask/dataframe/utils.py:562: AssertionError
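
Note: the dtype mismatch above (int32 vs int64 for the "age" column) is the platform-default-integer problem referenced by the test's own comment (https://github.com/dask/dask/issues/8620). The meta frame is built with dtype="int", which resolves to whatever the default integer width is on the running platform. A minimal sketch of the effect, assuming only pandas/numpy and not part of the pytest output:

    import numpy as np
    import pandas as pd

    # "int" maps to the platform's default integer: typically 64-bit on Linux/macOS,
    # 32-bit on Windows and on 32-bit builds (with NumPy < 2.0).
    print(np.dtype("int"))                   # int64 or int32, platform-dependent
    print(pd.Series([], dtype="int").dtype)  # same platform-dependent width

    # Pinning an explicit width in the meta removes the ambiguity (illustrative
    # alternative only; the test instead relaxes check_dtype on Windows):
    meta = pd.DataFrame(
        {"name": pd.Series([], dtype="object"), "age": pd.Series([], dtype="int64")},
        index=pd.Index([], name="number", dtype="int64"),
    )
    print(meta.dtypes)
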
_____________________________ test_categorize_info _____________________________
    @pytest.mark.skipif(not PANDAS_GT_120, reason="need newer version of Pandas")
    def test_categorize_info():
        # assert that we can call info after categorize
        # workaround for: https://github.com/pydata/pandas/issues/14368
        from io import StringIO
        pandas_format._put_lines = put_lines
        df = pd.DataFrame(
            {"x": [1, 2, 3, 4], "y": pd.Series(list("aabc")), "z": pd.Series(list("aabc"))},
            index=[0, 1, 2, 3],
        )
        ddf = dd.from_pandas(df, npartitions=4).categorize(["y"])
        # Verbose=False
        buf = StringIO()
        ddf.info(buf=buf, verbose=True)
        expected = (
            "<class 'dask.dataframe.core.DataFrame'>\n"
            "Int64Index: 4 entries, 0 to 3\n"
            "Data columns (total 3 columns):\n"
            " #   Column  Non-Null Count  Dtype\n"
            "---  ------  --------------  -----\n"
            " 0   x       4 non-null      int64\n"
            " 1   y       4 non-null      category\n"
            " 2   z       4 non-null      object\n"
            "dtypes: category(1), object(1), int64(1)\n"
            "memory usage: 496.0 bytes\n"
        )
>       assert buf.getvalue() == expected
E       assert "<class 'dask...312.0 bytes\n" == "<class 'dask...496.0 bytes\n"
E         <class 'dask.dataframe.core.DataFrame'>
E         Int64Index: 4 entries, 0 to 3
E         Data columns (total 3 columns):
E          #   Column  Non-Null Count  Dtype
E         ---  ------  --------------  -----
E          0   x       4 non-null      int64
E          1   y       4 non-null      category...
E         
E         ...Full output truncated (7 lines hidden), use '-vv' to show

/usr/lib/python3/dist-packages/dask/dataframe/tests/test_dataframe.py:3629: AssertionError
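
Note: pytest truncated the diff above; the visible lines match the expected string, and the string reprs in the assert line show where the two outputs end differently: the actual info() output ends with "memory usage: 312.0 bytes\n" where the hard-coded expected string ends with "496.0 bytes\n". A minimal sketch, not part of the pytest output, that rebuilds the test's frame standalone to print the full info() string (assuming dask and pandas are importable); rerunning the test with -vv, as the message suggests, shows the same hidden lines:

    from io import StringIO

    import pandas as pd
    import dask.dataframe as dd

    # Same frame the test constructs, rebuilt here only to inspect info() output.
    df = pd.DataFrame(
        {"x": [1, 2, 3, 4], "y": pd.Series(list("aabc")), "z": pd.Series(list("aabc"))},
        index=[0, 1, 2, 3],
    )
    ddf = dd.from_pandas(df, npartitions=4).categorize(["y"])

    buf = StringIO()
    ddf.info(buf=buf, verbose=True)
    print(buf.getvalue())  # the "memory usage: ..." line is where the assertion diverges
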
=============================== warnings summary ===============================