Source: dask Version: 2022.02.0+dfsg-1 Severity: normal Control: forwarded -1 https://github.com/dask/dask/issues/8620
dask 2022.02.0 is failing two CI tests on 32-bit architectures (armhf, i386): one in test_query_with_meta, the other in test_categorize_info. The test_query_with_meta error is reported upstream at <https://github.com/dask/dask/issues/8620>. The test_categorize_info error was dealt with upstream with your patch applied in <https://github.com/dask/dask/pull/8851>, which should be included in the 2022.04.0 release. Since we've got the pyarrow dependency getting in the way of upgrading to the more recent dask releases as noted in Bug#1013080, should we pull in the PR#8851 patch to debian/patches to fix test_categorize_info? _____________________________ test_query_with_meta _____________________________ db = 'sqlite:////tmp/tmp61ugakdn.' def test_query_with_meta(db): from sqlalchemy import sql data = { "name": pd.Series([], name="name", dtype="str"), "age": pd.Series([], name="age", dtype="int"), } index = pd.Index([], name="number", dtype="int") meta = pd.DataFrame(data, index=index) s1 = sql.select( [sql.column("number"), sql.column("name"), sql.column("age")] ).select_from(sql.table("test")) out = read_sql_query(s1, db, npartitions=2, index_col="number", meta=meta) # Don't check dtype for windows https://github.com/dask/dask/issues/8620 > assert_eq(out, df[["name", "age"]], check_dtype=sys.platform != "win32") /usr/lib/python3/dist-packages/dask/dataframe/io/tests/test_sql.py:443: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ a = name age number 0 Alice 33 1 Bob 40 2 Chris 22 3 Dora 16 4 Edith 53 5 Francis 30 6 Garreth 20 b = name age number 0 Alice 33 1 Bob 40 2 Chris 22 3 Dora 16 4 Edith 53 5 Francis 30 6 Garreth 20 check_names = True, check_dtype = True, check_divisions = True check_index = True, scheduler = 'sync', kwargs = {} def assert_eq( a, b, check_names=True, check_dtype=True, check_divisions=True, check_index=True, scheduler="sync", **kwargs, ): if check_divisions: assert_divisions(a, scheduler=scheduler) assert_divisions(b, 
scheduler=scheduler) if hasattr(a, "divisions") and hasattr(b, "divisions"): at = type(np.asarray(a.divisions).tolist()[0]) # numpy to python bt = type(np.asarray(b.divisions).tolist()[0]) # scalar conversion assert at == bt, (at, bt) assert_sane_keynames(a) assert_sane_keynames(b) a = _check_dask( a, check_names=check_names, check_dtypes=check_dtype, scheduler=scheduler ) b = _check_dask( b, check_names=check_names, check_dtypes=check_dtype, scheduler=scheduler ) if hasattr(a, "to_pandas"): a = a.to_pandas() if hasattr(b, "to_pandas"): b = b.to_pandas() if isinstance(a, (pd.DataFrame, pd.Series)): a = _maybe_sort(a, check_index) b = _maybe_sort(b, check_index) if not check_index: a = a.reset_index(drop=True) b = b.reset_index(drop=True) if isinstance(a, pd.DataFrame): > tm.assert_frame_equal( a, b, check_names=check_names, check_dtype=check_dtype, **kwargs E AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="age") are different E E Attribute "dtype" are different E [left]: int32 E [right]: int64 /usr/lib/python3/dist-packages/dask/dataframe/utils.py:562: AssertionError _____________________________ test_categorize_info _____________________________ @pytest.mark.skipif(not PANDAS_GT_120, reason="need newer version of Pandas") def test_categorize_info(): # assert that we can call info after categorize # workaround for: https://github.com/pydata/pandas/issues/14368 from io import StringIO pandas_format._put_lines = put_lines df = pd.DataFrame( {"x": [1, 2, 3, 4], "y": pd.Series(list("aabc")), "z": pd.Series(list("aabc"))}, index=[0, 1, 2, 3], ) ddf = dd.from_pandas(df, npartitions=4).categorize(["y"]) # Verbose=False buf = StringIO() ddf.info(buf=buf, verbose=True) expected = ( "<class 'dask.dataframe.core.DataFrame'>\n" "Int64Index: 4 entries, 0 to 3\n" "Data columns (total 3 columns):\n" " # Column Non-Null Count Dtype\n" "--- ------ -------------- -----\n" " 0 x 4 non-null int64\n" " 1 y 4 non-null category\n" " 2 z 4 non-null object\n" "dtypes: 
category(1), object(1), int64(1)\n" "memory usage: 496.0 bytes\n" ) > assert buf.getvalue() == expected E assert "<class 'dask...312.0 bytes\n" == "<class 'dask...496.0 bytes\n" E <class 'dask.dataframe.core.DataFrame'> E Int64Index: 4 entries, 0 to 3 E Data columns (total 3 columns): E # Column Non-Null Count Dtype E --- ------ -------------- ----- E 0 x 4 non-null int64 E 1 y 4 non-null category... E E ...Full output truncated (7 lines hidden), use '-vv' to show /usr/lib/python3/dist-packages/dask/dataframe/tests/test_dataframe.py:3629: AssertionError -- System Information: Debian Release: bookworm/sid APT prefers unstable APT policy: (500, 'unstable'), (1, 'experimental') Architecture: amd64 (x86_64) Foreign Architectures: i386 Kernel: Linux 5.18.0-3-amd64 (SMP w/8 CPU threads; PREEMPT) Kernel taint flags: TAINT_PROPRIETARY_MODULE, TAINT_WARN, TAINT_OOT_MODULE Locale: LANG=en_AU.UTF-8, LC_CTYPE=en_AU.UTF-8 (charmap=UTF-8), LANGUAGE=en_AU:en Shell: /bin/sh linked to /bin/dash Init: systemd (via /run/systemd/system) LSM: AppArmor: enabled