Package: python3-pandas,python3-dask
Version: 2.2.3+dfsg-6,2024.12.1+dfsg-1

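The pandas test test_frame_setitem_dask_array_into_new_col is failing in Salsa. It looks like assigning a dask array to a DataFrame column used to make a copy and no longer does: after the same dask array is assigned to both columns "b" and "c", writing 100 into "b" also changes "c".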

I'm not sure yet whether this is an actual problem - I'm mostly opening this bug so I don't forget about it.

__________________ test_frame_setitem_dask_array_into_new_col __________________

    @pytest.mark.xfail(
        condition=not IS64,
        reason="dask has different nativesize-int vs int64 type rules",
        strict=False,
    )
    def test_frame_setitem_dask_array_into_new_col():
        # GH#47128

        # dask sets "compute.use_numexpr" to False, so catch the current value
        # and ensure to reset it afterwards to avoid impacting other tests
        olduse = pd.get_option("compute.use_numexpr")

        try:
            da = td.versioned_importorskip("dask.array")

            dda = da.array([1, 2])
            df = DataFrame({"a": ["a", "b"]})
            df["b"] = dda
            df["c"] = dda
            df.loc[[False, True], "b"] = 100
            result = df.loc[[1], :]
            expected = DataFrame({"a": ["b"], "b": [100], "c": [2]}, index=[1])
>           tm.assert_frame_equal(result, expected)

pandas/tests/test_downstream.py:259:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

left = array([100]), right = array([2]), err_msg = None

    def _raise(left, right, err_msg) -> NoReturn:
        if err_msg is None:
            if left.shape != right.shape:
                raise_assert_detail(
                    obj, f"{obj} shapes are different", left.shape, right.shape
                )

            diff = 0
            for left_arr, right_arr in zip(left, right):
                # count up differences
                if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan):
                    diff += 1

            diff = diff * 100.0 / left.size
            msg = f"{obj} values are different ({np.round(diff, 5)} %)"
>           raise_assert_detail(obj, msg, left, right, index_values=index_values)
E           AssertionError: DataFrame.iloc[:, 2] (column name="c") are different
E
E           DataFrame.iloc[:, 2] (column name="c") values are different (100.0 %)
E           [index]: [1]
E           [left]:  [100]
E           [right]: [2]

pandas/_testing/asserters.py:684: AssertionError

Reply via email to