Source: scikit-learn
Version: 1.4.2+dfsg-6
Severity: normal

scikit-learn's tests fail with scipy 1.14 from experimental. Perhaps this has
already been fixed in the latest upstream release; I don't know.
https://ci.debian.net/packages/s/scikit-learn/unstable/amd64/51759247/

_______ test_standard_scaler_partial_fit_numerical_stability[csc_array] ________

sparse_container = <class 'scipy.sparse._csc.csc_array'>

    @pytest.mark.parametrize("sparse_container", CSC_CONTAINERS + CSR_CONTAINERS)
    def test_standard_scaler_partial_fit_numerical_stability(sparse_container):
        # Test if the incremental computation introduces significative errors
        # for large datasets with values of large magniture
        rng = np.random.RandomState(0)
        n_features = 2
        n_samples = 100
        offsets = rng.uniform(-1e15, 1e15, size=n_features)
        scales = rng.uniform(1e3, 1e6, size=n_features)
        X = rng.randn(n_samples, n_features) * scales + offsets

        scaler_batch = StandardScaler().fit(X)
        scaler_incr = StandardScaler()
        for chunk in X:
            scaler_incr = scaler_incr.partial_fit(chunk.reshape(1, n_features))

        # Regardless of abs values, they must not be more diff 6 significant digits
        tol = 10 ** (-6)
        assert_allclose(scaler_incr.mean_, scaler_batch.mean_, rtol=tol)
        assert_allclose(scaler_incr.var_, scaler_batch.var_, rtol=tol)
        assert_allclose(scaler_incr.scale_, scaler_batch.scale_, rtol=tol)
        # NOTE Be aware that for much larger offsets std is very unstable (last
        # assert) while mean is OK.

        # Sparse input
        size = (100, 3)
        scale = 1e20
        X = sparse_container(rng.randint(0, 2, size).astype(np.float64) * scale)

        # with_mean=False is required with sparse input
        scaler = StandardScaler(with_mean=False).fit(X)
        scaler_incr = StandardScaler(with_mean=False)

        for chunk in X:
>           scaler_incr = scaler_incr.partial_fit(chunk)

/usr/lib/python3/dist-packages/sklearn/preprocessing/tests/test_data.py:598:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/usr/lib/python3/dist-packages/sklearn/base.py:1474: in wrapper
    return fit_method(estimator, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = StandardScaler(with_mean=False)
X = <Compressed Sparse Row sparse array of dtype 'float64'
    with 0 stored elements and shape (3,)>
y = None, sample_weight = None

    @_fit_context(prefer_skip_nested_validation=True)
    def partial_fit(self, X, y=None, sample_weight=None):
        """Online computation of mean and std on X for later scaling.

        All of X is processed as a single batch. This is intended for cases
        when :meth:`fit` is not feasible due to very large number of
        `n_samples` or because X is read from a continuous stream.

        The algorithm for incremental mean and std is given in Equation 1.5a,b
        in Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. "Algorithms
        for computing the sample variance: Analysis and recommendations."
        The American Statistician 37.3 (1983): 242-247:

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data used to compute the mean and standard deviation
            used for later scaling along the features axis.

        y : None
            Ignored.

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample.

            .. versionadded:: 0.24
               parameter *sample_weight* support to StandardScaler.

        Returns
        -------
        self : object
            Fitted scaler.
        """
        first_call = not hasattr(self, "n_samples_seen_")
        X = self._validate_data(
            X,
            accept_sparse=("csr", "csc"),
            dtype=FLOAT_DTYPES,
            force_all_finite="allow-nan",
            reset=first_call,
        )
>       n_features = X.shape[1]
E       IndexError: tuple index out of range

/usr/lib/python3/dist-packages/sklearn/preprocessing/_data.py:919: IndexError

_______ test_standard_scaler_partial_fit_numerical_stability[csr_array] ________

sparse_container = <class 'scipy.sparse._csr.csr_array'>

[identical traceback to the csc_array failure above, elided]
...
FAILED ../../../../usr/lib/python3/dist-packages/sklearn/preprocessing/tests/test_data.py::test_standard_scaler_partial_fit_numerical_stability[csc_array]
FAILED ../../../../usr/lib/python3/dist-packages/sklearn/preprocessing/tests/test_data.py::test_standard_scaler_partial_fit_numerical_stability[csr_array]
= 2 failed, 29267 passed, 3384 skipped, 2 deselected, 88 xfailed, 45 xpassed, 3276 warnings in 600.51s (0:10:00) =

We've only recently upgraded to scipy 1.13, but we'll want to upgrade further
to scipy 1.14 before too long.
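For what it's worth, the repr in the traceback (a sparse array of shape (3,))
looks like the culprit: with scipy 1.14's sparse arrays, iterating over a 2-D
array apparently yields 1-D sparse arrays, so the X.shape[1] lookup in
partial_fit has no second axis to read. A minimal sketch of what I think is
going on (illustration only, assuming scipy 1.14 iteration semantics; not a
verified diagnosis):

    import numpy as np
    from scipy.sparse import csr_array

    X = csr_array(np.eye(3))       # 2-D sparse array, shape (3, 3)
    chunk = next(iter(X))          # under scipy 1.14: a 1-D sparse array
    print(chunk.shape)             # (3,) -- only one axis

    try:
        chunk.shape[1]             # what partial_fit does at _data.py:919
    except IndexError as err:
        print("IndexError:", err)  # "tuple index out of range"

On scipy 1.13 the same loop presumably still hands partial_fit something
two-dimensional, which would explain why the test only started failing once
scipy 1.14 entered the picture.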