Skip to content

Commit

Permalink
FIX propagate configuration to workers in parallel (#25363)
Browse files Browse the repository at this point in the history
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
  • Loading branch information
3 people committed Jan 20, 2023
1 parent de084fc commit a7cd0ca
Show file tree
Hide file tree
Showing 40 changed files with 314 additions and 116 deletions.
3 changes: 1 addition & 2 deletions benchmarks/bench_saga.py
Expand Up @@ -7,8 +7,7 @@
import time
import os

from joblib import Parallel
from sklearn.utils.fixes import delayed
from sklearn.utils.parallel import delayed, Parallel
import matplotlib.pyplot as plt
import numpy as np

Expand Down
15 changes: 10 additions & 5 deletions build_tools/azure/linting.sh
Expand Up @@ -34,10 +34,15 @@ then
exit 1
fi

joblib_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/fixes.py")"

if [ ! -z "$joblib_import" ]; then
echo "Use from sklearn.utils.fixes import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:"
echo "$joblib_import"
joblib_delayed_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")"
if [ ! -z "$joblib_delayed_import" ]; then
echo "Use from sklearn.utils.parallel import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:"
echo "$joblib_delayed_import"
exit 1
fi
joblib_Parallel_import="$(git grep -l -A 10 -E "joblib import.+Parallel" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")"
if [ ! -z "$joblib_Parallel_import" ]; then
echo "Use from sklearn.utils.parallel import Parallel instead of joblib Parallel. The following files contains imports to joblib.Parallel:"
echo "$joblib_Parallel_import"
exit 1
fi
7 changes: 7 additions & 0 deletions doc/modules/classes.rst
Expand Up @@ -1668,9 +1668,16 @@ Utilities from joblib:
:toctree: generated/
:template: function.rst

utils.parallel.delayed
utils.parallel_backend
utils.register_parallel_backend

.. autosummary::
:toctree: generated/
:template: class.rst

utils.parallel.Parallel


Recently deprecated
===================
Expand Down
17 changes: 17 additions & 0 deletions doc/whats_new/v1.2.rst
Expand Up @@ -9,6 +9,16 @@ Version 1.2.1

**In Development**

Changes impacting all modules
-----------------------------

- |Fix| Fix a bug where the current configuration was ignored in estimators using
`n_jobs > 1`. This bug was triggered for tasks dispatched by the auxiliary
thread of `joblib` as :func:`sklearn.get_config` used to access an empty thread
local configuration instead of the configuration visible from the thread where
`joblib.Parallel` was first called.
:pr:`25363` by :user:`Guillaume Lemaitre <glemaitre>`.

Changed models
--------------

Expand Down Expand Up @@ -139,6 +149,13 @@ Changelog
boolean. The type is maintained, instead of converting to `float64`.
:pr:`25147` by :user:`Tim Head <betatim>`.

- |API| :func:`utils.fixes.delayed` is deprecated in 1.2.1 and will be removed
in 1.5. Instead, import :func:`utils.parallel.delayed` and use it in
conjunction with the newly introduced :class:`utils.parallel.Parallel`
to ensure proper propagation of the scikit-learn configuration to
the workers.
:pr:`25363` by :user:`Guillaume Lemaitre <glemaitre>`.

.. _changes_1_2:

Version 1.2.0
Expand Down
3 changes: 1 addition & 2 deletions sklearn/calibration.py
Expand Up @@ -14,7 +14,6 @@

from math import log
import numpy as np
from joblib import Parallel

from scipy.special import expit
from scipy.special import xlogy
Expand All @@ -36,7 +35,7 @@
)

from .utils.multiclass import check_classification_targets
from .utils.fixes import delayed
from .utils.parallel import delayed, Parallel
from .utils._param_validation import StrOptions, HasMethods, Hidden
from .utils.validation import (
_check_fit_params,
Expand Down
3 changes: 1 addition & 2 deletions sklearn/cluster/_mean_shift.py
Expand Up @@ -16,13 +16,12 @@

import numpy as np
import warnings
from joblib import Parallel
from numbers import Integral, Real

from collections import defaultdict
from ..utils._param_validation import Interval, validate_params
from ..utils.validation import check_is_fitted
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel
from ..utils import check_random_state, gen_batches, check_array
from ..base import BaseEstimator, ClusterMixin
from ..neighbors import NearestNeighbors
Expand Down
3 changes: 1 addition & 2 deletions sklearn/compose/_column_transformer.py
Expand Up @@ -12,7 +12,6 @@

import numpy as np
from scipy import sparse
from joblib import Parallel

from ..base import clone, TransformerMixin
from ..utils._estimator_html_repr import _VisualBlock
Expand All @@ -26,7 +25,7 @@
from ..utils import check_pandas_support
from ..utils.metaestimators import _BaseComposition
from ..utils.validation import check_array, check_is_fitted, _check_feature_names_in
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel


__all__ = ["ColumnTransformer", "make_column_transformer", "make_column_selector"]
Expand Down
3 changes: 1 addition & 2 deletions sklearn/covariance/_graph_lasso.py
Expand Up @@ -13,7 +13,6 @@
from numbers import Integral, Real
import numpy as np
from scipy import linalg
from joblib import Parallel

from . import empirical_covariance, EmpiricalCovariance, log_likelihood

Expand All @@ -23,7 +22,7 @@
check_random_state,
check_scalar,
)
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel
from ..utils._param_validation import Interval, StrOptions

# mypy error: Module 'sklearn.linear_model' has no attribute '_cd_fast'
Expand Down
4 changes: 2 additions & 2 deletions sklearn/decomposition/_dict_learning.py
Expand Up @@ -13,15 +13,15 @@

import numpy as np
from scipy import linalg
from joblib import Parallel, effective_n_jobs
from joblib import effective_n_jobs

from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..utils import check_array, check_random_state, gen_even_slices, gen_batches
from ..utils._param_validation import Hidden, Interval, StrOptions
from ..utils._param_validation import validate_params
from ..utils.extmath import randomized_svd, row_norms, svd_flip
from ..utils.validation import check_is_fitted
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel
from ..linear_model import Lasso, orthogonal_mp_gram, LassoLars, Lars


Expand Down
4 changes: 2 additions & 2 deletions sklearn/decomposition/_lda.py
Expand Up @@ -15,13 +15,13 @@
import numpy as np
import scipy.sparse as sp
from scipy.special import gammaln, logsumexp
from joblib import Parallel, effective_n_jobs
from joblib import effective_n_jobs

from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..utils import check_random_state, gen_batches, gen_even_slices
from ..utils.validation import check_non_negative
from ..utils.validation import check_is_fitted
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel
from ..utils._param_validation import Interval, StrOptions

from ._online_lda_fast import (
Expand Down
3 changes: 1 addition & 2 deletions sklearn/decomposition/tests/test_dict_learning.py
Expand Up @@ -5,15 +5,14 @@
from functools import partial
import itertools

from joblib import Parallel

import sklearn

from sklearn.base import clone

from sklearn.exceptions import ConvergenceWarning

from sklearn.utils import check_array
from sklearn.utils.parallel import Parallel

from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_array_almost_equal
Expand Down
4 changes: 1 addition & 3 deletions sklearn/ensemble/_bagging.py
Expand Up @@ -12,8 +12,6 @@
from warnings import warn
from functools import partial

from joblib import Parallel

from ._base import BaseEnsemble, _partition_estimators
from ..base import ClassifierMixin, RegressorMixin
from ..metrics import r2_score, accuracy_score
Expand All @@ -25,7 +23,7 @@
from ..utils.random import sample_without_replacement
from ..utils._param_validation import Interval, HasMethods, StrOptions
from ..utils.validation import has_fit_parameter, check_is_fitted, _check_sample_weight
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel


__all__ = ["BaggingClassifier", "BaggingRegressor"]
Expand Down
3 changes: 1 addition & 2 deletions sklearn/ensemble/_forest.py
Expand Up @@ -48,7 +48,6 @@ class calls the ``fit`` method of each sub-estimator on random samples
import numpy as np
from scipy.sparse import issparse
from scipy.sparse import hstack as sparse_hstack
from joblib import Parallel

from ..base import is_classifier
from ..base import ClassifierMixin, MultiOutputMixin, RegressorMixin, TransformerMixin
Expand All @@ -66,7 +65,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
from ..utils import check_random_state, compute_sample_weight
from ..exceptions import DataConversionWarning
from ._base import BaseEnsemble, _partition_estimators
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel
from ..utils.multiclass import check_classification_targets, type_of_target
from ..utils.validation import (
check_is_fitted,
Expand Down
3 changes: 1 addition & 2 deletions sklearn/ensemble/_stacking.py
Expand Up @@ -8,7 +8,6 @@
from numbers import Integral

import numpy as np
from joblib import Parallel
import scipy.sparse as sparse

from ..base import clone
Expand All @@ -33,7 +32,7 @@
from ..utils.metaestimators import available_if
from ..utils.validation import check_is_fitted
from ..utils.validation import column_or_1d
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel
from ..utils._param_validation import HasMethods, StrOptions
from ..utils.validation import _check_feature_names_in

Expand Down
4 changes: 1 addition & 3 deletions sklearn/ensemble/_voting.py
Expand Up @@ -18,8 +18,6 @@

import numpy as np

from joblib import Parallel

from ..base import ClassifierMixin
from ..base import RegressorMixin
from ..base import TransformerMixin
Expand All @@ -36,7 +34,7 @@
from ..utils._param_validation import StrOptions
from ..exceptions import NotFittedError
from ..utils._estimator_html_repr import _VisualBlock
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel


class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble):
Expand Down
6 changes: 3 additions & 3 deletions sklearn/ensemble/tests/test_forest.py
Expand Up @@ -18,16 +18,15 @@
from typing import Dict, Any

import numpy as np
from joblib import Parallel
from scipy.sparse import csr_matrix
from scipy.sparse import csc_matrix
from scipy.sparse import coo_matrix
from scipy.special import comb

import pytest

import joblib

import pytest

import sklearn
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_poisson_deviance
Expand All @@ -52,6 +51,7 @@
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.utils.parallel import Parallel
from sklearn.utils.validation import check_random_state

from sklearn.metrics import mean_squared_error
Expand Down
4 changes: 2 additions & 2 deletions sklearn/feature_selection/_rfe.py
Expand Up @@ -8,15 +8,15 @@

import numpy as np
from numbers import Integral, Real
from joblib import Parallel, effective_n_jobs
from joblib import effective_n_jobs


from ..utils.metaestimators import available_if
from ..utils.metaestimators import _safe_split
from ..utils._param_validation import HasMethods, Interval
from ..utils._tags import _safe_tags
from ..utils.validation import check_is_fitted
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel
from ..base import BaseEstimator
from ..base import MetaEstimatorMixin
from ..base import clone
Expand Down
3 changes: 1 addition & 2 deletions sklearn/inspection/_permutation_importance.py
@@ -1,7 +1,6 @@
"""Permutation importance for estimators."""
import numbers
import numpy as np
from joblib import Parallel

from ..ensemble._bagging import _generate_indices
from ..metrics import check_scoring
Expand All @@ -10,7 +9,7 @@
from ..utils import Bunch, _safe_indexing
from ..utils import check_random_state
from ..utils import check_array
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel


def _weights_scorer(scorer, estimator, X, y, sample_weight):
Expand Down
3 changes: 1 addition & 2 deletions sklearn/inspection/_plot/partial_dependence.py
Expand Up @@ -6,7 +6,6 @@
import numpy as np
from scipy import sparse
from scipy.stats.mstats import mquantiles
from joblib import Parallel

from .. import partial_dependence
from .._pd_utils import _check_feature_names, _get_feature_index
Expand All @@ -16,7 +15,7 @@
from ...utils import check_matplotlib_support # noqa
from ...utils import check_random_state
from ...utils import _safe_indexing
from ...utils.fixes import delayed
from ...utils.parallel import delayed, Parallel
from ...utils._encode import _unique


Expand Down
3 changes: 1 addition & 2 deletions sklearn/linear_model/_base.py
Expand Up @@ -25,7 +25,6 @@
from scipy import sparse
from scipy.sparse.linalg import lsqr
from scipy.special import expit
from joblib import Parallel
from numbers import Integral

from ..base import BaseEstimator, ClassifierMixin, RegressorMixin, MultiOutputMixin
Expand All @@ -40,7 +39,7 @@
from ..utils._seq_dataset import ArrayDataset32, CSRDataset32
from ..utils._seq_dataset import ArrayDataset64, CSRDataset64
from ..utils.validation import check_is_fitted, _check_sample_weight
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel

# TODO: bayesian_ridge_regression and bayesian_regression_ard
# should be squashed into its respective objects.
Expand Down
4 changes: 2 additions & 2 deletions sklearn/linear_model/_coordinate_descent.py
Expand Up @@ -14,7 +14,7 @@

import numpy as np
from scipy import sparse
from joblib import Parallel, effective_n_jobs
from joblib import effective_n_jobs

from ._base import LinearModel, _pre_fit
from ..base import RegressorMixin, MultiOutputMixin
Expand All @@ -30,7 +30,7 @@
check_is_fitted,
column_or_1d,
)
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel

# mypy error: Module 'sklearn.linear_model' has no attribute '_cd_fast'
from . import _cd_fast as cd_fast # type: ignore
Expand Down
3 changes: 1 addition & 2 deletions sklearn/linear_model/_least_angle.py
Expand Up @@ -16,7 +16,6 @@
import numpy as np
from scipy import linalg, interpolate
from scipy.linalg.lapack import get_lapack_funcs
from joblib import Parallel

from ._base import LinearModel, LinearRegression
from ._base import _deprecate_normalize, _preprocess_data
Expand All @@ -28,7 +27,7 @@
from ..utils._param_validation import Hidden, Interval, StrOptions
from ..model_selection import check_cv
from ..exceptions import ConvergenceWarning
from ..utils.fixes import delayed
from ..utils.parallel import delayed, Parallel

SOLVE_TRIANGULAR_ARGS = {"check_finite": False}

Expand Down

0 comments on commit a7cd0ca

Please sign in to comment.