Skip to content

Commit

Permalink
MAINT Ensure disjoint interval constraints (#25797)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremiedbb committed Mar 15, 2023
1 parent 36bb7e5 commit 9d55835
Show file tree
Hide file tree
Showing 19 changed files with 168 additions and 198 deletions.
15 changes: 8 additions & 7 deletions sklearn/cluster/_optics.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from ..metrics.pairwise import _VALID_METRICS
from ..utils import gen_batches, get_chunk_n_rows
from ..utils._param_validation import Interval, HasMethods, StrOptions, validate_params
from ..utils._param_validation import RealNotInt
from ..utils.validation import check_memory
from ..neighbors import NearestNeighbors
from ..base import BaseEstimator, ClusterMixin
Expand Down Expand Up @@ -233,7 +234,7 @@ class OPTICS(ClusterMixin, BaseEstimator):
_parameter_constraints: dict = {
"min_samples": [
Interval(Integral, 2, None, closed="left"),
Interval(Real, 0, 1, closed="both"),
Interval(RealNotInt, 0, 1, closed="both"),
],
"max_eps": [Interval(Real, 0, None, closed="both")],
"metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable],
Expand All @@ -245,7 +246,7 @@ class OPTICS(ClusterMixin, BaseEstimator):
"predecessor_correction": ["boolean"],
"min_cluster_size": [
Interval(Integral, 2, None, closed="left"),
Interval(Real, 0, 1, closed="right"),
Interval(RealNotInt, 0, 1, closed="right"),
None,
],
"algorithm": [StrOptions({"auto", "brute", "ball_tree", "kd_tree"})],
Expand Down Expand Up @@ -431,7 +432,7 @@ def _compute_core_distances_(X, neighbors, min_samples, working_memory):
"X": [np.ndarray, "sparse matrix"],
"min_samples": [
Interval(Integral, 2, None, closed="left"),
Interval(Real, 0, 1, closed="both"),
Interval(RealNotInt, 0, 1, closed="both"),
],
"max_eps": [Interval(Real, 0, None, closed="both")],
"metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable],
Expand Down Expand Up @@ -723,12 +724,12 @@ def cluster_optics_dbscan(*, reachability, core_distances, ordering, eps):
"predecessor": [np.ndarray],
"ordering": [np.ndarray],
"min_samples": [
Interval(Integral, 1, None, closed="neither"),
Interval(Real, 0, 1, closed="both"),
Interval(Integral, 2, None, closed="left"),
Interval(RealNotInt, 0, 1, closed="both"),
],
"min_cluster_size": [
Interval(Integral, 1, None, closed="neither"),
Interval(Real, 0, 1, closed="both"),
Interval(Integral, 2, None, closed="left"),
Interval(RealNotInt, 0, 1, closed="both"),
None,
],
"xi": [Interval(Real, 0, 1, closed="both")],
Expand Down
2 changes: 1 addition & 1 deletion sklearn/cluster/tests/test_optics.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def test_minimum_number_of_sample_check():

# Compute OPTICS
X = [[1, 1]]
clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10, min_cluster_size=1)
clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10, min_cluster_size=1.0)

# Run the fit
with pytest.raises(ValueError, match=msg):
Expand Down
3 changes: 2 additions & 1 deletion sklearn/decomposition/_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from ..utils.extmath import stable_cumsum
from ..utils.validation import check_is_fitted
from ..utils._param_validation import Interval, StrOptions
from ..utils._param_validation import RealNotInt


def _assess_dimension(spectrum, rank, n_samples):
Expand Down Expand Up @@ -363,7 +364,7 @@ class PCA(_BasePCA):
_parameter_constraints: dict = {
"n_components": [
Interval(Integral, 0, None, closed="left"),
Interval(Real, 0, 1, closed="neither"),
Interval(RealNotInt, 0, 1, closed="neither"),
StrOptions({"mle"}),
None,
],
Expand Down
7 changes: 4 additions & 3 deletions sklearn/ensemble/_bagging.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numbers
import numpy as np
from abc import ABCMeta, abstractmethod
from numbers import Integral, Real
from numbers import Integral
from warnings import warn
from functools import partial

Expand All @@ -22,6 +22,7 @@
from ..utils.multiclass import check_classification_targets
from ..utils.random import sample_without_replacement
from ..utils._param_validation import Interval, HasMethods, StrOptions
from ..utils._param_validation import RealNotInt
from ..utils.validation import has_fit_parameter, check_is_fitted, _check_sample_weight
from ..utils._tags import _safe_tags
from ..utils.parallel import delayed, Parallel
Expand Down Expand Up @@ -248,11 +249,11 @@ class BaseBagging(BaseEnsemble, metaclass=ABCMeta):
"n_estimators": [Interval(Integral, 1, None, closed="left")],
"max_samples": [
Interval(Integral, 1, None, closed="left"),
Interval(Real, 0, 1, closed="right"),
Interval(RealNotInt, 0, 1, closed="right"),
],
"max_features": [
Interval(Integral, 1, None, closed="left"),
Interval(Real, 0, 1, closed="right"),
Interval(RealNotInt, 0, 1, closed="right"),
],
"bootstrap": ["boolean"],
"bootstrap_features": ["boolean"],
Expand Down
3 changes: 2 additions & 1 deletion sklearn/ensemble/_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
)
from ..utils.validation import _num_samples
from ..utils._param_validation import Interval, StrOptions
from ..utils._param_validation import RealNotInt


__all__ = [
Expand Down Expand Up @@ -206,7 +207,7 @@ class BaseForest(MultiOutputMixin, BaseEnsemble, metaclass=ABCMeta):
"warm_start": ["boolean"],
"max_samples": [
None,
Interval(Real, 0.0, 1.0, closed="right"),
Interval(RealNotInt, 0.0, 1.0, closed="right"),
Interval(Integral, 1, None, closed="left"),
],
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
_check_monotonic_cst,
)
from ...utils._param_validation import Interval, StrOptions
from ...utils._param_validation import RealNotInt
from ...utils._openmp_helpers import _openmp_effective_n_threads
from ...utils.multiclass import check_classification_targets
from ...metrics import check_scoring
Expand Down Expand Up @@ -99,7 +100,7 @@ class BaseHistGradientBoosting(BaseEstimator, ABC):
],
"n_iter_no_change": [Interval(Integral, 1, None, closed="left")],
"validation_fraction": [
Interval(Real, 0, 1, closed="neither"),
Interval(RealNotInt, 0, 1, closed="neither"),
Interval(Integral, 1, None, closed="left"),
None,
],
Expand Down
3 changes: 2 additions & 1 deletion sklearn/ensemble/_iforest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
get_chunk_n_rows,
)
from ..utils._param_validation import Interval, StrOptions
from ..utils._param_validation import RealNotInt
from ..utils.validation import check_is_fitted, _num_samples
from ..base import OutlierMixin

Expand Down Expand Up @@ -206,7 +207,7 @@ class IsolationForest(OutlierMixin, BaseBagging):
"max_samples": [
StrOptions({"auto"}),
Interval(Integral, 1, None, closed="left"),
Interval(Real, 0, 1, closed="right"),
Interval(RealNotInt, 0, 1, closed="right"),
],
"contamination": [
StrOptions({"auto"}),
Expand Down
7 changes: 4 additions & 3 deletions sklearn/feature_extraction/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from ..base import BaseEstimator, TransformerMixin
from ..utils import check_array, check_random_state
from ..utils._param_validation import Hidden, Interval, validate_params
from ..utils._param_validation import RealNotInt

__all__ = [
"PatchExtractor",
Expand Down Expand Up @@ -343,8 +344,8 @@ def _extract_patches(arr, patch_shape=8, extraction_step=1):
"image": [np.ndarray],
"patch_size": [tuple, list],
"max_patches": [
Interval(Real, left=0, right=1, closed="neither"),
Interval(Integral, left=1, right=None, closed="left"),
Interval(RealNotInt, 0, 1, closed="neither"),
Interval(Integral, 1, None, closed="left"),
None,
],
"random_state": ["random_state"],
Expand Down Expand Up @@ -542,7 +543,7 @@ class PatchExtractor(TransformerMixin, BaseEstimator):
"patch_size": [tuple, None],
"max_patches": [
None,
Interval(Real, 0, 1, closed="neither"),
Interval(RealNotInt, 0, 1, closed="neither"),
Interval(Integral, 1, None, closed="left"),
],
"random_state": ["random_state"],
Expand Down
7 changes: 4 additions & 3 deletions sklearn/feature_extraction/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from collections import defaultdict
from collections.abc import Mapping
from functools import partial
from numbers import Integral, Real
from numbers import Integral
from operator import itemgetter
import re
import unicodedata
Expand All @@ -32,6 +32,7 @@
from ..utils import _IS_32BIT
from ..exceptions import NotFittedError
from ..utils._param_validation import StrOptions, Interval, HasMethods
from ..utils._param_validation import RealNotInt


__all__ = [
Expand Down Expand Up @@ -1148,11 +1149,11 @@ class CountVectorizer(_VectorizerMixin, BaseEstimator):
"ngram_range": [tuple],
"analyzer": [StrOptions({"word", "char", "char_wb"}), callable],
"max_df": [
Interval(Real, 0, 1, closed="both"),
Interval(RealNotInt, 0, 1, closed="both"),
Interval(Integral, 1, None, closed="left"),
],
"min_df": [
Interval(Real, 0, 1, closed="both"),
Interval(RealNotInt, 0, 1, closed="both"),
Interval(Integral, 1, None, closed="left"),
],
"max_features": [Interval(Integral, 1, None, closed="left"), None],
Expand Down
7 changes: 4 additions & 3 deletions sklearn/feature_selection/_rfe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
"""Recursive feature elimination for feature ranking"""

import numpy as np
from numbers import Integral, Real
from numbers import Integral
from joblib import effective_n_jobs


from ..utils.metaestimators import available_if
from ..utils.metaestimators import _safe_split
from ..utils._param_validation import HasMethods, Interval
from ..utils._param_validation import RealNotInt
from ..utils._tags import _safe_tags
from ..utils.validation import check_is_fitted
from ..utils.parallel import delayed, Parallel
Expand Down Expand Up @@ -187,12 +188,12 @@ class RFE(SelectorMixin, MetaEstimatorMixin, BaseEstimator):
"estimator": [HasMethods(["fit"])],
"n_features_to_select": [
None,
Interval(Real, 0, 1, closed="right"),
Interval(RealNotInt, 0, 1, closed="right"),
Interval(Integral, 0, None, closed="neither"),
],
"step": [
Interval(Integral, 0, None, closed="neither"),
Interval(Real, 0, 1, closed="neither"),
Interval(RealNotInt, 0, 1, closed="neither"),
],
"verbose": ["verbose"],
"importance_getter": [str, callable],
Expand Down
3 changes: 2 additions & 1 deletion sklearn/feature_selection/_sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from ._base import SelectorMixin
from ..base import BaseEstimator, MetaEstimatorMixin, clone
from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions
from ..utils._param_validation import RealNotInt
from ..utils._tags import _safe_tags
from ..utils.validation import check_is_fitted
from ..model_selection import cross_val_score
Expand Down Expand Up @@ -154,7 +155,7 @@ class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator
"estimator": [HasMethods(["fit"])],
"n_features_to_select": [
StrOptions({"auto", "warn"}, deprecated={"warn"}),
Interval(Real, 0, 1, closed="right"),
Interval(RealNotInt, 0, 1, closed="right"),
Interval(Integral, 0, None, closed="neither"),
Hidden(None),
],
Expand Down
3 changes: 2 additions & 1 deletion sklearn/linear_model/_ransac.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from ._base import LinearRegression
from ..utils.validation import has_fit_parameter
from ..utils._param_validation import Interval, Options, StrOptions, HasMethods, Hidden
from ..utils._param_validation import RealNotInt
from ..exceptions import ConvergenceWarning

_EPSILON = np.spacing(1)
Expand Down Expand Up @@ -236,7 +237,7 @@ class RANSACRegressor(
"estimator": [HasMethods(["fit", "score", "predict"]), None],
"min_samples": [
Interval(Integral, 1, None, closed="left"),
Interval(Real, 0, 1, closed="both"),
Interval(RealNotInt, 0, 1, closed="both"),
None,
],
"residual_threshold": [Interval(Real, 0, None, closed="left"), None],
Expand Down
5 changes: 3 additions & 2 deletions sklearn/model_selection/_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from ..utils.validation import check_array
from ..utils.multiclass import type_of_target
from ..utils._param_validation import validate_params, Interval
from ..utils._param_validation import RealNotInt

__all__ = [
"BaseCrossValidator",
Expand Down Expand Up @@ -2464,12 +2465,12 @@ def check_cv(cv=5, y=None, *, classifier=False):
@validate_params(
{
"test_size": [
Interval(numbers.Real, 0, 1, closed="neither"),
Interval(RealNotInt, 0, 1, closed="neither"),
Interval(numbers.Integral, 1, None, closed="left"),
None,
],
"train_size": [
Interval(numbers.Real, 0, 1, closed="neither"),
Interval(RealNotInt, 0, 1, closed="neither"),
Interval(numbers.Integral, 1, None, closed="left"),
None,
],
Expand Down
5 changes: 3 additions & 2 deletions sklearn/preprocessing/_encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# License: BSD 3 clause

import numbers
from numbers import Integral, Real
from numbers import Integral
import warnings

import numpy as np
Expand All @@ -14,6 +14,7 @@
from ..utils.validation import check_is_fitted
from ..utils.validation import _check_feature_names_in
from ..utils._param_validation import Interval, StrOptions, Hidden
from ..utils._param_validation import RealNotInt
from ..utils._mask import _get_mask

from ..utils._encode import _encode, _check_unknown, _unique, _get_counts
Expand Down Expand Up @@ -493,7 +494,7 @@ class OneHotEncoder(_BaseEncoder):
"max_categories": [Interval(Integral, 1, None, closed="left"), None],
"min_frequency": [
Interval(Integral, 1, None, closed="left"),
Interval(Real, 0, 1, closed="neither"),
Interval(RealNotInt, 0, 1, closed="neither"),
None,
],
"sparse": [Hidden(StrOptions({"deprecated"})), "boolean"], # deprecated
Expand Down
18 changes: 17 additions & 1 deletion sklearn/tests/test_public_functions.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from importlib import import_module
from inspect import signature
from numbers import Integral, Real

import pytest

from sklearn.utils._param_validation import generate_invalid_param_val
from sklearn.utils._param_validation import generate_valid_param
from sklearn.utils._param_validation import make_constraint
from sklearn.utils._param_validation import InvalidParameterError
from sklearn.utils._param_validation import Interval


def _get_func_info(func_module):
Expand Down Expand Up @@ -70,6 +72,20 @@ def _check_function_param_validation(
# This parameter is not validated
continue

# Mixing an interval of reals and an interval of integers must be avoided.
if any(
isinstance(constraint, Interval) and constraint.type == Integral
for constraint in constraints
) and any(
isinstance(constraint, Interval) and constraint.type == Real
for constraint in constraints
):
raise ValueError(
f"The constraint for parameter {param_name} of {func_name} can't have a"
" mix of intervals of Integral and Real types. Use the type"
" RealNotInt instead of Real."
)

match = (
rf"The '{param_name}' parameter of {func_name} must be .* Got .* instead."
)
Expand All @@ -85,7 +101,7 @@ def _check_function_param_validation(

for constraint in constraints:
try:
bad_value = generate_invalid_param_val(constraint, constraints)
bad_value = generate_invalid_param_val(constraint)
except NotImplementedError:
continue

Expand Down

0 comments on commit 9d55835

Please sign in to comment.