Skip to content

Commit

Permalink
ENH Raise NotFittedError in get_feature_names_out for MissingIndicator, KBinsDiscretizer, SplineTransformer, DictVectorizer (#25402)
Browse files Browse the repository at this point in the history

Co-authored-by: Alex <alex.buzenet.fr@gmail.com>
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
  • Loading branch information
3 people committed Jan 23, 2023
1 parent aea3cfb commit d85b1d3
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 5 deletions.
6 changes: 5 additions & 1 deletion doc/whats_new/v1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,27 +36,31 @@ Changes impacting all modules
raises a `NotFittedError` if the instance is not fitted. This ensures the error is
consistent in all estimators with the `get_feature_names_out` method.

- :class:`impute.MissingIndicator`
- :class:`feature_extraction.DictVectorizer`
- :class:`feature_extraction.text.TfidfTransformer`
- :class:`kernel_approximation.AdditiveChi2Sampler`
- :class:`impute.IterativeImputer`
- :class:`impute.KNNImputer`
- :class:`impute.SimpleImputer`
- :class:`isotonic.IsotonicRegression`
- :class:`preprocessing.Binarizer`
- :class:`preprocessing.KBinsDiscretizer`
- :class:`preprocessing.MaxAbsScaler`
- :class:`preprocessing.MinMaxScaler`
- :class:`preprocessing.Normalizer`
- :class:`preprocessing.OrdinalEncoder`
- :class:`preprocessing.PowerTransformer`
- :class:`preprocessing.QuantileTransformer`
- :class:`preprocessing.RobustScaler`
- :class:`preprocessing.SplineTransformer`
- :class:`preprocessing.StandardScaler`

The `NotFittedError` displays an informative message asking to fit the instance
with the appropriate arguments.

:pr:`25294` by :user:`John Pangas <jpangas>` and :pr:`25291`, :pr:`25367` by
:user:`Rahil Parikh <rprkh>` and :pr:`25402` by :user:`Alex Buzenet <albuzenet>`.

Changelog
---------
Expand Down
2 changes: 2 additions & 0 deletions sklearn/feature_extraction/_dict_vectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from ..base import BaseEstimator, TransformerMixin
from ..utils import check_array
from ..utils.validation import check_is_fitted


class DictVectorizer(TransformerMixin, BaseEstimator):
Expand Down Expand Up @@ -384,6 +385,7 @@ def get_feature_names_out(self, input_features=None):
feature_names_out : ndarray of str objects
Transformed feature names.
"""
check_is_fitted(self, "feature_names_")
if any(not isinstance(name, str) for name in self.feature_names_):
feature_names = [str(name) for name in self.feature_names_]
else:
Expand Down
4 changes: 4 additions & 0 deletions sklearn/impute/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,9 @@ def _fit(self, X, y=None, precomputed=False):
# in the Imputer calling MissingIndicator
if not self._precomputed:
X = self._validate_input(X, in_fit=True)
else:
# only create `n_features_in_` in the precomputed case
self._check_n_features(X, reset=True)

self._n_features = X.shape[1]

Expand Down Expand Up @@ -1054,6 +1057,7 @@ def get_feature_names_out(self, input_features=None):
feature_names_out : ndarray of str objects
Transformed feature names.
"""
check_is_fitted(self, "n_features_in_")
input_features = _check_feature_names_in(self, input_features)
prefix = self.__class__.__name__.lower()
return np.asarray(
Expand Down
1 change: 1 addition & 0 deletions sklearn/preprocessing/_discretization.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,7 @@ def get_feature_names_out(self, input_features=None):
feature_names_out : ndarray of str objects
Transformed feature names.
"""
check_is_fitted(self, "n_features_in_")
input_features = _check_feature_names_in(self, input_features)
if hasattr(self, "_encoder"):
return self._encoder.get_feature_names_out(input_features)
Expand Down
1 change: 1 addition & 0 deletions sklearn/preprocessing/_polynomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,7 @@ def get_feature_names_out(self, input_features=None):
feature_names_out : ndarray of str objects
Transformed feature names.
"""
check_is_fitted(self, "n_features_in_")
n_splines = self.bsplines_[0].c.shape[1]

input_features = _check_feature_names_in(self, input_features)
Expand Down
4 changes: 0 additions & 4 deletions sklearn/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,11 +463,8 @@ def test_transformers_get_feature_names_out(transformer):
]

WHITELISTED_FAILING_ESTIMATORS = [
"DictVectorizer",
"GaussianRandomProjection",
"GenericUnivariateSelect",
"KBinsDiscretizer",
"MissingIndicator",
"RFE",
"RFECV",
"SelectFdr",
Expand All @@ -478,7 +475,6 @@ def test_transformers_get_feature_names_out(transformer):
"SelectPercentile",
"SequentialFeatureSelector",
"SparseRandomProjection",
"SplineTransformer",
"VarianceThreshold",
]

Expand Down

0 comments on commit d85b1d3

Please sign in to comment.