ENH Raise NotFittedError in get_feature_names_out for MissingIndicator, KBinsDiscretizer, SplineTransformer, DictVectorizer (#25402)

Co-authored-by: Alex <alex.buzenet.fr@gmail.com>
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
scikit-learn · Jan 23, 2023 · d85b1d3 · d85b1d3
1 parent aea3cfb
commit d85b1d3
Show file tree

Hide file tree

Showing 6 changed files with 13 additions and 5 deletions.
diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
@@ -36,27 +36,31 @@ Changes impacting all modules
   raises a `NotFittedError` if the instance is not fitted. This ensures the error is
   consistent in all estimators with the `get_feature_names_out` method.
 
+  - :class:`impute.MissingIndicator`
+  - :class:`feature_extraction.DictVectorizer`
   - :class:`feature_extraction.text.TfidfTransformer`
   - :class:`kernel_approximation.AdditiveChi2Sampler`
   - :class:`impute.IterativeImputer`
   - :class:`impute.KNNImputer`
   - :class:`impute.SimpleImputer`
   - :class:`isotonic.IsotonicRegression`
   - :class:`preprocessing.Binarizer`
+  - :class:`preprocessing.KBinsDiscretizer`
   - :class:`preprocessing.MaxAbsScaler`
   - :class:`preprocessing.MinMaxScaler`
   - :class:`preprocessing.Normalizer`
   - :class:`preprocessing.OrdinalEncoder`
   - :class:`preprocessing.PowerTransformer`
   - :class:`preprocessing.QuantileTransformer`
   - :class:`preprocessing.RobustScaler`
+  - :class:`preprocessing.SplineTransformer`
   - :class:`preprocessing.StandardScaler`
 
   The `NotFittedError` displays an informative message asking to fit the instance
   with the appropriate arguments.
 
   :pr:`25294` by :user:`John Pangas <jpangas>` and :pr:`25291`, :pr:`25367` by
-  :user:`Rahil Parikh <rprkh>`.
+  :user:`Rahil Parikh <rprkh>` and :pr:`25402` by :user:`Alex Buzenet <albuzenet>`.
 
 Changelog
 ---------

diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py
@@ -12,6 +12,7 @@
 
 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_array
+from ..utils.validation import check_is_fitted
 
 
 class DictVectorizer(TransformerMixin, BaseEstimator):
@@ -384,6 +385,7 @@ def get_feature_names_out(self, input_features=None):
         feature_names_out : ndarray of str objects
             Transformed feature names.
         """
+        check_is_fitted(self, "feature_names_")
         if any(not isinstance(name, str) for name in self.feature_names_):
             feature_names = [str(name) for name in self.feature_names_]
         else:

diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py
@@ -937,6 +937,9 @@ def _fit(self, X, y=None, precomputed=False):
         # in the Imputer calling MissingIndicator
         if not self._precomputed:
             X = self._validate_input(X, in_fit=True)
+        else:
+            # `_validate_input` is skipped here, so set `n_features_in_` explicitly
+            self._check_n_features(X, reset=True)
 
         self._n_features = X.shape[1]
 
@@ -1054,6 +1057,7 @@ def get_feature_names_out(self, input_features=None):
         feature_names_out : ndarray of str objects
             Transformed feature names.
         """
+        check_is_fitted(self, "n_features_in_")
         input_features = _check_feature_names_in(self, input_features)
         prefix = self.__class__.__name__.lower()
         return np.asarray(

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
@@ -448,6 +448,7 @@ def get_feature_names_out(self, input_features=None):
         feature_names_out : ndarray of str objects
             Transformed feature names.
         """
+        check_is_fitted(self, "n_features_in_")
         input_features = _check_feature_names_in(self, input_features)
         if hasattr(self, "_encoder"):
             return self._encoder.get_feature_names_out(input_features)

diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
@@ -673,6 +673,7 @@ def get_feature_names_out(self, input_features=None):
         feature_names_out : ndarray of str objects
             Transformed feature names.
         """
+        check_is_fitted(self, "n_features_in_")
         n_splines = self.bsplines_[0].c.shape[1]
 
         input_features = _check_feature_names_in(self, input_features)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
@@ -463,11 +463,8 @@ def test_transformers_get_feature_names_out(transformer):
 ]
 
 WHITELISTED_FAILING_ESTIMATORS = [
-    "DictVectorizer",
     "GaussianRandomProjection",
     "GenericUnivariateSelect",
-    "KBinsDiscretizer",
-    "MissingIndicator",
     "RFE",
     "RFECV",
     "SelectFdr",
@@ -478,7 +475,6 @@ def test_transformers_get_feature_names_out(transformer):
     "SelectPercentile",
     "SequentialFeatureSelector",
     "SparseRandomProjection",
-    "SplineTransformer",
     "VarianceThreshold",
 ]