Dec 5, 2022 · Dec 5, 2022 · Dec 5, 2022 · Dec 5, 2022 · Dec 5, 2022 · Dec 5, 2022
diff --git a/doc/whats_new/v0.10.rst b/doc/whats_new/v0.10.rst
 - Add support for automatic parameters validation as in scikit-learn >= 1.2.
 :pr:`955` by :user:`Guillaume Lemaitre <glemaitre>`.

 - Add support for `feature_names_in_` as well as `get_feature_names_out` for
  all samplers.
 :pr:`959` by :user:`Guillaume Lemaitre <glemaitre>`.

 Deprecation
 ...........

diff --git a/imblearn/base.py b/imblearn/base.py

 importnumpyasnp
 fromsklearn.baseimportBaseEstimator

 try:
 # scikit-learn >= 1.2
 fromsklearn.baseimportOneToOneFeatureMixin
 exceptImportError:
 fromsklearn.baseimport_OneToOneFeatureMixinasOneToOneFeatureMixin
 fromsklearn.preprocessingimportlabel_binarize
 fromsklearn.utils.multiclassimportcheck_classification_targets

            )


 classBaseSampler(SamplerMixin,_ParamsValidationMixin):
 classBaseSampler(SamplerMixin,OneToOneFeatureMixin,_ParamsValidationMixin):
 """Base class for sampling algorithms.

    Warning: This class should not be used directly. Use the derived classes

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    sklearn.preprocessing.FunctionTransformer : Stateless transformer.
diff --git a/imblearn/combine/_smote_enn.py b/imblearn/combine/_smote_enn.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    SMOTETomek : Over-sample using SMOTE followed by under-sampling removing
diff --git a/imblearn/combine/_smote_tomek.py b/imblearn/combine/_smote_tomek.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    SMOTEENN : Over-sample using SMOTE followed by under-sampling using Edited
diff --git a/imblearn/metrics/pairwise.py b/imblearn/metrics/pairwise.py
        List of length `n_features` containing the conditional probabilities
        for each category given a class.

    n_features_in_ : int
        Number of features in the input dataset.

        .. versionadded:: 0.10

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    sklearn.neighbors.DistanceMetric : Interface for fast metric computation.
diff --git a/imblearn/over_sampling/_adasyn.py b/imblearn/over_sampling/_adasyn.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    SMOTE : Over-sample using SMOTE.
diff --git a/imblearn/over_sampling/_random_over_sampler.py b/imblearn/over_sampling/_random_over_sampler.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    BorderlineSMOTE : Over-sample using the borderline-SMOTE variant.
diff --git a/imblearn/over_sampling/_smote/base.py b/imblearn/over_sampling/_smote/base.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    SMOTENC : Over-sample using SMOTE for continuous and categorical features.

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    SMOTE : Over-sample using SMOTE.

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    SMOTE : Over-sample using SMOTE.
diff --git a/imblearn/over_sampling/_smote/cluster.py b/imblearn/over_sampling/_smote/cluster.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    SMOTE : Over-sample using SMOTE.
diff --git a/imblearn/over_sampling/_smote/filter.py b/imblearn/over_sampling/_smote/filter.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    SMOTE : Over-sample using SMOTE.

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    SMOTE : Over-sample using SMOTE.
diff --git a/imblearn/tests/test_common.py b/imblearn/tests/test_common.py
 #          Christos Aridas
 # License: MIT

 importwarnings
 fromcollectionsimportOrderedDict

 importnumpyasnp
 fromimblearn.under_samplingimportNearMiss,RandomUnderSampler
 fromimblearn.utils.estimator_checksimport (
 _set_checking_parameters,
 check_dataframe_column_names_consistency,
 check_param_validation,
 parametrize_with_checks,
 )
 X_res,y_res=sampler.fit_resample(X,y)
 assertX_res.shape[0]==sum(strategy.values())
 asserty_res.shape[0]==sum(strategy.values())


 @pytest.mark.parametrize(
 "estimator",_tested_estimators(),ids=_get_check_estimator_ids
 )
 deftest_pandas_column_name_consistency(estimator):
 _set_checking_parameters(estimator)
 withignore_warnings(category=(FutureWarning)):
 withwarnings.catch_warnings(record=True)asrecord:
 check_dataframe_column_names_consistency(
 estimator.__class__.__name__,estimator
            )
 forwarninginrecord:
 assert"was fitted without feature names"notinstr(warning.message)
diff --git a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    EditedNearestNeighbours : Under-sampling by editing samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py b/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    EditedNearestNeighbours : Undersample by editing samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py b/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    CondensedNearestNeighbour : Undersample by condensing samples.

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    CondensedNearestNeighbour : Undersample by condensing samples.

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    CondensedNearestNeighbour : Under-sampling by condensing samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    NearMiss : Undersample based on near-miss search.
diff --git a/imblearn/under_sampling/_prototype_selection/_nearmiss.py b/imblearn/under_sampling/_prototype_selection/_nearmiss.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    RandomUnderSampler : Randomly undersample the dataset.
diff --git a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    EditedNearestNeighbours : Undersample by editing noisy samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py b/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    EditedNearestNeighbours : Undersample by editing noisy samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    NearMiss : Undersample using near-miss samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_tomek_links.py b/imblearn/under_sampling/_prototype_selection/_tomek_links.py

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    EditedNearestNeighbours : Undersample by editing samples.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -22,6 +22,10 @@ Compatibility
		- Add support for automatic parameters validation as in scikit-learn >= 1.2.
		:pr:`955` by :user:`Guillaume Lemaitre <glemaitre>`.

		- Add support for `feature_names_in_` as well as `get_feature_names_out` for
		all samplers.
		:pr:`959` by :user:`Guillaume Lemaitre <glemaitre>`.

		Deprecation
		...........

Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -8,6 +8,12 @@

		importnumpyasnp
		fromsklearn.baseimportBaseEstimator

		try:
		# scikit-learn >= 1.2
		fromsklearn.baseimportOneToOneFeatureMixin
		exceptImportError:
		fromsklearn.baseimport_OneToOneFeatureMixinasOneToOneFeatureMixin
		fromsklearn.preprocessingimportlabel_binarize
		fromsklearn.utils.multiclassimportcheck_classification_targets

Expand DownExpand Up		@@ -133,7 +139,7 @@ class attribute, which is a dictionary `param_name: list of constraints`. See
		)


		classBaseSampler(SamplerMixin,_ParamsValidationMixin):
		classBaseSampler(SamplerMixin,OneToOneFeatureMixin,_ParamsValidationMixin):
		"""Base class for sampling algorithms.

		Warning: This class should not be used directly. Use the derived classes
Expand DownExpand Up		@@ -260,6 +266,12 @@ class FunctionSampler(BaseSampler):

		.. versionadded:: 0.9

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		sklearn.preprocessing.FunctionTransformer : Stateless transformer.
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -67,6 +67,12 @@ class SMOTEENN(BaseSampler):

		.. versionadded:: 0.9

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		SMOTETomek : Over-sample using SMOTE followed by under-sampling removing
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -66,6 +66,12 @@ class SMOTETomek(BaseSampler):

		.. versionadded:: 0.9

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		SMOTEENN : Over-sample using SMOTE followed by under-sampling using Edited
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -71,6 +71,17 @@ class ValueDifferenceMetric(BaseEstimator, _ParamsValidationMixin):
		List of length `n_features` containing the conditional probabilities
		for each category given a class.

		n_features_in_ : int
		Number of features in the input dataset.

		.. versionadded:: 0.10

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		sklearn.neighbors.DistanceMetric : Interface for fast metric computation.
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -73,6 +73,12 @@ class ADASYN(BaseOverSampler):

		.. versionadded:: 0.9

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		SMOTE : Over-sample using SMOTE.
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -76,6 +76,12 @@ class RandomOverSampler(BaseOverSampler):

		.. versionadded:: 0.9

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		BorderlineSMOTE : Over-sample using the borderline-SMOTE variant.
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -264,6 +264,12 @@ class SMOTE(BaseSMOTE):

		.. versionadded:: 0.9

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		SMOTENC : Over-sample using SMOTE for continuous and categorical features.
Expand DownExpand Up		@@ -442,6 +448,12 @@ class SMOTENC(SMOTE):

		.. versionadded:: 0.9

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		SMOTE : Over-sample using SMOTE.
Expand DownExpand Up		@@ -759,6 +771,12 @@ class SMOTEN(SMOTE):

		.. versionadded:: 0.9

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		SMOTE : Over-sample using SMOTE.
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -93,6 +93,12 @@ class KMeansSMOTE(BaseSMOTE):

		.. versionadded:: 0.9

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		SMOTE : Over-sample using SMOTE.
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -100,6 +100,12 @@ class BorderlineSMOTE(BaseSMOTE):

		.. versionadded:: 0.9

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		SMOTE : Over-sample using SMOTE.
Expand DownExpand Up		@@ -352,6 +358,12 @@ class SVMSMOTE(BaseSMOTE):

		.. versionadded:: 0.9

		feature_names_in_ : ndarray of shape (`n_features_in_`,)
		Names of features seen during `fit`. Defined only when `X` has feature
		names that are all strings.

		.. versionadded:: 0.10

		See Also
		--------
		SMOTE : Over-sample using SMOTE.
Expand Down