Dec 2, 2022 · Dec 2, 2022 · Dec 2, 2022 · Dec 2, 2022 · Dec 2, 2022 · Dec 2, 2022
diff --git a/README.rst b/README.rst
 .. |PythonMinVersion| replace:: 3.8
 .. |NumPyMinVersion| replace:: 1.17.3
 .. |SciPyMinVersion| replace:: 1.3.2
 .. |ScikitLearnMinVersion| replace:: 1.1.0
 .. |ScikitLearnMinVersion| replace:: 1.1.3
 .. |MatplotlibMinVersion| replace:: 3.1.2
 .. |PandasMinVersion| replace:: 1.0.5
 .. |TensorflowMinVersion| replace:: 2.4.3
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
        black --check --diff .
      displayName: Run black
    - bash: |
        ./build_tools/circle/linting.sh
        ./build_tools/azure/linting.sh
      displayName: Run linting
    - bash: |
        mypy imblearn/
        ne(variables['Build.Reason'], 'Schedule')
      )
    matrix:
 py37_conda_forge_openblas_ubuntu_1804:
 py38_conda_forge_openblas_ubuntu_1804:
        DISTRIB: 'conda'
        CONDA_CHANNEL: 'conda-forge'
        PYTHON_VERSION: '3.8'
        THREADPOOLCTL_VERSION: 'min'
        COVERAGE: 'false'
      # Linux + Python 3.8 build with OpenBLAS and without SITE_JOBLIB
 py37_conda_defaults_openblas:
 py38_conda_defaults_openblas:
        DISTRIB: 'conda'
        CONDA_CHANNEL: 'conda-forge'
        PYTHON_VERSION: '3.8'
        BLAS: 'openblas'
        NUMPY_VERSION: '1.19.5'  # we cannot get an older version of the dependencies resolution
        NUMPY_VERSION: '1.21.0'  # we cannot get an older version of the dependencies resolution
        SCIPY_VERSION: 'min'
        SKLEARN_VERSION: 'min'
        MATPLOTLIB_VERSION: 'none'
        PYTHON_ARCH: '64'
        PYTEST_VERSION: '*'
        COVERAGE: 'true'
      py38_pip_openblas_32bit:
        PYTHON_VERSION: '3.8'
        PYTHON_ARCH: '32'
diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh
    make_conda "python=$PYTHON_VERSION"
    python -m pip install -U pip

    python -m pip install scikit-learn pandas matplotlib
    python -m pip install pandas matplotlib
    python -m pip install --pre scikit-learn

 elif [[ "$DISTRIB" == "conda-pip-latest-tensorflow" ]]; then
    make_conda "python=$PYTHON_VERSION"
diff --git a/build_tools/azure/linting.sh b/build_tools/azure/linting.sh
 #!/bin/bash
 # Lint gate: runs flake8 and then three `git grep` based repository checks.
 # Each check prints the offending matches and exits with status 1 when it
 # finds something.

 set -e
 # pipefail is necessary to propagate exit codes
 set -o pipefail

 flake8 --show-source .
 echo -e "No problem detected by flake8\n"

 # For docstrings and warnings of deprecated attributes to be rendered
 # properly, the property decorator must come before the deprecated decorator
 # (else they are treated as functions)

 # do not error when grep -B1 "@property" finds nothing
 # (errexit stays disabled for the remaining checks as well: an empty grep
 # result exits non-zero and is the passing case for each of them)
 set +e
 bad_deprecation_property_order="$(git grep -A 10 "@property"  -- "*.py" | awk '/@property/,/def /' | grep -B1 "@deprecated")"

 if [ -n "$bad_deprecation_property_order" ]
 then
    echo "property decorator should come before deprecated decorator"
    echo "found the following occurrencies:"
    # Quoted so the report keeps one match per line and is not subject to
    # word splitting or glob expansion.
    echo "$bad_deprecation_property_order"
    exit 1
 fi

 # Check for default doctest directives ELLIPSIS and NORMALIZE_WHITESPACE

 doctest_directive="$(git grep -nw -E "# doctest\: \+(ELLIPSIS|NORMALIZE_WHITESPACE)")"

 if [ -n "$doctest_directive" ]
 then
    echo "ELLIPSIS and NORMALIZE_WHITESPACE doctest directives are enabled by default, but were found in:"
    echo "$doctest_directive"
    exit 1
 fi

 # Reject direct imports of joblib's `delayed`.
 # NOTE(review): the excluded pathspecs and the message below refer to
 # `sklearn/...` paths -- presumably carried over from scikit-learn's own
 # linting script; confirm they are intended for this repository.
 joblib_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/fixes.py")"

 if [ -n "$joblib_import" ]; then
    echo "Use from sklearn.utils.fixes import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:"
    echo "$joblib_import"
    exit 1
 fi
diff --git a/doc/ensemble.rst b/doc/ensemble.rst
  >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
  >>> bc = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
  ...                        random_state=0)
  >>> bc.fit(X_train, y_train) #doctest: +ELLIPSIS
  >>> bc.fit(X_train, y_train) #doctest:
  BaggingClassifier(...)
  >>> y_pred = bc.predict(X_test)
  >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
  >>> balanced_accuracy_score(y_test, y_pred)  # doctest:
  0.77...

 In :class:`BalancedBaggingClassifier`, each bootstrap sample will be further
  ...                                 sampling_strategy='auto',
  ...                                 replacement=False,
  ...                                 random_state=0)
  >>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS
  >>> bbc.fit(X_train, y_train) # doctest:
  BalancedBaggingClassifier(...)
  >>> y_pred = bbc.predict(X_test)
  >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
  >>> balanced_accuracy_score(y_test, y_pred)  # doctest:
  0.8...

 Changing the `sampler` will give rise to different known implementation

  >>> from imblearn.ensemble import BalancedRandomForestClassifier
  >>> brf = BalancedRandomForestClassifier(n_estimators=100, random_state=0)
  >>> brf.fit(X_train, y_train) # doctest: +ELLIPSIS
  >>> brf.fit(X_train, y_train) # doctest:
  BalancedRandomForestClassifier(...)
  >>> y_pred = brf.predict(X_test)
  >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
  >>> balanced_accuracy_score(y_test, y_pred)  # doctest:
  0.8...

 .. _boosting:
  >>> from imblearn.ensemble import RUSBoostClassifier
  >>> rusboost = RUSBoostClassifier(n_estimators=200, algorithm='SAMME.R',
  ...                               random_state=0)
  >>> rusboost.fit(X_train, y_train)  # doctest: +ELLIPSIS
  >>> rusboost.fit(X_train, y_train)  # doctest:
  RUSBoostClassifier(...)
  >>> y_pred = rusboost.predict(X_test)
  >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
  >>> balanced_accuracy_score(y_test, y_pred)  # doctest:
  0...

 A specific method which uses :class:`~sklearn.ensemble.AdaBoostClassifier` as

  >>> from imblearn.ensemble import EasyEnsembleClassifier
  >>> eec = EasyEnsembleClassifier(random_state=0)
  >>> eec.fit(X_train, y_train) # doctest: +ELLIPSIS
  >>> eec.fit(X_train, y_train) # doctest:
  EasyEnsembleClassifier(...)
  >>> y_pred = eec.predict(X_test)
  >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
  >>> balanced_accuracy_score(y_test, y_pred)  # doctest:
  0.6...

 .. topic:: Examples
diff --git a/doc/over_sampling.rst b/doc/over_sampling.rst

  >>> from sklearn.svm import LinearSVC
  >>> clf = LinearSVC()
  >>> clf.fit(X_resampled, y_resampled) # doctest : +ELLIPSIS
  >>> clf.fit(X_resampled, y_resampled)
  LinearSVC(...)

 In the figure below, we compare the decision functions of a classifier trained
diff --git a/imblearn/_min_dependencies.py b/imblearn/_min_dependencies.py
 NUMPY_MIN_VERSION = "1.17.3"
 SCIPY_MIN_VERSION = "1.3.2"
 PANDAS_MIN_VERSION = "1.0.5"
 SKLEARN_MIN_VERSION = "1.1.0"
 SKLEARN_MIN_VERSION = "1.1.3"
 TENSORFLOW_MIN_VERSION = "2.4.3"
 KERAS_MIN_VERSION = "2.4.3"
 JOBLIB_MIN_VERSION = "1.0.0"
 JOBLIB_MIN_VERSION = "1.1.1"
 THREADPOOLCTL_MIN_VERSION = "2.0.0"
 PYTEST_MIN_VERSION = "5.0.1"

diff --git a/imblearn/combine/_smote_enn.py b/imblearn/combine/_smote_enn.py

    >>> from collections import Counter
    >>> from sklearn.datasets import make_classification
    >>> from imblearn.combine import SMOTEENN # doctest: +NORMALIZE_WHITESPACE
    >>> from imblearn.combine import SMOTEENN # doctest:
    >>> X, y = make_classification(n_classes=2, class_sep=2,
    ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
    ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
diff --git a/imblearn/combine/_smote_tomek.py b/imblearn/combine/_smote_tomek.py
    >>> from collections import Counter
    >>> from sklearn.datasets import make_classification
    >>> from imblearn.combine import \
 SMOTETomek # doctest: +NORMALIZE_WHITESPACE
 SMOTETomek # doctest:
    >>> X, y = make_classification(n_classes=2, class_sep=2,
    ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
    ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py
 #          Christos Aridas
 # License: MIT

 import inspect
 import numbers
 import warnings

 import numpy as np


    Parameters
    ----------
 base_estimator : estimator object, default=None
 estimator : estimator object, default=None
        The base estimator to fit on random subsets of the dataset.
        If None, then the base estimator is a decision tree.

        .. versionadded:: 0.10

    n_estimators : int, default=10
        The number of base estimators in the ensemble.


        .. versionadded:: 0.8

    base_estimator : estimator object, default=None
        The base estimator to fit on random subsets of the dataset.
        If None, then the base estimator is a decision tree.

        .. deprecated:: 0.10
           `base_estimator` was renamed to `estimator` in version 0.10 and
           will be removed in 0.12.

    Attributes
    ----------
    estimator_ : estimator
        The base estimator from which the ensemble is grown.

        .. versionadded:: 0.10

    base_estimator_ : estimator
        The base estimator from which the ensemble is grown.

        .. deprecated:: 1.2
           `base_estimator_` is deprecated in `scikit-learn` 1.2 and will be
           removed in 1.4. Use `estimator_` instead. When the minimum version
           of `scikit-learn` supported by `imbalanced-learn` will reach 1.4,
           this attribute will be removed.

    n_features_ : int
        The number of features when `fit` is performed.

        .. deprecated:: 1.0
           `n_features_` is deprecated in `scikit-learn` 1.0 and will be removed
           in version 1.2. Depending on the version of `scikit-learn` installed,
 you will be warned or not.
           in version 1.2. When the minimum version of `scikit-learn` supported
 by `imbalanced-learn` will reach 1.2, this attribute will be removed.

    estimators_ : list of estimators
        The collection of fitted base estimators.
    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.metrics import confusion_matrix
    >>> from imblearn.ensemble import \
 BalancedBaggingClassifier # doctest: +NORMALIZE_WHITESPACE
 BalancedBaggingClassifier # doctest:
    >>> X, y = make_classification(n_classes=2, class_sep=2,
    ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
    ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
    ...                                                     random_state=0)
    >>> bbc = BalancedBaggingClassifier(random_state=42)
    >>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS
    >>> bbc.fit(X_train, y_train) # doctest:
    BalancedBaggingClassifier(...)
    >>> y_pred = bbc.predict(X_test)
    >>> print(confusion_matrix(y_test, y_pred))
    @_deprecate_positional_args
    def __init__(
        self,
 base_estimator=None,
 estimator=None,
        n_estimators=10,
        *,
        max_samples=1.0,
        random_state=None,
        verbose=0,
        sampler=None,
        base_estimator="deprecated",
    ):
        # TODO: remove when supporting scikit-learn>=1.2
        bagging_classifier_signature = inspect.signature(super().__init__)
        estimator_params = {"base_estimator": base_estimator}
        if "estimator" in bagging_classifier_signature.parameters:
            estimator_params["estimator"] = estimator
        else:
            self.estimator = estimator

        super().__init__(
 base_estimator,
 **estimator_params,
            n_estimators=n_estimators,
            max_samples=max_samples,
            max_features=max_features,
                f"n_estimators must be greater than zero, " f"got {self.n_estimators}."
            )

        if self.base_estimator is not None:
        if self.estimator is not None and (
            self.base_estimator not in [None, "deprecated"]
        ):
            raise ValueError(
                "Both `estimator` and `base_estimator` were set. Only set `estimator`."
            )

        if self.estimator is not None:
            base_estimator = clone(self.estimator)
        elif self.base_estimator not in [None, "deprecated"]:
            warnings.warn(
                "`base_estimator` was renamed to `estimator` in version 0.10 and "
                "will be removed in 0.12.",
                FutureWarning,
            )
            base_estimator = clone(self.base_estimator)
        else:
            base_estimator = clone(default)

        if self.sampler_._sampling_type != "bypass":
            self.sampler_.set_params(sampling_strategy=self._sampling_strategy)

        self.base_estimator_ = Pipeline(
            [
                ("sampler", self.sampler_),
                ("classifier", base_estimator),
            ]
        self._estimator = Pipeline(
            [("sampler", self.sampler_), ("classifier", base_estimator)]
        )
        try:
            # scikit-learn < 1.2
            self.base_estimator_ = self._estimator
        except AttributeError:
            pass

    # TODO: remove when supporting scikit-learn>=1.4
    @property
    def estimator_(self):
        """Return the private ``_estimator`` used to grow the ensemble."""
        grown_from = self._estimator
        return grown_from

    # TODO: remove when supporting scikit-learn>=1.2
    @property
    def n_features_(self):
        """Deprecated alias of ``n_features_in_``; warns with ``FutureWarning``."""
        deprecation_message = (
            "`n_features_` was deprecated in scikit-learn 1.0. This attribute will "
            "not be accessible when the minimum supported version of scikit-learn "
            "is 1.2."
        )
        warnings.warn(deprecation_message, FutureWarning)
        return self.n_features_in_

    def fit(self, X, y):
        """Build a Bagging ensemble of estimators from the training set (X, y).
Original file line number	Diff line number	Diff line change
Expand Up		@@ -30,7 +30,7 @@
		.. \|PythonMinVersion\| replace:: 3.8
		.. \|NumPyMinVersion\| replace:: 1.17.3
		.. \|SciPyMinVersion\| replace:: 1.3.2
		.. \|ScikitLearnMinVersion\| replace:: 1.1.0
		.. \|ScikitLearnMinVersion\| replace:: 1.1.3
		.. \|MatplotlibMinVersion\| replace:: 3.1.2
		.. \|PandasMinVersion\| replace:: 1.0.5
		.. \|TensorflowMinVersion\| replace:: 2.4.3
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -51,7 +51,7 @@ jobs:
		black --check --diff .
		displayName: Run black
		- bash: \|
		./build_tools/circle/linting.sh
		./build_tools/azure/linting.sh
		displayName: Run linting
		- bash: \|
		mypy imblearn/
Expand DownExpand Up		@@ -112,7 +112,7 @@ jobs:
		ne(variables['Build.Reason'], 'Schedule')
		)
		matrix:
		py37_conda_forge_openblas_ubuntu_1804:
		py38_conda_forge_openblas_ubuntu_1804:
		DISTRIB: 'conda'
		CONDA_CHANNEL: 'conda-forge'
		PYTHON_VERSION: '3.8'
Expand DownExpand Up		@@ -141,12 +141,12 @@ jobs:
		THREADPOOLCTL_VERSION: 'min'
		COVERAGE: 'false'
		# Linux + Python 3.8 build with OpenBLAS and without SITE_JOBLIB
		py37_conda_defaults_openblas:
		py38_conda_defaults_openblas:
		DISTRIB: 'conda'
		CONDA_CHANNEL: 'conda-forge'
		PYTHON_VERSION: '3.8'
		BLAS: 'openblas'
		NUMPY_VERSION: '1.19.5' # we cannot get an older version of the dependencies resolution
		NUMPY_VERSION: '1.21.0' # we cannot get an older version of the dependencies resolution
		SCIPY_VERSION: 'min'
		SKLEARN_VERSION: 'min'
		MATPLOTLIB_VERSION: 'none'
Expand DownExpand Up		@@ -275,6 +275,3 @@ jobs:
		PYTHON_ARCH: '64'
		PYTEST_VERSION: '*'
		COVERAGE: 'true'
		py38_pip_openblas_32bit:
		PYTHON_VERSION: '3.8'
		PYTHON_ARCH: '32'
Original file line number	Diff line number	Diff line change
Expand Up		@@ -67,7 +67,8 @@ elif [[ "$DISTRIB" == "conda-pip-latest" ]]; then
		make_conda "python=$PYTHON_VERSION"
		python -m pip install -U pip

		python -m pip install scikit-learn pandas matplotlib
		python -m pip install pandas matplotlib
		python -m pip install --pre scikit-learn

		elif [[ "$DISTRIB" == "conda-pip-latest-tensorflow" ]]; then
		make_conda "python=$PYTHON_VERSION"
Expand Down
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,43 @@
		#!/bin/bash

		set -e
		# pipefail is necessary to propagate exit codes
		set -o pipefail

		flake8 --show-source .
		echo -e "No problem detected by flake8\n"

		# For docstrings and warnings of deprecated attributes to be rendered
		# properly, the property decorator must come before the deprecated decorator
		# (else they are treated as functions)

		# do not error when grep -B1 "@property" finds nothing
		set +e
		bad_deprecation_property_order=`git grep -A 10 "@property" -- "*.py" \| awk '/@property/,/def /' \| grep -B1 "@deprecated"`

		if [ ! -z "$bad_deprecation_property_order" ]
		then
		echo "property decorator should come before deprecated decorator"
		echo "found the following occurrencies:"
		echo $bad_deprecation_property_order
		exit 1
		fi

		# Check for default doctest directives ELLIPSIS and NORMALIZE_WHITESPACE

		doctest_directive="$(git grep -nw -E "# doctest\: \+(ELLIPSIS\|NORMALIZE_WHITESPACE)")"

		if [ ! -z "$doctest_directive" ]
		then
		echo "ELLIPSIS and NORMALIZE_WHITESPACE doctest directives are enabled by default, but were found in:"
		echo "$doctest_directive"
		exit 1
		fi

		joblib_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/fixes.py")"

		if [ ! -z "$joblib_import" ]; then
		echo "Use from sklearn.utils.fixes import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:"
		echo "$joblib_import"
		exit 1
		fi
Original file line number	Diff line number	Diff line change
Expand Up		@@ -35,10 +35,10 @@ data set, this classifier will favor the majority classes::
		>>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
		>>> bc = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
		... random_state=0)
		>>> bc.fit(X_train, y_train) #doctest: +ELLIPSIS
		>>> bc.fit(X_train, y_train) #doctest:
		BaggingClassifier(...)
		>>> y_pred = bc.predict(X_test)
		>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
		>>> balanced_accuracy_score(y_test, y_pred) # doctest:
		0.77...

		In :class:`BalancedBaggingClassifier`, each bootstrap sample will be further
Expand All		@@ -54,10 +54,10 @@ sampling is controlled by the parameter `sampler` or the two parameters
		... sampling_strategy='auto',
		... replacement=False,
		... random_state=0)
		>>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS
		>>> bbc.fit(X_train, y_train) # doctest:
		BalancedBaggingClassifier(...)
		>>> y_pred = bbc.predict(X_test)
		>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
		>>> balanced_accuracy_score(y_test, y_pred) # doctest:
		0.8...

		Changing the `sampler` will give rise to different known implementation
Expand All		@@ -78,10 +78,10 @@ each tree of the forest will be provided a balanced bootstrap sample

		>>> from imblearn.ensemble import BalancedRandomForestClassifier
		>>> brf = BalancedRandomForestClassifier(n_estimators=100, random_state=0)
		>>> brf.fit(X_train, y_train) # doctest: +ELLIPSIS
		>>> brf.fit(X_train, y_train) # doctest:
		BalancedRandomForestClassifier(...)
		>>> y_pred = brf.predict(X_test)
		>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
		>>> balanced_accuracy_score(y_test, y_pred) # doctest:
		0.8...

		.. _boosting:
Expand All		@@ -97,10 +97,10 @@ a boosting iteration :cite:`seiffert2009rusboost`::
		>>> from imblearn.ensemble import RUSBoostClassifier
		>>> rusboost = RUSBoostClassifier(n_estimators=200, algorithm='SAMME.R',
		... random_state=0)
		>>> rusboost.fit(X_train, y_train) # doctest: +ELLIPSIS
		>>> rusboost.fit(X_train, y_train) # doctest:
		RUSBoostClassifier(...)
		>>> y_pred = rusboost.predict(X_test)
		>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
		>>> balanced_accuracy_score(y_test, y_pred) # doctest:
		0...

		A specific method which uses :class:`~sklearn.ensemble.AdaBoostClassifier` as
Expand All		@@ -111,10 +111,10 @@ the :class:`BalancedBaggingClassifier` API, one can construct the ensemble as::

		>>> from imblearn.ensemble import EasyEnsembleClassifier
		>>> eec = EasyEnsembleClassifier(random_state=0)
		>>> eec.fit(X_train, y_train) # doctest: +ELLIPSIS
		>>> eec.fit(X_train, y_train) # doctest:
		EasyEnsembleClassifier(...)
		>>> y_pred = eec.predict(X_test)
		>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
		>>> balanced_accuracy_score(y_test, y_pred) # doctest:
		0.6...

		.. topic:: Examples
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -40,7 +40,7 @@ a classifier::

		>>> from sklearn.svm import LinearSVC
		>>> clf = LinearSVC()
		>>> clf.fit(X_resampled, y_resampled) # doctest : +ELLIPSIS
		>>> clf.fit(X_resampled, y_resampled)
		LinearSVC(...)

		In the figure below, we compare the decision functions of a classifier trained
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -4,10 +4,10 @@
		NUMPY_MIN_VERSION = "1.17.3"
		SCIPY_MIN_VERSION = "1.3.2"
		PANDAS_MIN_VERSION = "1.0.5"
		SKLEARN_MIN_VERSION = "1.1.0"
		SKLEARN_MIN_VERSION = "1.1.3"
		TENSORFLOW_MIN_VERSION = "2.4.3"
		KERAS_MIN_VERSION = "2.4.3"
		JOBLIB_MIN_VERSION = "1.0.0"
		JOBLIB_MIN_VERSION = "1.1.1"
		THREADPOOLCTL_MIN_VERSION = "2.0.0"
		PYTEST_MIN_VERSION = "5.0.1"

Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -91,7 +91,7 @@ class SMOTEENN(BaseSampler):

		>>> from collections import Counter
		>>> from sklearn.datasets import make_classification
		>>> from imblearn.combine import SMOTEENN # doctest: +NORMALIZE_WHITESPACE
		>>> from imblearn.combine import SMOTEENN # doctest:
		>>> X, y = make_classification(n_classes=2, class_sep=2,
		... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
		... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -90,7 +90,7 @@ class SMOTETomek(BaseSampler):
		>>> from collections import Counter
		>>> from sklearn.datasets import make_classification
		>>> from imblearn.combine import \
		SMOTETomek # doctest: +NORMALIZE_WHITESPACE
		SMOTETomek # doctest:
		>>> X, y = make_classification(n_classes=2, class_sep=2,
		... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
		... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -4,7 +4,9 @@
		# Christos Aridas
		# License: MIT

		import inspect
		import numbers
		import warnings

		import numpy as np

Expand DownExpand Up		@@ -41,10 +43,12 @@ class BalancedBaggingClassifier(BaggingClassifier):

		Parameters
		----------
		base_estimator : estimator object, default=None
		estimator : estimator object, default=None
		The base estimator to fit on random subsets of the dataset.
		If None, then the base estimator is a decision tree.

		.. versionadded:: 0.10

		n_estimators : int, default=10
		The number of base estimators in the ensemble.

Expand DownExpand Up		@@ -100,18 +104,37 @@ class BalancedBaggingClassifier(BaggingClassifier):

		.. versionadded:: 0.8

		base_estimator : estimator object, default=None
		The base estimator to fit on random subsets of the dataset.
		If None, then the base estimator is a decision tree.

		.. deprecated:: 0.10
		`base_estimator` was renamed to `estimator` in version 0.10 and
		will be removed in 0.12.

		Attributes
		----------
		estimator_ : estimator
		The base estimator from which the ensemble is grown.

		.. versionadded:: 0.10

		base_estimator_ : estimator
		The base estimator from which the ensemble is grown.

		.. deprecated:: 1.2
		`base_estimator_` is deprecated in `scikit-learn` 1.2 and will be
		removed in 1.4. Use `estimator_` instead. When the minimum version
		of `scikit-learn` supported by `imbalanced-learn` will reach 1.4,
		this attribute will be removed.

		n_features_ : int
		The number of features when `fit` is performed.

		.. deprecated:: 1.0
		`n_features_` is deprecated in `scikit-learn` 1.0 and will be removed
		in version 1.2. Depending on the version of `scikit-learn` installed,
		you will be warned or not.
		in version 1.2. When the minimum version of `scikit-learn` supported
		by `imbalanced-learn` will reach 1.2, this attribute will be removed.

		estimators_ : list of estimators
		The collection of fitted base estimators.
Expand DownExpand Up		@@ -209,7 +232,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
		>>> from sklearn.model_selection import train_test_split
		>>> from sklearn.metrics import confusion_matrix
		>>> from imblearn.ensemble import \
		BalancedBaggingClassifier # doctest: +NORMALIZE_WHITESPACE
		BalancedBaggingClassifier # doctest:
		>>> X, y = make_classification(n_classes=2, class_sep=2,
		... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
		... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
Expand All		@@ -218,7 +241,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
		>>> X_train, X_test, y_train, y_test = train_test_split(X, y,
		... random_state=0)
		>>> bbc = BalancedBaggingClassifier(random_state=42)
		>>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS
		>>> bbc.fit(X_train, y_train) # doctest:
		BalancedBaggingClassifier(...)
		>>> y_pred = bbc.predict(X_test)
		>>> print(confusion_matrix(y_test, y_pred))
Expand All		@@ -229,7 +252,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
		@_deprecate_positional_args
		def __init__(
		self,
		base_estimator=None,
		estimator=None,
		n_estimators=10,
		*,
		max_samples=1.0,
Expand All		@@ -244,10 +267,18 @@ def __init__(
		random_state=None,
		verbose=0,
		sampler=None,
		base_estimator="deprecated",
		):
		# TODO: remove when supporting scikit-learn>=1.2
		bagging_classifier_signature = inspect.signature(super().__init__)
		estimator_params = {"base_estimator": base_estimator}
		if "estimator" in bagging_classifier_signature.parameters:
		estimator_params["estimator"] = estimator
		else:
		self.estimator = estimator

		super().__init__(
		base_estimator,
		**estimator_params,
		n_estimators=n_estimators,
		max_samples=max_samples,
		max_features=max_features,
Expand DownExpand Up		@@ -294,20 +325,54 @@ def _validate_estimator(self, default=DecisionTreeClassifier()):
		f"n_estimators must be greater than zero, " f"got {self.n_estimators}."
		)

		if self.base_estimator is not None:
		if self.estimator is not None and (
		self.base_estimator not in [None, "deprecated"]
		):
		raise ValueError(
		"Both `estimator` and `base_estimator` were set. Only set `estimator`."
		)

		if self.estimator is not None:
		base_estimator = clone(self.estimator)
		elif self.base_estimator not in [None, "deprecated"]:
		warnings.warn(
		"`base_estimator` was renamed to `estimator` in version 0.10 and "
		"will be removed in 0.12.",
		FutureWarning,
		)
		base_estimator = clone(self.base_estimator)
		else:
		base_estimator = clone(default)

		if self.sampler_._sampling_type != "bypass":
		self.sampler_.set_params(sampling_strategy=self._sampling_strategy)

		self.base_estimator_ = Pipeline(
		[
		("sampler", self.sampler_),
		("classifier", base_estimator),
		]
		self._estimator = Pipeline(
		[("sampler", self.sampler_), ("classifier", base_estimator)]
		)
		try:
		# scikit-learn < 1.2
		self.base_estimator_ = self._estimator
		except AttributeError:
		pass

		# TODO: remove when supporting scikit-learn>=1.4
		@property
		def estimator_(self):
		"""Estimator used to grow the ensemble."""
		return self._estimator

		# TODO: remove when supporting scikit-learn>=1.2
		@property
		def n_features_(self):
		"""Number of features when ``fit`` is performed."""
		warnings.warn(
		"`n_features_` was deprecated in scikit-learn 1.0. This attribute will "
		"not be accessible when the minimum supported version of scikit-learn "
		"is 1.2.",
		FutureWarning,
		)
		return self.n_features_in_

		def fit(self, X, y):
		"""Build a Bagging ensemble of estimators from the training set (X, y).
Expand Down