Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

MAINT make imbalanced-learn compatible with scikit-learn 1.+#946

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletionREADME.rst
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -30,7 +30,7 @@
.. |PythonMinVersion| replace:: 3.8
.. |NumPyMinVersion| replace:: 1.17.3
.. |SciPyMinVersion| replace:: 1.3.2
.. |ScikitLearnMinVersion| replace:: 1.1.0
.. |ScikitLearnMinVersion| replace:: 1.1.3
.. |MatplotlibMinVersion| replace:: 3.1.2
.. |PandasMinVersion| replace:: 1.0.5
.. |TensorflowMinVersion| replace:: 2.4.3
Expand Down
11 changes: 4 additions & 7 deletionsazure-pipelines.yml
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -51,7 +51,7 @@ jobs:
black --check --diff .
displayName: Run black
- bash: |
./build_tools/circle/linting.sh
./build_tools/azure/linting.sh
displayName: Run linting
- bash: |
mypy imblearn/
Expand DownExpand Up@@ -112,7 +112,7 @@ jobs:
ne(variables['Build.Reason'], 'Schedule')
)
matrix:
py37_conda_forge_openblas_ubuntu_1804:
py38_conda_forge_openblas_ubuntu_1804:
DISTRIB: 'conda'
CONDA_CHANNEL: 'conda-forge'
PYTHON_VERSION: '3.8'
Expand DownExpand Up@@ -141,12 +141,12 @@ jobs:
THREADPOOLCTL_VERSION: 'min'
COVERAGE: 'false'
# Linux + Python 3.8 build with OpenBLAS and without SITE_JOBLIB
py37_conda_defaults_openblas:
py38_conda_defaults_openblas:
DISTRIB: 'conda'
CONDA_CHANNEL: 'conda-forge'
PYTHON_VERSION: '3.8'
BLAS: 'openblas'
NUMPY_VERSION: '1.19.5' # we cannot get an older version of the dependencies resolution
NUMPY_VERSION: '1.21.0' # we cannot get an older version of the dependencies resolution
SCIPY_VERSION: 'min'
SKLEARN_VERSION: 'min'
MATPLOTLIB_VERSION: 'none'
Expand DownExpand Up@@ -275,6 +275,3 @@ jobs:
PYTHON_ARCH: '64'
PYTEST_VERSION: '*'
COVERAGE: 'true'
py38_pip_openblas_32bit:
PYTHON_VERSION: '3.8'
PYTHON_ARCH: '32'
3 changes: 2 additions & 1 deletionbuild_tools/azure/install.sh
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -67,7 +67,8 @@ elif [[ "$DISTRIB" == "conda-pip-latest" ]]; then
make_conda "python=$PYTHON_VERSION"
python -m pip install -U pip

python -m pip install scikit-learn pandas matplotlib
python -m pip install pandas matplotlib
python -m pip install --pre scikit-learn

elif [[ "$DISTRIB" == "conda-pip-latest-tensorflow" ]]; then
make_conda "python=$PYTHON_VERSION"
Expand Down
43 changes: 43 additions & 0 deletionsbuild_tools/azure/linting.sh
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
#!/bin/bash

# Lint gate: runs flake8 and then three repository-wide `git grep` checks.
# The script exits with status 1 as soon as one of the checks reports a
# problem, after printing the offending matches.

set -e
# pipefail is necessary to propagate exit codes
set -o pipefail

flake8 --show-source .
echo -e "No problem detected by flake8\n"

# For docstrings and warnings of deprecated attributes to be rendered
# properly, the property decorator must come before the deprecated decorator
# (else they are treated as functions)

# From here on, do not abort on a non-zero exit status: grep / git grep
# return 1 when they find nothing, which is the "no problem" outcome for
# every check below (e.g. grep -B1 "@deprecated" matching nothing).
set +e
bad_deprecation_property_order="$(git grep -A 10 "@property" -- "*.py" | awk '/@property/,/def /' | grep -B1 "@deprecated")"

if [ -n "$bad_deprecation_property_order" ]; then
    echo "property decorator should come before deprecated decorator"
    echo "found the following occurrencies:"
    # Quoted so the multi-line grep output keeps its line breaks and is not
    # subject to word splitting or glob expansion.
    echo "$bad_deprecation_property_order"
    exit 1
fi

# Check for default doctest directives ELLIPSIS and NORMALIZE_WHITESPACE

doctest_directive="$(git grep -nw -E "# doctest\: \+(ELLIPSIS|NORMALIZE_WHITESPACE)")"

if [ -n "$doctest_directive" ]; then
    echo "ELLIPSIS and NORMALIZE_WHITESPACE doctest directives are enabled by default, but were found in:"
    echo "$doctest_directive"
    exit 1
fi

# Check that `delayed` is not imported directly from joblib; list the files
# that do so.

joblib_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/fixes.py")"

if [ -n "$joblib_import" ]; then
    echo "Use from sklearn.utils.fixes import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:"
    echo "$joblib_import"
    exit 1
fi
20 changes: 10 additions & 10 deletionsdoc/ensemble.rst
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -35,10 +35,10 @@ data set, this classifier will favor the majority classes::
>>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
>>> bc = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
... random_state=0)
>>> bc.fit(X_train, y_train) #doctest: +ELLIPSIS
>>> bc.fit(X_train, y_train) #doctest:
BaggingClassifier(...)
>>> y_pred = bc.predict(X_test)
>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
>>> balanced_accuracy_score(y_test, y_pred) # doctest:
0.77...

In :class:`BalancedBaggingClassifier`, each bootstrap sample will be further
Expand All@@ -54,10 +54,10 @@ sampling is controlled by the parameter `sampler` or the two parameters
... sampling_strategy='auto',
... replacement=False,
... random_state=0)
>>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS
>>> bbc.fit(X_train, y_train) # doctest:
BalancedBaggingClassifier(...)
>>> y_pred = bbc.predict(X_test)
>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
>>> balanced_accuracy_score(y_test, y_pred) # doctest:
0.8...

Changing the `sampler` will give rise to different known implementation
Expand All@@ -78,10 +78,10 @@ each tree of the forest will be provided a balanced bootstrap sample

>>> from imblearn.ensemble import BalancedRandomForestClassifier
>>> brf = BalancedRandomForestClassifier(n_estimators=100, random_state=0)
>>> brf.fit(X_train, y_train) # doctest: +ELLIPSIS
>>> brf.fit(X_train, y_train) # doctest:
BalancedRandomForestClassifier(...)
>>> y_pred = brf.predict(X_test)
>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
>>> balanced_accuracy_score(y_test, y_pred) # doctest:
0.8...

.. _boosting:
Expand All@@ -97,10 +97,10 @@ a boosting iteration :cite:`seiffert2009rusboost`::
>>> from imblearn.ensemble import RUSBoostClassifier
>>> rusboost = RUSBoostClassifier(n_estimators=200, algorithm='SAMME.R',
... random_state=0)
>>> rusboost.fit(X_train, y_train) # doctest: +ELLIPSIS
>>> rusboost.fit(X_train, y_train) # doctest:
RUSBoostClassifier(...)
>>> y_pred = rusboost.predict(X_test)
>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
>>> balanced_accuracy_score(y_test, y_pred) # doctest:
0...

A specific method which uses :class:`~sklearn.ensemble.AdaBoostClassifier` as
Expand All@@ -111,10 +111,10 @@ the :class:`BalancedBaggingClassifier` API, one can construct the ensemble as::

>>> from imblearn.ensemble import EasyEnsembleClassifier
>>> eec = EasyEnsembleClassifier(random_state=0)
>>> eec.fit(X_train, y_train) # doctest: +ELLIPSIS
>>> eec.fit(X_train, y_train) # doctest:
EasyEnsembleClassifier(...)
>>> y_pred = eec.predict(X_test)
>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
>>> balanced_accuracy_score(y_test, y_pred) # doctest:
0.6...

.. topic:: Examples
Expand Down
2 changes: 1 addition & 1 deletiondoc/over_sampling.rst
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -40,7 +40,7 @@ a classifier::

>>> from sklearn.svm import LinearSVC
>>> clf = LinearSVC()
>>> clf.fit(X_resampled, y_resampled) # doctest : +ELLIPSIS
>>> clf.fit(X_resampled, y_resampled)
LinearSVC(...)

In the figure below, we compare the decision functions of a classifier trained
Expand Down
4 changes: 2 additions & 2 deletionsimblearn/_min_dependencies.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -4,10 +4,10 @@
NUMPY_MIN_VERSION = "1.17.3"
SCIPY_MIN_VERSION = "1.3.2"
PANDAS_MIN_VERSION = "1.0.5"
SKLEARN_MIN_VERSION = "1.1.0"
SKLEARN_MIN_VERSION = "1.1.3"
TENSORFLOW_MIN_VERSION = "2.4.3"
KERAS_MIN_VERSION = "2.4.3"
JOBLIB_MIN_VERSION = "1.0.0"
JOBLIB_MIN_VERSION = "1.1.1"
THREADPOOLCTL_MIN_VERSION = "2.0.0"
PYTEST_MIN_VERSION = "5.0.1"

Expand Down
2 changes: 1 addition & 1 deletionimblearn/combine/_smote_enn.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -91,7 +91,7 @@ class SMOTEENN(BaseSampler):

>>> from collections import Counter
>>> from sklearn.datasets import make_classification
>>> from imblearn.combine import SMOTEENN # doctest: +NORMALIZE_WHITESPACE
>>> from imblearn.combine import SMOTEENN # doctest:
>>> X, y = make_classification(n_classes=2, class_sep=2,
... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
Expand Down
2 changes: 1 addition & 1 deletionimblearn/combine/_smote_tomek.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -90,7 +90,7 @@ class SMOTETomek(BaseSampler):
>>> from collections import Counter
>>> from sklearn.datasets import make_classification
>>> from imblearn.combine import \
SMOTETomek # doctest: +NORMALIZE_WHITESPACE
SMOTETomek # doctest:
>>> X, y = make_classification(n_classes=2, class_sep=2,
... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
Expand Down
91 changes: 78 additions & 13 deletionsimblearn/ensemble/_bagging.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -4,7 +4,9 @@
# Christos Aridas
# License: MIT

import inspect
import numbers
import warnings

import numpy as np

Expand DownExpand Up@@ -41,10 +43,12 @@ class BalancedBaggingClassifier(BaggingClassifier):

Parameters
----------
base_estimator : estimator object, default=None
estimator : estimator object, default=None
The base estimator to fit on random subsets of the dataset.
If None, then the base estimator is a decision tree.

.. versionadded:: 0.10

n_estimators : int, default=10
The number of base estimators in the ensemble.

Expand DownExpand Up@@ -100,18 +104,37 @@ class BalancedBaggingClassifier(BaggingClassifier):

.. versionadded:: 0.8

base_estimator : estimator object, default=None
The base estimator to fit on random subsets of the dataset.
If None, then the base estimator is a decision tree.

.. deprecated:: 0.10
`base_estimator` was renamed to `estimator` in version 0.10 and
will be removed in 0.12.

Attributes
----------
estimator_ : estimator
The base estimator from which the ensemble is grown.

.. versionadded:: 0.10

base_estimator_ : estimator
The base estimator from which the ensemble is grown.

.. deprecated:: 1.2
`base_estimator_` is deprecated in `scikit-learn` 1.2 and will be
removed in 1.4. Use `estimator_` instead. When the minimum version
of `scikit-learn` supported by `imbalanced-learn` will reach 1.4,
this attribute will be removed.

n_features_ : int
The number of features when `fit` is performed.

.. deprecated:: 1.0
`n_features_` is deprecated in `scikit-learn` 1.0 and will be removed
in version 1.2. Depending of the version of `scikit-learn` installed,
you will get be warned or not.
in version 1.2. When the minimum version of `scikit-learn` supported
by `imbalanced-learn` will reach 1.2, this attribute will be removed.

estimators_ : list of estimators
The collection of fitted base estimators.
Expand DownExpand Up@@ -209,7 +232,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.metrics import confusion_matrix
>>> from imblearn.ensemble import \
BalancedBaggingClassifier # doctest: +NORMALIZE_WHITESPACE
BalancedBaggingClassifier # doctest:
>>> X, y = make_classification(n_classes=2, class_sep=2,
... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
Expand All@@ -218,7 +241,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
>>> X_train, X_test, y_train, y_test = train_test_split(X, y,
... random_state=0)
>>> bbc = BalancedBaggingClassifier(random_state=42)
>>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS
>>> bbc.fit(X_train, y_train) # doctest:
BalancedBaggingClassifier(...)
>>> y_pred = bbc.predict(X_test)
>>> print(confusion_matrix(y_test, y_pred))
Expand All@@ -229,7 +252,7 @@ class BalancedBaggingClassifier(BaggingClassifier):
@_deprecate_positional_args
def __init__(
self,
base_estimator=None,
estimator=None,
n_estimators=10,
*,
max_samples=1.0,
Expand All@@ -244,10 +267,18 @@ def __init__(
random_state=None,
verbose=0,
sampler=None,
base_estimator="deprecated",
):
# TODO: remove when supporting scikit-learn>=1.2
bagging_classifier_signature = inspect.signature(super().__init__)
estimator_params = {"base_estimator": base_estimator}
if "estimator" in bagging_classifier_signature.parameters:
estimator_params["estimator"] = estimator
else:
self.estimator = estimator

super().__init__(
base_estimator,
**estimator_params,
n_estimators=n_estimators,
max_samples=max_samples,
max_features=max_features,
Expand DownExpand Up@@ -294,20 +325,54 @@ def _validate_estimator(self, default=DecisionTreeClassifier()):
f"n_estimators must be greater than zero, " f"got {self.n_estimators}."
)

if self.base_estimator is not None:
if self.estimator is not None and (
self.base_estimator not in [None, "deprecated"]
):
raise ValueError(
"Both `estimator` and `base_estimator` were set. Only set `estimator`."
)

if self.estimator is not None:
base_estimator = clone(self.estimator)
elif self.base_estimator not in [None, "deprecated"]:
warnings.warn(
"`base_estimator` was renamed to `estimator` in version 0.10 and "
"will be removed in 0.12.",
FutureWarning,
)
base_estimator = clone(self.base_estimator)
else:
base_estimator = clone(default)

if self.sampler_._sampling_type != "bypass":
self.sampler_.set_params(sampling_strategy=self._sampling_strategy)

self.base_estimator_ = Pipeline(
[
("sampler", self.sampler_),
("classifier", base_estimator),
]
self._estimator = Pipeline(
[("sampler", self.sampler_), ("classifier", base_estimator)]
)
try:
# scikit-learn < 1.2
self.base_estimator_ = self._estimator
except AttributeError:
pass

# TODO: remove when supporting scikit-learn>=1.4
@property
def estimator_(self):
    """Estimator used to grow the ensemble.

    Read-only accessor that returns the object stored in
    ``self._estimator``.
    """
    return self._estimator

# TODO: remove when supporting scikit-learn>=1.2
@property
def n_features_(self):
    """Number of features when ``fit`` is performed.

    Deprecated accessor: every read emits a ``FutureWarning`` and then
    returns the value of ``self.n_features_in_``.

    Returns
    -------
    object
        Whatever is stored in ``self.n_features_in_``.
    """
    warnings.warn(
        "`n_features_` was deprecated in scikit-learn 1.0. This attribute will "
        "not be accessible when the minimum supported version of scikit-learn "
        "is 1.2.",
        FutureWarning,
        # Attribute the warning to the code that accessed the property
        # rather than to this line, so users can locate the deprecated use.
        stacklevel=2,
    )
    return self.n_features_in_

def fit(self, X, y):
"""Build a Bagging ensemble of estimators from the training set (X, y).
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp