Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0b66a9b

Browse files
committed
Pushing the docs to 1.1/ for branch: 1.1.X, commit c987b5ca84610bf5251ea8fa33b48c5826942a0d
1 parent86f3ce5 commit0b66a9b

File tree

1,384 files changed

+11827
-9468
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,384 files changed

+11827
-9468
lines changed

‎1.1/.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config:44afcf8dd215cc5d065a44ea3a818dd0
3+
config:d0bdad28d397ffd9f93c5892709f416d
44
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.

‎1.1/_downloads/4cf0456267ced0f869a458ef4776d4c5/plot_release_highlights_1_1_0.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
ax.plot(X_1d,y,"o",alpha=0.5,markersize=1)
5151
forquantile,histinhist_quantiles.items():
5252
ax.plot(X_1d,hist.predict(X),label=quantile)
53-
ax.legend(loc="lower left")
53+
_=ax.legend(loc="lower left")
5454

5555

5656
# %%
@@ -96,6 +96,7 @@
9696

9797
log_reg_input_features=log_reg[:-1].get_feature_names_out()
9898
pd.Series(log_reg[-1].coef_.ravel(),index=log_reg_input_features).plot.bar()
99+
plt.tight_layout()
99100

100101

101102
# %%
@@ -161,3 +162,64 @@
161162
# - :class:`linear_model.GammaRegressor`
162163
# - :class:`linear_model.PoissonRegressor`
163164
# - :class:`linear_model.TweedieRegressor`
165+
166+
# %%
167+
# MiniBatchNMF: an online version of NMF
168+
# --------------------------------------
169+
# The new class :class:`decomposition.MiniBatchNMF` implements a faster but less
170+
# accurate version of non-negative matrix factorization (:class:`decomposition.NMF`).
171+
# :class:`MiniBatchNMF` divides the data into mini-batches and optimizes the NMF model
172+
# in an online manner by cycling over the mini-batches, making it better suited for
173+
# large datasets. In particular, it implements `partial_fit`, which can be used for
174+
# online learning when the data is not readily available from the start, or when the
175+
# data does not fit into memory.
176+
importnumpyasnp
177+
fromsklearn.decompositionimportMiniBatchNMF
178+
179+
rng=np.random.RandomState(0)
180+
n_samples,n_features,n_components=10,10,5
181+
true_W=rng.uniform(size=(n_samples,n_components))
182+
true_H=rng.uniform(size=(n_components,n_features))
183+
X=true_W @true_H
184+
185+
nmf=MiniBatchNMF(n_components=n_components,random_state=0)
186+
187+
for_inrange(10):
188+
nmf.partial_fit(X)
189+
190+
W=nmf.transform(X)
191+
H=nmf.components_
192+
X_reconstructed=W @H
193+
194+
print(
195+
f"relative reconstruction error: ",
196+
f"{np.sum((X-X_reconstructed)**2)/np.sum(X**2):.5f}",
197+
)
198+
199+
# %%
200+
# BisectingKMeans: divide and cluster
201+
# -----------------------------------
202+
# The new class :class:`cluster.BisectingKMeans` is a variant of :class:`KMeans`, using
203+
# divisive hierarchical clustering. Instead of creating all centroids at once, centroids
204+
# are picked progressively based on a previous clustering: a cluster is split into two
205+
# new clusters repeatedly until the target number of clusters is reached, giving a
206+
# hierarchical structure to the clustering.
207+
fromsklearn.datasetsimportmake_blobs
208+
fromsklearn.clusterimportKMeans,BisectingKMeans
209+
importmatplotlib.pyplotasplt
210+
211+
X,_=make_blobs(n_samples=1000,centers=2,random_state=0)
212+
213+
km=KMeans(n_clusters=5,random_state=0).fit(X)
214+
bisect_km=BisectingKMeans(n_clusters=5,random_state=0).fit(X)
215+
216+
fig,ax=plt.subplots(1,2,figsize=(10,5))
217+
ax[0].scatter(X[:,0],X[:,1],s=10,c=km.labels_)
218+
ax[0].scatter(km.cluster_centers_[:,0],km.cluster_centers_[:,1],s=20,c="r")
219+
ax[0].set_title("KMeans")
220+
221+
ax[1].scatter(X[:,0],X[:,1],s=10,c=bisect_km.labels_)
222+
ax[1].scatter(
223+
bisect_km.cluster_centers_[:,0],bisect_km.cluster_centers_[:,1],s=20,c="r"
224+
)
225+
_=ax[1].set_title("BisectingKMeans")

‎1.1/_downloads/68fdea23e50d165632d4bd4e36453cd5/plot_release_highlights_1_1_0.ipynb

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
},
3434
"outputs": [],
3535
"source": [
36-
"from sklearn.datasets import make_regression\nfrom sklearn.ensemble import HistGradientBoostingRegressor\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# Simple regression function for X * cos(X)\nrng = np.random.RandomState(42)\nX_1d = np.linspace(0, 10, num=2000)\nX = X_1d.reshape(-1, 1)\ny = X_1d * np.cos(X_1d) + rng.normal(scale=X_1d / 3)\n\nquantiles = [0.95, 0.5, 0.05]\nparameters = dict(loss=\"quantile\", max_bins=32, max_iter=50)\nhist_quantiles = {\n f\"quantile={quantile:.2f}\": HistGradientBoostingRegressor(\n **parameters, quantile=quantile\n ).fit(X, y)\n for quantile in quantiles\n}\n\nfig, ax = plt.subplots()\nax.plot(X_1d, y,\"o\", alpha=0.5, markersize=1)\nfor quantile, hist in hist_quantiles.items():\n ax.plot(X_1d, hist.predict(X), label=quantile)\nax.legend(loc=\"lower left\")"
36+
"from sklearn.datasets import make_regression\nfrom sklearn.ensemble import HistGradientBoostingRegressor\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# Simple regression function for X * cos(X)\nrng = np.random.RandomState(42)\nX_1d = np.linspace(0, 10, num=2000)\nX = X_1d.reshape(-1, 1)\ny = X_1d * np.cos(X_1d) + rng.normal(scale=X_1d / 3)\n\nquantiles = [0.95, 0.5, 0.05]\nparameters = dict(loss=\"quantile\", max_bins=32, max_iter=50)\nhist_quantiles = {\n f\"quantile={quantile:.2f}\": HistGradientBoostingRegressor(\n **parameters, quantile=quantile\n ).fit(X, y)\n for quantile in quantiles\n}\n\nfig, ax = plt.subplots()\nax.plot(X_1d, y,\"o\", alpha=0.5, markersize=1)\nfor quantile, hist in hist_quantiles.items():\n ax.plot(X_1d, hist.predict(X), label=quantile)\n_ = ax.legend(loc=\"lower left\")"
3737
]
3838
},
3939
{
@@ -69,7 +69,7 @@
6969
},
7070
"outputs": [],
7171
"source": [
72-
"import pandas as pd\n\nlog_reg_input_features = log_reg[:-1].get_feature_names_out()\npd.Series(log_reg[-1].coef_.ravel(), index=log_reg_input_features).plot.bar()"
72+
"import pandas as pd\n\nlog_reg_input_features = log_reg[:-1].get_feature_names_out()\npd.Series(log_reg[-1].coef_.ravel(), index=log_reg_input_features).plot.bar()\nplt.tight_layout()"
7373
]
7474
},
7575
{
@@ -114,6 +114,42 @@
114114
"source": [
115115
"## Performance improvements\nReductions on pairwise distances for dense float64 datasets has been refactored\nto better take advantage of non-blocking thread parallelism. For example,\n:meth:`neighbors.NearestNeighbors.kneighbors` and\n:meth:`neighbors.NearestNeighbors.radius_neighbors` can respectively be up to \u00d720 and\n\u00d75 faster than previously. In summary, the following functions and estimators\nnow benefit from improved performance:\n\n- :func:`metrics.pairwise_distances_argmin`\n- :func:`metrics.pairwise_distances_argmin_min`\n- :class:`cluster.AffinityPropagation`\n- :class:`cluster.Birch`\n- :class:`cluster.MeanShift`\n- :class:`cluster.OPTICS`\n- :class:`cluster.SpectralClustering`\n- :func:`feature_selection.mutual_info_regression`\n- :class:`neighbors.KNeighborsClassifier`\n- :class:`neighbors.KNeighborsRegressor`\n- :class:`neighbors.RadiusNeighborsClassifier`\n- :class:`neighbors.RadiusNeighborsRegressor`\n- :class:`neighbors.LocalOutlierFactor`\n- :class:`neighbors.NearestNeighbors`\n- :class:`manifold.Isomap`\n- :class:`manifold.LocallyLinearEmbedding`\n- :class:`manifold.TSNE`\n- :func:`manifold.trustworthiness`\n- :class:`semi_supervised.LabelPropagation`\n- :class:`semi_supervised.LabelSpreading`\n\nTo know more about the technical details of this work, you can read\n`this suite of blog posts <https://blog.scikit-learn.org/technical/performances/>`_.\n\nMoreover, the computation of loss functions has been refactored using\nCython resulting in performance improvements for the following estimators:\n\n- :class:`linear_model.LogisticRegression`\n- :class:`linear_model.GammaRegressor`\n- :class:`linear_model.PoissonRegressor`\n- :class:`linear_model.TweedieRegressor`\n\n"
116116
]
117+
},
118+
{
119+
"cell_type":"markdown",
120+
"metadata": {},
121+
"source": [
122+
"## MiniBatchNMF: an online version of NMF\nThe new class :class:`decomposition.MiniBatchNMF` implements a faster but less\naccurate version of non-negative matrix factorization (:class:`decomposition.NMF`).\n:class:`MiniBatchNMF` divides the data into mini-batches and optimizes the NMF model\nin an online manner by cycling over the mini-batches, making it better suited for\nlarge datasets. In particular, it implements `partial_fit`, which can be used for\nonline learning when the data is not readily available from the start, or when the\ndata does not fit into memory.\n\n"
123+
]
124+
},
125+
{
126+
"cell_type":"code",
127+
"execution_count":null,
128+
"metadata": {
129+
"collapsed":false
130+
},
131+
"outputs": [],
132+
"source": [
133+
"import numpy as np\nfrom sklearn.decomposition import MiniBatchNMF\n\nrng = np.random.RandomState(0)\nn_samples, n_features, n_components = 10, 10, 5\ntrue_W = rng.uniform(size=(n_samples, n_components))\ntrue_H = rng.uniform(size=(n_components, n_features))\nX = true_W @ true_H\n\nnmf = MiniBatchNMF(n_components=n_components, random_state=0)\n\nfor _ in range(10):\n nmf.partial_fit(X)\n\nW = nmf.transform(X)\nH = nmf.components_\nX_reconstructed = W @ H\n\nprint(\n f\"relative reconstruction error:\",\n f\"{np.sum((X - X_reconstructed) ** 2) / np.sum(X**2):.5f}\",\n)"
134+
]
135+
},
136+
{
137+
"cell_type":"markdown",
138+
"metadata": {},
139+
"source": [
140+
"## BisectingKMeans: divide and cluster\nThe new class :class:`cluster.BisectingKMeans` is a variant of :class:`KMeans`, using\ndivisive hierarchical clustering. Instead of creating all centroids at once, centroids\nare picked progressively based on a previous clustering: a cluster is split into two\nnew clusters repeatedly until the target number of clusters is reached, giving a\nhierarchical structure to the clustering.\n\n"
141+
]
142+
},
143+
{
144+
"cell_type":"code",
145+
"execution_count":null,
146+
"metadata": {
147+
"collapsed":false
148+
},
149+
"outputs": [],
150+
"source": [
151+
"from sklearn.datasets import make_blobs\nfrom sklearn.cluster import KMeans, BisectingKMeans\nimport matplotlib.pyplot as plt\n\nX, _ = make_blobs(n_samples=1000, centers=2, random_state=0)\n\nkm = KMeans(n_clusters=5, random_state=0).fit(X)\nbisect_km = BisectingKMeans(n_clusters=5, random_state=0).fit(X)\n\nfig, ax = plt.subplots(1, 2, figsize=(10, 5))\nax[0].scatter(X[:, 0], X[:, 1], s=10, c=km.labels_)\nax[0].scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1], s=20, c=\"r\")\nax[0].set_title(\"KMeans\")\n\nax[1].scatter(X[:, 0], X[:, 1], s=10, c=bisect_km.labels_)\nax[1].scatter(\n bisect_km.cluster_centers_[:, 0], bisect_km.cluster_centers_[:, 1], s=20, c=\"r\"\n)\n_ = ax[1].set_title(\"BisectingKMeans\")"
152+
]
117153
}
118154
],
119155
"metadata": {
Binary file not shown.

‎1.1/_downloads/scikit-learn-docs.zip

136 KB
Binary file not shown.
158 Bytes
-113 Bytes
-1 Bytes
-24 Bytes
-2 Bytes
-119 Bytes
-181 Bytes
179 Bytes
-27 Bytes
3.05 KB
219 Bytes
11 Bytes
142 Bytes
8 Bytes
95 Bytes
-22 Bytes
-104 Bytes
47 Bytes
131 Bytes
75 Bytes
31 Bytes
771 Bytes
-820 Bytes
-35 Bytes
-24 Bytes
-1 Bytes

‎1.1/_sources/auto_examples/applications/plot_cyclical_feature_engineering.rst.txt

Lines changed: 2 additions & 2 deletions

‎1.1/_sources/auto_examples/applications/plot_digits_denoising.rst.txt

Lines changed: 2 additions & 2 deletions

‎1.1/_sources/auto_examples/applications/plot_face_recognition.rst.txt

Lines changed: 6 additions & 6 deletions

‎1.1/_sources/auto_examples/applications/plot_model_complexity_influence.rst.txt

Lines changed: 16 additions & 16 deletions

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp