Commit0b66a9b

committed

Pushing the docs to 1.1/ for branch: 1.1.X, commit c987b5ca84610bf5251ea8fa33b48c5826942a0d

1 parent86f3ce5 commit0b66a9bCopy full SHA for 0b66a9b

File tree

1,384 files changed

+11827

-9468

lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,384 files changed

+11827

-9468

lines changed

`‎1.1/.buildinfo`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`	`1`	`# Sphinx build info version 1`
`2`	`2`	`# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.`
`3`		`-config:44afcf8dd215cc5d065a44ea3a818dd0`
	`3`	`+config:d0bdad28d397ffd9f93c5892709f416d`
`4`	`4`	`tags: 645f666f9bcd5a90fca523b33c5a78b7`

`‎1.1/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip`

2.33 KB

Binary file not shown.

`‎1.1/_downloads/4cf0456267ced0f869a458ef4776d4c5/plot_release_highlights_1_1_0.py`

Lines changed: 63 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -50,7 +50,7 @@`
`50`	`50`	`ax.plot(X_1d,y,"o",alpha=0.5,markersize=1)`
`51`	`51`	`for quantile,hist in hist_quantiles.items():`
`52`	`52`	`ax.plot(X_1d,hist.predict(X),label=quantile)`
`53`		`-ax.legend(loc="lower left")`
	`53`	`+_=ax.legend(loc="lower left")`
`54`	`54`
`55`	`55`
`56`	`56`	`# %%`
`@@ -96,6 +96,7 @@`
`96`	`96`
`97`	`97`	`log_reg_input_features=log_reg[:-1].get_feature_names_out()`
`98`	`98`	`pd.Series(log_reg[-1].coef_.ravel(),index=log_reg_input_features).plot.bar()`
	`99`	`+plt.tight_layout()`
`99`	`100`
`100`	`101`
`101`	`102`	`# %%`
`@@ -161,3 +162,64 @@`
`161`	`162`	# - :class:`linear_model.GammaRegressor`
`162`	`163`	# - :class:`linear_model.PoissonRegressor`
`163`	`164`	# - :class:`linear_model.TweedieRegressor`
	`165`	`+`
	`166`	`+# %%`
	`167`	`+# MiniBatchNMF: an online version of NMF`
	`168`	`+# --------------------------------------`
	`169`	+# The new class :class:`decomposition.MiniBatchNMF` implements a faster but less
	`170`	+# accurate version of non-negative matrix factorization (:class:`decomposition.NMF`).
	`171`	+# :class:`MiniBatchNMF` divides the data into mini-batches and optimizes the NMF model
	`172`	`+# in an online manner by cycling over the mini-batches, making it better suited for`
	`173`	+# large datasets. In particular, it implements `partial_fit`, which can be used for
	`174`	`+# online learning when the data is not readily available from the start, or when the`
	`175`	`+# data does not fit into memory.`
	`176`	`+import numpy as np`
	`177`	`+from sklearn.decomposition import MiniBatchNMF`
	`178`	`+`
	`179`	`+rng=np.random.RandomState(0)`
	`180`	`+n_samples,n_features,n_components=10,10,5`
	`181`	`+true_W=rng.uniform(size=(n_samples,n_components))`
	`182`	`+true_H=rng.uniform(size=(n_components,n_features))`
	`183`	`+X=true_W @true_H`
	`184`	`+`
	`185`	`+nmf=MiniBatchNMF(n_components=n_components,random_state=0)`
	`186`	`+`
	`187`	`+for _ in range(10):`
	`188`	`+nmf.partial_fit(X)`
	`189`	`+`
	`190`	`+W=nmf.transform(X)`
	`191`	`+H=nmf.components_`
	`192`	`+X_reconstructed=W @H`
	`193`	`+`
	`194`	`+print(`
	`195`	`+f"relative reconstruction error: ",`
	`196`	`+f"{np.sum((X-X_reconstructed)**2)/np.sum(X**2):.5f}",`
	`197`	`+)`
	`198`	`+`
	`199`	`+# %%`
	`200`	`+# BisectingKMeans: divide and cluster`
	`201`	`+# -----------------------------------`
	`202`	+# The new class :class:`cluster.BisectingKMeans` is a variant of :class:`KMeans`, using
	`203`	`+# divisive hierarchical clustering. Instead of creating all centroids at once, centroids`
	`204`	`+# are picked progressively based on a previous clustering: a cluster is split into two`
	`205`	`+# new clusters repeatedly until the target number of clusters is reached, giving a`
	`206`	`+# hierarchical structure to the clustering.`
	`207`	`+from sklearn.datasets import make_blobs`
	`208`	`+from sklearn.cluster import KMeans,BisectingKMeans`
	`209`	`+import matplotlib.pyplot as plt`
	`210`	`+`
	`211`	`+X,_=make_blobs(n_samples=1000,centers=2,random_state=0)`
	`212`	`+`
	`213`	`+km=KMeans(n_clusters=5,random_state=0).fit(X)`
	`214`	`+bisect_km=BisectingKMeans(n_clusters=5,random_state=0).fit(X)`
	`215`	`+`
	`216`	`+fig,ax=plt.subplots(1,2,figsize=(10,5))`
	`217`	`+ax[0].scatter(X[:,0],X[:,1],s=10,c=km.labels_)`
	`218`	`+ax[0].scatter(km.cluster_centers_[:,0],km.cluster_centers_[:,1],s=20,c="r")`
	`219`	`+ax[0].set_title("KMeans")`
	`220`	`+`
	`221`	`+ax[1].scatter(X[:,0],X[:,1],s=10,c=bisect_km.labels_)`
	`222`	`+ax[1].scatter(`
	`223`	`+bisect_km.cluster_centers_[:,0],bisect_km.cluster_centers_[:,1],s=20,c="r"`
	`224`	`+)`
	`225`	`+_=ax[1].set_title("BisectingKMeans")`

`‎1.1/_downloads/68fdea23e50d165632d4bd4e36453cd5/plot_release_highlights_1_1_0.ipynb`

Lines changed: 38 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,7 @@`
`33`	`33`	`},`
`34`	`34`	`"outputs": [],`
`35`	`35`	`"source": [`
`36`		-"from sklearn.datasets import make_regression\nfrom sklearn.ensemble import HistGradientBoostingRegressor\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# Simple regression function for X * cos(X)\nrng = np.random.RandomState(42)\nX_1d = np.linspace(0, 10, num=2000)\nX = X_1d.reshape(-1, 1)\ny = X_1d * np.cos(X_1d) + rng.normal(scale=X_1d / 3)\n\nquantiles = [0.95, 0.5, 0.05]\nparameters = dict(loss=\"quantile\", max_bins=32, max_iter=50)\nhist_quantiles = {\n f\"quantile={quantile:.2f}\": HistGradientBoostingRegressor(\n **parameters, quantile=quantile\n ).fit(X, y)\n for quantile in quantiles\n}\n\nfig, ax = plt.subplots()\nax.plot(X_1d, y,\"o\", alpha=0.5, markersize=1)\nfor quantile, hist in hist_quantiles.items():\n ax.plot(X_1d, hist.predict(X), label=quantile)\nax.legend(loc=\"lower left\")"
	`36`	+"from sklearn.datasets import make_regression\nfrom sklearn.ensemble import HistGradientBoostingRegressor\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# Simple regression function for X * cos(X)\nrng = np.random.RandomState(42)\nX_1d = np.linspace(0, 10, num=2000)\nX = X_1d.reshape(-1, 1)\ny = X_1d * np.cos(X_1d) + rng.normal(scale=X_1d / 3)\n\nquantiles = [0.95, 0.5, 0.05]\nparameters = dict(loss=\"quantile\", max_bins=32, max_iter=50)\nhist_quantiles = {\n f\"quantile={quantile:.2f}\": HistGradientBoostingRegressor(\n **parameters, quantile=quantile\n ).fit(X, y)\n for quantile in quantiles\n}\n\nfig, ax = plt.subplots()\nax.plot(X_1d, y,\"o\", alpha=0.5, markersize=1)\nfor quantile, hist in hist_quantiles.items():\n ax.plot(X_1d, hist.predict(X), label=quantile)\n_ = ax.legend(loc=\"lower left\")"
`37`	`37`	`]`
`38`	`38`	`},`
`39`	`39`	`{`
`@@ -69,7 +69,7 @@`
`69`	`69`	`},`
`70`	`70`	`"outputs": [],`
`71`	`71`	`"source": [`
`72`		`-"import pandas as pd\n\nlog_reg_input_features = log_reg[:-1].get_feature_names_out()\npd.Series(log_reg[-1].coef_.ravel(), index=log_reg_input_features).plot.bar()"`
	`72`	`+"import pandas as pd\n\nlog_reg_input_features = log_reg[:-1].get_feature_names_out()\npd.Series(log_reg[-1].coef_.ravel(), index=log_reg_input_features).plot.bar()\nplt.tight_layout()"`
`73`	`73`	`]`
`74`	`74`	`},`
`75`	`75`	`{`
`@@ -114,6 +114,42 @@`
`114`	`114`	`"source": [`
`115`	`115`	"## Performance improvements\nReductions on pairwise distances for dense float64 datasets has been refactored\nto better take advantage of non-blocking thread parallelism. For example,\n:meth:`neighbors.NearestNeighbors.kneighbors` and\n:meth:`neighbors.NearestNeighbors.radius_neighbors` can respectively be up to \u00d720 and\n\u00d75 faster than previously. In summary, the following functions and estimators\nnow benefit from improved performance:\n\n- :func:`metrics.pairwise_distances_argmin`\n- :func:`metrics.pairwise_distances_argmin_min`\n- :class:`cluster.AffinityPropagation`\n- :class:`cluster.Birch`\n- :class:`cluster.MeanShift`\n- :class:`cluster.OPTICS`\n- :class:`cluster.SpectralClustering`\n- :func:`feature_selection.mutual_info_regression`\n- :class:`neighbors.KNeighborsClassifier`\n- :class:`neighbors.KNeighborsRegressor`\n- :class:`neighbors.RadiusNeighborsClassifier`\n- :class:`neighbors.RadiusNeighborsRegressor`\n- :class:`neighbors.LocalOutlierFactor`\n- :class:`neighbors.NearestNeighbors`\n- :class:`manifold.Isomap`\n- :class:`manifold.LocallyLinearEmbedding`\n- :class:`manifold.TSNE`\n- :func:`manifold.trustworthiness`\n- :class:`semi_supervised.LabelPropagation`\n- :class:`semi_supervised.LabelSpreading`\n\nTo know more about the technical details of this work, you can read\n`this suite of blog posts <https://blog.scikit-learn.org/technical/performances/>`_.\n\nMoreover, the computation of loss functions has been refactored using\nCython resulting in performance improvements for the following estimators:\n\n- :class:`linear_model.LogisticRegression`\n- :class:`linear_model.GammaRegressor`\n- :class:`linear_model.PoissonRegressor`\n- :class:`linear_model.TweedieRegressor`\n\n"
`116`	`116`	`]`
	`117`	`+ },`
	`118`	`+ {`
	`119`	`+"cell_type":"markdown",`
	`120`	`+"metadata": {},`
	`121`	`+"source": [`
	`122`	+"## MiniBatchNMF: an online version of NMF\nThe new class :class:`decomposition.MiniBatchNMF` implements a faster but less\naccurate version of non-negative matrix factorization (:class:`decomposition.NMF`).\n:class:`MiniBatchNMF` divides the data into mini-batches and optimizes the NMF model\nin an online manner by cycling over the mini-batches, making it better suited for\nlarge datasets. In particular, it implements `partial_fit`, which can be used for\nonline learning when the data is not readily available from the start, or when the\ndata does not fit into memory.\n\n"
	`123`	`+ ]`
	`124`	`+ },`
	`125`	`+ {`
	`126`	`+"cell_type":"code",`
	`127`	`+"execution_count":null,`
	`128`	`+"metadata": {`
	`129`	`+"collapsed":false`
	`130`	`+ },`
	`131`	`+"outputs": [],`
	`132`	`+"source": [`
	`133`	+"import numpy as np\nfrom sklearn.decomposition import MiniBatchNMF\n\nrng = np.random.RandomState(0)\nn_samples, n_features, n_components = 10, 10, 5\ntrue_W = rng.uniform(size=(n_samples, n_components))\ntrue_H = rng.uniform(size=(n_components, n_features))\nX = true_W @ true_H\n\nnmf = MiniBatchNMF(n_components=n_components, random_state=0)\n\nfor _ in range(10):\n nmf.partial_fit(X)\n\nW = nmf.transform(X)\nH = nmf.components_\nX_reconstructed = W @ H\n\nprint(\n f\"relative reconstruction error:\",\n f\"{np.sum((X - X_reconstructed) ** 2) / np.sum(X**2):.5f}\",\n)"
	`134`	`+ ]`
	`135`	`+ },`
	`136`	`+ {`
	`137`	`+"cell_type":"markdown",`
	`138`	`+"metadata": {},`
	`139`	`+"source": [`
	`140`	+"## BisectingKMeans: divide and cluster\nThe new class :class:`cluster.BisectingKMeans` is a variant of :class:`KMeans`, using\ndivisive hierarchical clustering. Instead of creating all centroids at once, centroids\nare picked progressively based on a previous clustering: a cluster is split into two\nnew clusters repeatedly until the target number of clusters is reached, giving a\nhierarchical structure to the clustering.\n\n"
	`141`	`+ ]`
	`142`	`+ },`
	`143`	`+ {`
	`144`	`+"cell_type":"code",`
	`145`	`+"execution_count":null,`
	`146`	`+"metadata": {`
	`147`	`+"collapsed":false`
	`148`	`+ },`
	`149`	`+"outputs": [],`
	`150`	`+"source": [`
	`151`	+"from sklearn.datasets import make_blobs\nfrom sklearn.cluster import KMeans, BisectingKMeans\nimport matplotlib.pyplot as plt\n\nX, _ = make_blobs(n_samples=1000, centers=2, random_state=0)\n\nkm = KMeans(n_clusters=5, random_state=0).fit(X)\nbisect_km = BisectingKMeans(n_clusters=5, random_state=0).fit(X)\n\nfig, ax = plt.subplots(1, 2, figsize=(10, 5))\nax[0].scatter(X[:, 0], X[:, 1], s=10, c=km.labels_)\nax[0].scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1], s=20, c=\"r\")\nax[0].set_title(\"KMeans\")\n\nax[1].scatter(X[:, 0], X[:, 1], s=10, c=bisect_km.labels_)\nax[1].scatter(\n bisect_km.cluster_centers_[:, 0], bisect_km.cluster_centers_[:, 1], s=20, c=\"r\"\n)\n_ = ax[1].set_title(\"BisectingKMeans\")"
	`152`	`+ ]`
`117`	`153`	`}`
`118`	`154`	`],`
`119`	`155`	`"metadata": {`