kmeans_clustering.py
# K-means demo: cluster a synthetic two-feature dataset and scatter-plot the
# points, one colour per predicted cluster.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import KMeans
from matplotlib import pyplot

# Single source of truth for the cluster count, so the model and the plot
# title cannot drift apart (the title previously claimed 3 clusters while
# the model was fitted with 2).
N_CLUSTERS = 2

# 2 features, 2 informative, 0 redundant, 1 cluster per class
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Fit the model, then predict the cluster of every data point.
model = KMeans(n_clusters=N_CLUSTERS)
model.fit(X)
labels = model.predict(X)

# The title does not depend on the cluster being drawn, so set it once.
pyplot.title('K-means (No. of Clusters = {})'.format(N_CLUSTERS))

# One scatter call per cluster; each call picks up the next default colour.
for cluster_id in np.unique(labels):
    members = labels == cluster_id
    pyplot.scatter(X[members, 0], X[members, 1])

# Show the plot.
pyplot.show()
minibatch_kmeans_clustering.py
# Mini Batch K-means demo: cluster a synthetic two-feature dataset into three
# groups and scatter-plot the points, one colour per predicted cluster.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from matplotlib import pyplot

X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# 3 clusters: fit the model, then predict the cluster of every data point.
model = MiniBatchKMeans(n_clusters=3)
labels = model.fit(X).predict(X)

# The title is the same for every cluster, so it is set once up front.
pyplot.title('Mini Batch K-means')

# One scatter call per cluster; each call picks up the next default colour.
for cluster_id in np.unique(labels):
    members = labels == cluster_id
    pyplot.scatter(X[members, 0], X[members, 1])

# Show the plot.
pyplot.show()
time_diff_minibatch_and_kmeans.py
# Timing comparison: wall-clock cost of fit + predict for MiniBatchKMeans
# versus KMeans on the same synthetic dataset (single run each).
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans
from matplotlib import pyplot
import timeit

X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# --- Mini Batch K-Means: time construction, fit and predict ---
mbk_start = timeit.default_timer()
mbk_model = MiniBatchKMeans(n_clusters=2)
mbk_model.fit(X)
mbk_labels = mbk_model.predict(X)
mbk_stop = timeit.default_timer()

# --- K-Means: time construction, fit and predict ---
km_start = timeit.default_timer()
km_model = KMeans(n_clusters=2)
km_model.fit(X)
km_labels = km_model.predict(X)
km_stop = timeit.default_timer()

# Positive result means K-Means took longer than Mini Batch K-Means.
km_elapsed = km_stop - km_start
mbk_elapsed = mbk_stop - mbk_start
print("Time difference between Mini Batch K-Means and K-Means = ", km_elapsed - mbk_elapsed)
affinity_propagation.py
# Affinity Propagation demo: cluster a synthetic two-feature dataset and
# scatter-plot the points, one colour per predicted cluster.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AffinityPropagation
from matplotlib import pyplot

X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Initialise and fit the model, then predict the cluster of every point.
model = AffinityPropagation(damping=0.9)
model.fit(X)
labels = model.predict(X)

# The title is the same for every cluster, so it is set once up front.
pyplot.title('Affinity Propagation Clustering')

# One scatter call per cluster; each call picks up the next default colour.
for cluster_id in np.unique(labels):
    members = labels == cluster_id
    pyplot.scatter(X[members, 0], X[members, 1])

# Show the plot.
pyplot.show()
dbscan_clustering.py
# DBSCAN demo: cluster a synthetic two-feature dataset and scatter-plot the
# points, one colour per assigned label.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import DBSCAN
from matplotlib import pyplot

X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Initialise the model, then fit it and label every data point in one step.
model = DBSCAN(eps=0.05, min_samples=10)
labels = model.fit_predict(X)

# The title is the same for every label, so it is set once up front.
pyplot.title('DBSCAN Clustering')

# One scatter call per distinct label.
# NOTE(review): scikit-learn documents label -1 as "noise"; such points are
# drawn here as if they were a cluster of their own - confirm that is intended.
for cluster_id in np.unique(labels):
    members = labels == cluster_id
    pyplot.scatter(X[members, 0], X[members, 1])

# Show the plot.
pyplot.show()
optics.py
# OPTICS demo: cluster a synthetic two-feature dataset and scatter-plot the
# points, one colour per assigned label.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import OPTICS
from matplotlib import pyplot

X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Initialise the model, then fit it and label every data point in one step.
# NOTE(review): scikit-learn documents `eps` as used only when
# cluster_method='dbscan'; with the default method it may have no effect -
# confirm against the installed version.
model = OPTICS(eps=0.5, min_samples=10)
labels = model.fit_predict(X)

# The title is the same for every label, so it is set once up front.
pyplot.title('OPTICS Clustering')

# One scatter call per distinct label; each call picks up the next colour.
for cluster_id in np.unique(labels):
    members = labels == cluster_id
    pyplot.scatter(X[members, 0], X[members, 1])

# Show the plot.
pyplot.show()
birch.py
# Birch demo: cluster a synthetic two-feature dataset into two groups and
# scatter-plot the points, one colour per assigned cluster.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import Birch
from matplotlib import pyplot

X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Initialise the model with 2 clusters, then fit and label in one step.
model = Birch(threshold=0.05, n_clusters=2)
labels = model.fit_predict(X)

# The title is the same for every cluster, so it is set once up front.
pyplot.title('Birch Clustering')

# One scatter call per cluster; each call picks up the next default colour.
for cluster_id in np.unique(labels):
    members = labels == cluster_id
    pyplot.scatter(X[members, 0], X[members, 1])

# Show the plot.
pyplot.show()
agglomerative_clustering.py
# Agglomerative clustering demo: cluster a synthetic two-feature dataset into
# three groups and scatter-plot the points, one colour per assigned cluster.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AgglomerativeClustering
from matplotlib import pyplot

X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Initialise the model with 3 clusters, then fit and label in one step.
model = AgglomerativeClustering(n_clusters=3)
labels = model.fit_predict(X)

# The title is the same for every cluster, so it is set once up front.
pyplot.title('Agglomerative Clustering')

# One scatter call per cluster; each call picks up the next default colour.
for cluster_id in np.unique(labels):
    members = labels == cluster_id
    pyplot.scatter(X[members, 0], X[members, 1])

# Show the plot.
pyplot.show()
meanshift_clustering.py
# Mean Shift demo: cluster a synthetic two-feature dataset and scatter-plot
# the points, one colour per assigned cluster.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MeanShift
from matplotlib import pyplot

X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Initialise the model with default settings, then fit and label in one step.
model = MeanShift()
labels = model.fit_predict(X)

# The title is the same for every cluster, so it is set once up front.
pyplot.title('Mean Shift Clustering')

# One scatter call per cluster; each call picks up the next default colour.
for cluster_id in np.unique(labels):
    members = labels == cluster_id
    pyplot.scatter(X[members, 0], X[members, 1])

# Show the plot.
pyplot.show()
spectral_clustering.py
# Spectral clustering demo: cluster a synthetic two-feature dataset into
# three groups and scatter-plot the points, one colour per assigned cluster.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import SpectralClustering
from matplotlib import pyplot

X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Initialise the model with 3 clusters, then fit and label in one step.
model = SpectralClustering(n_clusters=3)
labels = model.fit_predict(X)

# The title is the same for every cluster, so it is set once up front.
pyplot.title('Spectral Clustering')

# One scatter call per cluster; each call picks up the next default colour.
for cluster_id in np.unique(labels):
    members = labels == cluster_id
    pyplot.scatter(X[members, 0], X[members, 1])

# Show the plot.
pyplot.show()
gmm.py
# Gaussian Mixture demo: fit a two-component mixture to a synthetic
# two-feature dataset and scatter-plot the points, one colour per component.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.mixture import GaussianMixture
from matplotlib import pyplot

X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Initialise the model with 2 components, then fit and label in one step.
model = GaussianMixture(n_components=2)
labels = model.fit_predict(X)

# The title is the same for every component, so it is set once up front.
pyplot.title('Gaussian Mixture Clustering')

# One scatter call per component; each call picks up the next default colour.
for cluster_id in np.unique(labels):
    members = labels == cluster_id
    pyplot.scatter(X[members, 0], X[members, 1])

# Show the plot.
pyplot.show()
metrics.py
# Clustering-quality metrics demo: print homogeneity, completeness and
# V-measure for a small hand-written pair of label lists.
from sklearn import metrics

y_true = [5, 3, 5, 4, 4, 5]
y_pred = [3, 5, 5, 4, 3, 4]

# Scores are printed in this order, one per line:
#   homogeneity  - each cluster contains only members of a single class
#   completeness - all members of a given class are assigned to the same cluster
#   v-measure    - harmonic mean of homogeneity and completeness
score_functions = (
    metrics.homogeneity_score,
    metrics.completeness_score,
    metrics.v_measure_score,
)
for score in score_functions:
    print(score(y_true, y_pred))