Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ece8d80

Browse files
authored
Clustering using PyGAD
Data clustering using the genetic algorithm. This example uses only 2 clusters with artificial (non-real) data.
1 parent 05a069a, commit ece8d80

File tree

1 file changed

+122
-0
lines changed

1 file changed

+122
-0
lines changed

‎example_clustering_2.py

Lines changed: 122 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,122 @@
1+
import numpy
import matplotlib.pyplot
import pygad

# Cluster 1: artificial samples drawn uniformly from the box
# x1 in [0, 5), x2 in [2, 6).
cluster1_num_samples = 10
cluster1_x1_start = 0
cluster1_x1_end = 5
cluster1_x2_start = 2
cluster1_x2_end = 6
# numpy.random.random() returns values in [0, 1); scale and shift them into [start, end).
cluster1_x1 = numpy.random.random(size=(cluster1_num_samples))
cluster1_x1 = cluster1_x1 * (cluster1_x1_end - cluster1_x1_start) + cluster1_x1_start
cluster1_x2 = numpy.random.random(size=(cluster1_num_samples))
cluster1_x2 = cluster1_x2 * (cluster1_x2_end - cluster1_x2_start) + cluster1_x2_start

# Cluster 2: artificial samples drawn uniformly from the box
# x1 in [10, 15), x2 in [8, 12).
cluster2_num_samples = 10
cluster2_x1_start = 10
cluster2_x1_end = 15
cluster2_x2_start = 8
cluster2_x2_end = 12
cluster2_x1 = numpy.random.random(size=(cluster2_num_samples))
cluster2_x1 = cluster2_x1 * (cluster2_x1_end - cluster2_x1_start) + cluster2_x1_start
cluster2_x2 = numpy.random.random(size=(cluster2_num_samples))
cluster2_x2 = cluster2_x2 * (cluster2_x2_end - cluster2_x2_start) + cluster2_x2_start

# Stack the two features as columns so that each row is one sample: shape (num_samples, 2).
c1 = numpy.array([cluster1_x1, cluster1_x2]).T
c2 = numpy.array([cluster2_x1, cluster2_x2]).T

# All samples of both clusters in a single array: cluster 1 rows first, then cluster 2 rows.
data = numpy.concatenate((c1, c2), axis=0)

# Plot the samples colored by the cluster they were generated from (the reference clustering).
matplotlib.pyplot.scatter(cluster1_x1, cluster1_x2)
matplotlib.pyplot.scatter(cluster2_x1, cluster2_x2)
matplotlib.pyplot.title("Optimal Clustering")
matplotlib.pyplot.show()
34+
35+
def euclidean_distance(X, Y):
    """
    Calculate the euclidean distance between X and Y. It accepts:
        :X should be a matrix of size (N, f) where N is the number of samples and f is the number of features for each sample.
        :Y should be of size f. In other words, it is a single sample.

    Both inputs may be NumPy arrays or array-like objects (e.g. nested lists).

    Returns a vector of N elements with the distances between the N samples and the Y.
    """

    # Convert up front so that plain lists are accepted as well as arrays;
    # Y is broadcast against every row of X by the subtraction.
    diff = numpy.asarray(X) - numpy.asarray(Y)
    return numpy.sqrt(numpy.sum(numpy.power(diff, 2), axis=1))
45+
46+
def cluster_data(solution, solution_idx):
    """
    Clusters the data based on the current solution.

    The module-level names data and num_clusters are read by this function and must be defined before it is called.

        :solution is a flat sequence of genes. With f = data.shape[1], the genes at positions [c*f, (c+1)*f) are the center of the cluster with index c.
        :solution_idx is the index of the solution. It is accepted for interface compatibility and is not used here.

    Returns a tuple of 5 elements:
        cluster_centers: An array of size (C, f) where C is the number of clusters and f is the number of features representing each sample.
        all_clusters_dists: An array of size (C, N) where N is the number of data samples. It holds the distances between each cluster center and all the data samples.
        cluster_indices: A 1D array of N elements that, for each sample, holds the index of the cluster with the smallest distance.
        clusters: A list with C elements where each element holds the indices of the samples within a cluster.
        clusters_sum_dist: An array with C elements where each element is the sum of distances between the samples of a cluster and its center.
    """

    # data and num_clusters are only read, so no global declaration is needed.
    feature_vector_length = data.shape[1]

    cluster_centers = []
    all_clusters_dists = []
    for clust_idx in range(num_clusters):
        # Slice the genes that encode the current cluster center.
        start = feature_vector_length * clust_idx
        center = solution[start:start + feature_vector_length]
        cluster_centers.append(center)
        # Calculate the distance (e.g. euclidean) between the current cluster center and all samples.
        all_clusters_dists.append(numpy.array(euclidean_distance(data, center)))

    cluster_centers = numpy.array(cluster_centers)
    all_clusters_dists = numpy.array(all_clusters_dists)

    # For each sample (column), pick the cluster (row) with the smallest distance.
    cluster_indices = numpy.argmin(all_clusters_dists, axis=0)

    clusters = []
    clusters_sum_dist = []
    for clust_idx in range(num_clusters):
        members = numpy.where(cluster_indices == clust_idx)[0]
        clusters.append(members)
        if len(members) == 0:
            # In case the cluster is empty (i.e. has zero samples).
            clusters_sum_dist.append(0)
        else:
            # When the cluster is not empty (i.e. has at least 1 sample).
            clusters_sum_dist.append(numpy.sum(all_clusters_dists[clust_idx, members]))

    clusters_sum_dist = numpy.array(clusters_sum_dist)

    return cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist
85+
86+
def fitness_func(solution, solution_idx):
    """
    Fitness function handed to pygad.GA.

    The fitness is the reciprocal of the total distance between every sample and the center of the cluster it is assigned to, so a smaller total distance gives a higher fitness.

        :solution is the flat vector of cluster-center coordinates to evaluate.
        :solution_idx is the index of the solution; it is forwarded to cluster_data().

    Returns the fitness value as a float.

    NOTE(review): newer PyGAD releases appear to pass the GA instance as an extra first argument to the fitness function - confirm against the installed PyGAD version.
    """
    _, _, _, _, clusters_sum_dist = cluster_data(solution, solution_idx)

    # The tiny value 0.00000001 is added to the denominator in case the sum of distances is 0.
    fitness = 1.0 / (numpy.sum(clusters_sum_dist) + 0.00000001)

    return fitness
93+
94+
num_clusters = 2
# Each cluster center needs one gene per feature.
num_genes = num_clusters * data.shape[1]

ga_instance = pygad.GA(num_generations=100,
                       sol_per_pop=10,
                       num_parents_mating=5,
                       init_range_low=-6,
                       init_range_high=20,
                       keep_parents=2,
                       num_genes=num_genes,
                       fitness_func=fitness_func,
                       suppress_warnings=True)

ga_instance.run()

best_solution, best_solution_fitness, best_solution_idx = ga_instance.best_solution()
print("Best solution is {bs}".format(bs=best_solution))
print("Fitness of the best solution is {bsf}".format(bsf=best_solution_fitness))
print("Best solution found after {gen} generations".format(gen=ga_instance.best_solution_generation))

# Re-cluster the data with the best solution to recover the cluster centers and memberships.
cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist = cluster_data(best_solution, best_solution_idx)

# Draw each cluster's samples followed by its center (drawn with a thicker marker edge).
for cluster_idx in range(num_clusters):
    cluster_x = data[clusters[cluster_idx], 0]
    cluster_y = data[clusters[cluster_idx], 1]
    matplotlib.pyplot.scatter(cluster_x, cluster_y)
    matplotlib.pyplot.scatter(cluster_centers[cluster_idx, 0], cluster_centers[cluster_idx, 1], linewidths=5)
matplotlib.pyplot.title("Clustering using PyGAD")
matplotlib.pyplot.show()

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp