码迷,mamicode.com
首页 > 编程语言 > 详细

kmeans算法

时间:2016-07-20 09:08:42      阅读:409      评论:0      收藏:0      [点我收藏+]

标签:

 1 # coding:utf-8
 2 import numpy as np
 3 import matplotlib.pyplot as plt
 4 
 5 def dis(x, y): #计算距离
 6     return np.sum(np.power(y - x, 2))
 7 
 8 def dataN(length,k):#生成数据
 9     z=range(k)
10     c=[5]*length
11     a1= [np.sin(i*2*np.pi/k) for i in range(k)]
12     a2= [np.cos(i*2*np.pi/k) for i in range(k)]
13     x=[[[i*j + np.random.uniform(0,5)]for i in c]for j in a1]
14     y=[[[i*j + np.random.uniform(0,5)]for i in c]for j in a2]
15     return x,y,z
16 
def showP(x, y, z):
    """Plot the raw generated points on figure 1, one marker style per cluster.

    Args:
        x: Nested list of x coordinates indexed as ``[cluster][point]``.
        y: Nested list of y coordinates with the same layout as ``x``.
        z: Iterable of integer cluster labels used to index ``x`` and ``y``.
    """
    plt.figure(1)
    # matplotlib format strings: marker character followed by a colour letter.
    color = ["or", "ob", "og", "ok", "^r", "+r", "sr", "dr", "<r", "pr"]
    for j in z:
        # Wrap around so more clusters than styles reuses markers instead of
        # raising IndexError.
        style = color[j % len(color)]
        # Iterate the cluster's own points rather than a module-level
        # ``length`` global.
        for px, py in zip(x[j], y[j]):
            plt.plot(px, py, style)
23 
def initCentroids(dataSet, k):
    """Pick ``k`` rows of ``dataSet`` at random as the initial centroids.

    Rows are drawn without replacement whenever ``k <= n`` so that the same
    data point is never chosen twice; duplicate centroids would leave one
    cluster empty. When ``k > n`` rows are drawn with replacement, which is
    the only way to return ``k`` centroids.

    Args:
        dataSet: 2-D array-like of shape ``(n, d)``.
        k: Number of centroids to pick.

    Returns:
        A new float ``ndarray`` of shape ``(k, d)``; modifying it does not
        affect ``dataSet``.
    """
    points = np.asarray(dataSet, dtype=float)
    n, d = points.shape
    chosen = np.random.choice(n, size=k, replace=k > n)
    # Fancy indexing copies the selected rows.
    return points[chosen].reshape(k, d)
31 
def kmeans(dataSet, k):
    """Cluster the rows of ``dataSet`` into ``k`` groups with Lloyd's k-means.

    Alternates between assigning every point to its nearest centroid and
    moving every centroid to the mean of its points, until no assignment
    changes during a full pass.

    Args:
        dataSet: 2-D array-like of shape ``(n, d)``.
        k: Number of clusters.

    Returns:
        A tuple ``(centroids, clusterAssment)``. ``centroids`` is an array
        with one row per cluster. ``clusterAssment`` is an ``(n, 2)``
        ``np.matrix`` whose column 0 holds the cluster index of each point
        and column 1 the squared distance to that cluster's centroid as of
        the last assignment pass.
    """
    points = np.asarray(dataSet, dtype=float)
    n = points.shape[0]
    assignment = np.zeros((n, 2))
    # -1 marks "not assigned yet". Starting at 0 would be indistinguishable
    # from cluster 0, so points nearest centroid 0 would never be recorded
    # and the loop could exit before any centroid was updated.
    assignment[:, 0] = -1
    centroids = initCentroids(points, k)
    clusterChanged = True
    while clusterChanged:
        clusterChanged = False
        for i in range(n):
            distances = [dis(centroids[j], points[i]) for j in range(k)]
            # argmin returns the first minimum, matching list.index(min(...)).
            minIndex = int(np.argmin(distances))
            if assignment[i, 0] != minIndex:
                clusterChanged = True
            # Always refresh the stored distance: centroids move between
            # passes even when a point keeps its cluster.
            assignment[i] = minIndex, distances[minIndex]
        for j in range(k):
            members = points[assignment[:, 0] == j]
            # An empty cluster keeps its previous centroid; the mean of zero
            # rows would be NaN and poison every later distance.
            if len(members):
                centroids[j] = members.mean(axis=0)
    # np.mat was removed in NumPy 2.0; np.asmatrix is the same conversion and
    # keeps the np.matrix return type callers received before.
    return centroids, np.asmatrix(assignment)
50 
def showCluster(dataSet, k, centroids, clusterAssment):
    """Plot the clustering result on figure 2 and call ``plt.show()``.

    Each data point is drawn with the marker style of its assigned cluster,
    then each centroid is drawn with a larger marker.

    Args:
        dataSet: 2-D array or matrix of points; columns 0 and 1 are plotted.
        k: Number of clusters, i.e. the number of centroid rows to draw.
        centroids: 2-D array with one row per cluster centre.
        clusterAssment: 2-D array or matrix whose column 0 holds the cluster
            index of the corresponding row of ``dataSet``.
    """
    plt.figure(2)
    # matplotlib format strings: marker character followed by a colour letter.
    point_marks = ["or", "ob", "og", "ok", "^r", "+r", "sr", "dr", "<r", "pr"]
    centroid_marks = ["Dr", "Db", "Dg", "Dk", "^b", "+b", "sb", "db", "<b", "pb"]
    for i in range(len(dataSet)):
        markIndex = int(clusterAssment[i, 0])
        # Wrap around so more clusters than styles reuses markers instead of
        # raising IndexError.
        style = point_marks[markIndex % len(point_marks)]
        plt.plot(dataSet[i, 0], dataSet[i, 1], style)
    for i in range(k):
        style = centroid_marks[i % len(centroid_marks)]
        plt.plot(centroids[i, 0], centroids[i, 1], style, markersize=8)
    plt.show()
62 
# Demo driver: generate synthetic clusters, plot them, run k-means, plot result.
length = 200  # points generated per cluster
k = 8  # number of clusters (original author's note: k <= 8)
x, y, z = dataN(length, k)
showP(x, y, z)

# Flatten the nested [cluster][point][0] coordinate lists and pair them up as
# an (n, 2) array. The original np.mat(zip(...)) fails on Python 3, where zip
# returns an iterator, and on NumPy 2.0, where np.mat was removed.
dataSet = np.column_stack((np.ravel(x), np.ravel(y)))
centroids, clusterAssment = kmeans(dataSet, k)
showCluster(dataSet, k, centroids, clusterAssment)

技术分享技术分享

kmeans算法

标签:

原文地址:http://www.cnblogs.com/qw12/p/5686940.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!