// Tags:
// K-means algorithm
#include<iostream>
#include<fstream>
#include<stdlib.h>
#include<math.h>
using namespace std;
/* Compile-time configuration of the clustering run. */
#define K 4 /* number of dimensions (features) per sample */
#define C 8 /* number of clusters */
#define N 150 /* number of samples */
#define IterMax 5/* maximum number of iterations */
#define IterTherhold 0.0000001 /* stop once fitness changes by no more than this between iterations */
/*
 * One sample.
 *   p     - the K feature values of the sample.
 *   Lable - index (0..C-1) of the cluster the sample is currently assigned to.
 *   dis   - distance from the sample to each of the C centroids. It is indexed
 *           by cluster number, so it must hold C entries; sizing it by K would
 *           let writes for clusters K..C-1 run past the end of the array
 *           whenever C > K.
 */
typedef struct{
    double p[K];
    int Lable;
    double dis[C];
}Data;
/* All samples. */
Data dat[N];
/* Current centroids: cluster[c][d] is dimension d of centroid c. */
double cluster[C][K] = {0.0};
/* Sum of every sample's distance to its assigned centroid: previous and current iteration. */
double oldfitness = 0.0;
double fitness = 0.0;
/*
 * Reports whether `index` occurs among the first `n` entries of `rand_num`.
 * Returns false when `n` is zero or negative (nothing is inspected).
 */
bool is_equal(int rand_num[], int n, int index)
{
    int pos = 0;
    /* Advance until a match is found or the inspected prefix is exhausted. */
    while (pos < n && rand_num[pos] != index) {
        ++pos;
    }
    return pos < n;
}
/*
 * Loads the samples from the file "test.data": N samples, each consisting of
 * K whitespace-separated numeric values, stored into dat[i].p[0..K-1] in
 * file order.
 *
 * If the file cannot be opened, or a value cannot be extracted (missing or
 * malformed data), a diagnostic is written to std::cerr and reading stops;
 * entries after the failing one are not written by this function.
 */
void input_data()
{
    std::ifstream in("test.data", std::ios::in);
    if (!in) {
        std::cerr << "input_data: cannot open test.data" << std::endl;
        return;
    }
    for (int i = 0; i < N; i++) {
        for (int k = 0; k < K; k++) {
            if (!(in >> dat[i].p[k])) {
                /* Once extraction fails the stream stays in a failed state,
                   so every later read would fail as well. */
                std::cerr << "input_data: failed to read value " << k
                          << " of sample " << i << " from test.data" << std::endl;
                return;
            }
        }
    }
}
//初始化质心
void Init_center()
{
int rand_num[C] ={0} ;
int i = 0;
while(i < C) {
int index = rand()%N;
if(!is_equal(rand_num, i, index)) {
rand_num[i++] = index;
}
}
for(int i = 0; i < K ; i++) {
for(int j = 0; j < C; j++) {
cluster[j][i] = dat[rand_num[j]].p[i];
}
}
}
/*
 * Euclidean distance between sample dat[x] and centroid cluster[y],
 * taken over all K dimensions.
 */
double Eulid_dis(int x, int y) {
    double sum_of_squares = 0.0;
    for (int d = 0; d < K; ++d) {
        const double delta = dat[x].p[d] - cluster[y][d];
        sum_of_squares += pow(delta, 2);
    }
    return sqrt(sum_of_squares);
}
void Make_new_cluster()
{
double bias = 0.0;
for(int i = 0; i < N; i++) {
double mindis = dat[i].dis[0];
dat[i].Lable = 0;
for(int j = 1; j < C; j++) {
if(mindis > dat[i].dis[j]) {
mindis = dat[i].dis[j];
dat[i].Lable = j;
}
}
}
for(int i = 0; i < N; i++) {
bias += dat[i].dis[dat[i].Lable];
}
oldfitness = fitness;
fitness = bias;
}
/*
 * Fills dat[s].dis[c] with the distance from sample s to centroid c, for
 * every sample and every centroid.
 */
void calculate_distance()
{
    for (int s = 0; s < N; ++s) {
        double* distances = dat[s].dis;
        for (int c = 0; c < C; ++c) {
            distances[c] = Eulid_dis(s, c);
        }
    }
}
void Make_new_center()
{
for(int i = 0; i < C; i++) {
for(int k = 0; k < K; k++) {
double tmp = 0.0;
int total = 0;
for(int j = 0; j < N; j++) {
if(dat[j].Lable == i) {
tmp += dat[j].p[k];
total++;
}
}
if(total > 0) {
cluster[i][k] = tmp/total;
}
}
}
}
/************************************
* Program entry point               *
************************************/
/*
 * Loads the samples, picks the initial centroids, then repeats the
 * distance / assignment / centroid-update steps until either IterMax
 * iterations have run or the fitness changed by no more than IterTherhold
 * between two consecutive iterations. The fitness is printed after every
 * iteration; the final centroids are printed one per line, with each
 * coordinate followed by a tab.
 */
int main()
{
    input_data();
    Init_center();
    int iter = 0;
    double differ = 1.0;
    while (iter < IterMax && differ > IterTherhold) {
        calculate_distance();
        Make_new_cluster();
        Make_new_center();
        /* fabs, not abs: an unqualified abs can resolve to the integer
           abs(int) on some toolchains, which would truncate the difference
           and end the loop prematurely. */
        differ = fabs(oldfitness - fitness);
        std::cout << fitness << std::endl;
        iter++;
    }
    for (int c = 0; c < C; ++c) {
        for (int d = 0; d < K; d++) {
            std::cout << cluster[c][d] << "\t";
        }
        std::cout << std::endl;
    }
    return 0;
}
// Tags:
// Original article: http://www.cnblogs.com/LyningCoder/p/4301087.html