K-Means算法相对简单,下面给出具体的程序实现与运行结果:
//Snapshot of the centroids from the previous iteration.
//k_mean() points this at a heap buffer of countf * countc doubles and
//copies the current centroids into it before recomputing them.
double *ccb = NULL;
/*
 * K-means clustering.
 *
 * Repeats two steps until the centroids stop moving:
 *   1. label every sample with the index of its nearest centroid
 *      (written to sample->y[i] via k_mean_class);
 *   2. recompute every centroid as the mean of the samples carrying its
 *      label (k_mean_cc).
 *
 * sample : data set; x is read as countx rows of countf values, y receives
 *          one label per row.
 * countc : number of clusters, must be greater than 1.
 * cc     : in/out, countc rows of countf doubles. Holds the initial
 *          centroids on entry and the final centroids on return.
 *
 * Returns 0 on success, -1 on invalid arguments or allocation failure.
 *
 * The file-scope pointer ccb is used as scratch storage for the previous
 * centroids; it is released and reset to NULL before a successful return.
 *
 * NOTE: cc is zeroed before k_mean_cc accumulates into it, so a cluster that
 * ends up with no samples gets an all-zero centroid for the next iteration.
 */
int k_mean(s_Sample *sample, int countc, double *cc)
{
    //Movement threshold, compared against the squared distance returned by k_mean_distance
    const double converge_eps = 0.001;

    if (sample == NULL || cc == NULL)
    {
        return -1;
    }
    if (countc <= 1)
    {
        return -1;
    }
    //A non-positive feature count would yield a zero or wrapped allocation size
    if (sample->countf <= 0)
    {
        return -1;
    }

    size_t bytes = sizeof(double) * (size_t)sample->countf * (size_t)countc;

    //Scratch buffer holding the centroids of the previous iteration
    ccb = malloc(bytes);
    if (ccb == NULL)
    {
        return -1;
    }

    for (;;)
    {
        //Assignment step: label each sample with its nearest centroid
        for (int i = 0; i < sample->countx; i++)
        {
            sample->y[i] = k_mean_class(&sample->x[i * sample->countf], sample->countf, countc, cc);
        }

        //Keep the old centroids for the convergence test, then clear cc
        //because k_mean_cc accumulates sums into it
        memcpy(ccb, cc, bytes);
        memset(cc, 0, bytes);

        //Update step: recompute the centroids from the new labels
        k_mean_cc(sample, countc, cc);

        //Converged only when every centroid moved by at most the threshold
        int converged = 1;
        for (int i = 0; i < countc; i++)
        {
            double dis = k_mean_distance(&cc[i * sample->countf], &ccb[i * sample->countf], sample->countf);
            if (dis > converge_eps)
            {
                converged = 0;
                break;
            }
        }
        if (converged)
        {
            break;
        }
    }

    //Release the scratch buffer so repeated calls do not leak
    free(ccb);
    ccb = NULL;
    return 0;
}
/*
 * Distance reference value between two feature vectors.
 *
 * Returns the sum of squared per-feature differences (the squared Euclidean
 * distance; no square root is taken). Only the first countf elements of x
 * and x2 are read. Returns 0 when countf <= 0.
 */
double k_mean_distance(double *x, double *x2, int countf)
{
    double dis = 0.0;
    for (int i = 0; i < countf; i++)
    {
        //Plain multiplication instead of pow(d, 2): no libm call in the inner loop
        double diff = x[i] - x2[i];
        dis += diff * diff;
    }
    return dis;
}
/*
 * Classifies one sample: computes the squared distance from x to each
 * centroid and returns the index of the closest one.
 *
 * x      : sample vector, countf values.
 * countf : number of features, must be greater than 0.
 * countc : number of centroids, must be greater than 1.
 * cc     : centroids, countc rows of countf doubles.
 *
 * Returns the index (0 .. countc-1) of the nearest centroid, or -1 on
 * invalid arguments. On a tie the lowest index wins.
 */
int k_mean_class(double *x, int countf, int countc, double *cc)
{
    if (x == NULL || cc == NULL)
    {
        return -1;
    }
    if (countf <= 0 || countc <= 1)
    {
        return -1;
    }

    double dis_min = 0.0;
    int k = 0;
    for (int i = 0; i < countc; i++)
    {
        double dis = 0.0;
        for (int j = 0; j < countf; j++)
        {
            double diff = x[j] - cc[i * countf + j];
            dis += diff * diff;
        }
        //Seed the minimum from the first centroid rather than a fixed large
        //constant, so samples farther than that constant from every centroid
        //are still assigned to the genuinely nearest one
        if (i == 0 || dis < dis_min)
        {
            dis_min = dis;
            k = i;
        }
    }
    return k;
}
/*
 * Recomputes the centroids from the current labels.
 *
 * For each cluster index, the feature values of every sample whose label
 * (sample->y[i]) equals that index are added onto the cluster's row in cc,
 * and the row is then divided by the number of such samples.
 *
 * The sums are accumulated onto whatever cc already holds, so the caller
 * must zero cc beforehand. A cluster with no samples is left unmodified.
 *
 * Returns 0 on success, -1 when sample or cc is NULL or countc <= 1.
 */
int k_mean_cc(s_Sample *sample, int countc, double *cc)
{
    if (sample == 0 || countc <= 1 || cc == NULL)
    {
        return -1;
    }

    for (int cluster = 0; cluster < countc; cluster++)
    {
        //Row of cc that belongs to this cluster
        double *centroid = &cc[cluster * sample->countf];
        double members = 0;

        //Sum the features of every sample labelled with this cluster
        for (int row = 0; row < sample->countx; row++)
        {
            if (sample->y[row] != cluster)
            {
                continue;
            }
            for (int f = 0; f < sample->countf; f++)
            {
                centroid[f] += sample->x[row * sample->countf + f];
            }
            members++;
        }

        //Turn the sums into means; skipped when the cluster has no samples
        if (members > 0)
        {
            for (int f = 0; f < sample->countf; f++)
            {
                centroid[f] /= members;
            }
        }
    }
    return 0;
}
样本数据如下(首行为样本数与特征数,其后每行为一个样本):
60,2
2,7
4,5
7,8
1,3
8,6
2,9
3,8
8,7
2,4
4,3
6,7
1,7
9,1
2,6
3,5
3,9
4,8
8,9
5,3
2,9
8,3
1,6
4,3
7,9
3,8
6,3
1,9
1,9
5,0
5,5
14,18
18,10
10,11
16,16
16,15
10,18
17,16
10,11
10,11
19,19
13,15
17,17
16,16
16,10
15,11
12,13
16,11
12,10
19,18
13,19
19,15
13,13
16,16
19,13
15,18
14,11
18,10
16,11
13,15
18,14
运行结果:
cluster centroid 1: 4.17, 5.97
cluster centroid 2: 15.00, 14.03
2.000000 7.000000 0
4.000000 5.000000 0
7.000000 8.000000 0
1.000000 3.000000 0
8.000000 6.000000 0
2.000000 9.000000 0
3.000000 8.000000 0
8.000000 7.000000 0
2.000000 4.000000 0
4.000000 3.000000 0
6.000000 7.000000 0
1.000000 7.000000 0
9.000000 1.000000 0
2.000000 6.000000 0
3.000000 5.000000 0
3.000000 9.000000 0
4.000000 8.000000 0
8.000000 9.000000 0
5.000000 3.000000 0
2.000000 9.000000 0
8.000000 3.000000 0
1.000000 6.000000 0
4.000000 3.000000 0
7.000000 9.000000 0
3.000000 8.000000 0
6.000000 3.000000 0
1.000000 9.000000 0
1.000000 9.000000 0
5.000000 0.000000 0
5.000000 5.000000 0
14.000000 18.000000 1
18.000000 10.000000 1
10.000000 11.000000 1
16.000000 16.000000 1
16.000000 15.000000 1
10.000000 18.000000 1
17.000000 16.000000 1
10.000000 11.000000 1
10.000000 11.000000 1
19.000000 19.000000 1
13.000000 15.000000 1
17.000000 17.000000 1
16.000000 16.000000 1
16.000000 10.000000 1
15.000000 11.000000 1
12.000000 13.000000 1
16.000000 11.000000 1
12.000000 10.000000 1
19.000000 18.000000 1
13.000000 19.000000 1
19.000000 15.000000 1
13.000000 13.000000 1
16.000000 16.000000 1
19.000000 13.000000 1
15.000000 18.000000 1
14.000000 11.000000 1
18.000000 10.000000 1
16.000000 11.000000 1
13.000000 15.000000 1
18.000000 14.000000 1
Copyright © 2015-2023 问渠网 辽ICP备15013245号