机器学习笔记

    返回首页    发表留言
本文作者:李德强
          第二节 程序实现
 
 

        K-Means算法相对简单,下面给出具体的程序实现与运行结果:

// Buffer for the centroids of the previous iteration.
// k_mean() points it at a malloc'd block and copies the current centroids
// into it before recomputing them, so the old and new sets can be compared.
double *ccb = NULL;

// K-Means clustering: alternates between labelling samples and recomputing
// centroids until no centroid moves by more than a small threshold.
//
// sample : data set; sample->x holds countx rows of countf features laid out
//          row by row, and sample->y receives the class index of each row.
// countc : number of clusters; must be greater than 1.
// cc     : countc * countf doubles, one row per centroid. On entry it must hold
//          the initial centroids (they are read before being overwritten); on
//          return it holds the final centroids.
//
// Returns 0 once the centroids have converged, or -1 on invalid arguments or
// when the scratch buffer cannot be allocated.
//
// The file-scope pointer ccb is used as scratch storage, so the function is
// not reentrant. The buffer is released and ccb reset to NULL before returning.
int k_mean(s_Sample *sample, int countc, double *cc)
{
    if (sample == NULL || cc == NULL)
    {
        return -1;
    }

    if (countc <= 1)
    {
        return -1;
    }

    // A non-positive feature count would turn into a zero or huge allocation
    // size below, and k_mean_class() rejects it anyway.
    if (sample->countf <= 0)
    {
        return -1;
    }

    // Refuse sizes whose byte count would not fit in size_t.
    if ((size_t)countc > (size_t)-1 / sizeof(double) / (size_t)sample->countf)
    {
        return -1;
    }
    const size_t bytes = sizeof(double) * (size_t)sample->countf * (size_t)countc;

    // Scratch copy of the centroids from the previous iteration.
    ccb = malloc(bytes);
    if (ccb == NULL)
    {
        return -1;
    }

    // Squared-distance threshold below which a centroid counts as unchanged.
    const double threshold = 0.001;

    int converged = 0;
    while (!converged)
    {
        // Assign every sample to its nearest centroid.
        for (int i = 0; i < sample->countx; i++)
        {
            sample->y[i] = k_mean_class(&sample->x[i * sample->countf], sample->countf, countc, cc);
        }

        // Remember the current centroids, then rebuild them from the new labels.
        // k_mean_cc() accumulates into cc, so cc has to start from zero.
        memcpy(ccb, cc, bytes);
        memset(cc, 0, bytes);
        k_mean_cc(sample, countc, cc);

        // Converged only if every centroid stayed within the threshold.
        converged = 1;
        for (int i = 0; i < countc; i++)
        {
            double dis = k_mean_distance(&cc[i * sample->countf], &ccb[i * sample->countf], sample->countf);
            if (dis > threshold)
            {
                converged = 0;
                break;
            }
        }
    }

    // Release the scratch buffer so repeated calls do not leak memory.
    free(ccb);
    ccb = NULL;

    return 0;
}

// Squared Euclidean distance between two feature vectors.
//
// x, x2  : vectors with at least countf elements each (not checked for NULL).
// countf : number of components to compare; a value <= 0 yields 0.
//
// Returns the sum of the squared component differences. No square root is
// taken, so the result is only a reference value for comparing distances.
double k_mean_distance(double *x, double *x2, int countf)
{
    double dis = 0;
    for (int i = 0; i < countf; i++)
    {
        // Square by multiplication rather than pow(d, 2): the product is
        // exactly rounded and avoids a general-purpose libm call per element.
        double d = x[i] - x2[i];
        dis += d * d;
    }
    return dis;
}

// Finds the centroid closest to one sample and returns its index.
//
// x      : the sample, countf features.
// countf : number of features per sample/centroid; must be > 0.
// countc : number of centroids; must be > 1.
// cc     : countc * countf doubles, centroid i occupying
//          cc[i * countf] .. cc[i * countf + countf - 1].
//
// Returns the index (0 .. countc - 1) of the centroid with the smallest
// squared Euclidean distance to x; on a tie the lowest index wins.
// Returns -1 if any argument is invalid.
int k_mean_class(double *x, int countf, int countc, double *cc)
{
    if (x == NULL || cc == NULL)
    {
        return -1;
    }

    if (countf <= 0 || countc <= 1)
    {
        return -1;
    }

    double dis_min = 0;
    int k = 0;
    for (int i = 0; i < countc; i++)
    {
        // Squared distance from the sample to centroid i.
        double dis = 0;
        for (int j = 0; j < countf; j++)
        {
            double d = x[j] - cc[i * countf + j];
            dis += d * d;
        }

        // The first centroid seeds the minimum. A fixed "large" starting value
        // would mis-classify samples that are farther than that value from
        // every centroid.
        if (i == 0 || dis < dis_min)
        {
            dis_min = dis;
            k = i;
        }
    }

    return k;
}

// Recomputes each centroid as the mean of the samples currently labelled
// with that cluster.
//
// sample : data set; sample->y[i] is the class of row i of sample->x.
// countc : number of clusters; must be > 1.
// cc     : countc rows of sample->countf doubles. Sums are accumulated on top
//          of whatever cc already contains, so the caller is expected to zero
//          it beforehand. A cluster with no members is left undivided, i.e. it
//          keeps the values cc had on entry.
//
// Returns 0 on success, -1 if an argument is invalid.
int k_mean_cc(s_Sample *sample, int countc, double *cc)
{
    if (sample == NULL || countc <= 1 || cc == NULL)
    {
        return -1;
    }

    // NOTE(review): assumes sample->countf is an int-sized feature count, as
    // it is passed to an int parameter elsewhere in this file.
    const int dims = sample->countf;

    for (int cluster = 0; cluster < countc; cluster++)
    {
        double *centroid = &cc[cluster * dims];
        int members = 0;

        // Add up the feature vectors of every sample in this cluster.
        for (int row = 0; row < sample->countx; row++)
        {
            if (sample->y[row] != cluster)
            {
                continue;
            }

            for (int f = 0; f < dims; f++)
            {
                centroid[f] += sample->x[row * dims + f];
            }
            members++;
        }

        // Nothing to average for an empty cluster.
        if (members == 0)
        {
            continue;
        }

        // Turn the sums into means.
        for (int f = 0; f < dims; f++)
        {
            centroid[f] /= members;
        }
    }

    return 0;
}

        样本数据如下(第一行"60,2"表示共有60个样本、每个样本2个特征,其余各行为样本的特征值):

60,2
2,7
4,5
7,8
1,3
8,6
2,9
3,8
8,7
2,4
4,3
6,7
1,7
9,1
2,6
3,5
3,9
4,8
8,9
5,3
2,9
8,3
1,6
4,3
7,9
3,8
6,3
1,9
1,9
5,0
5,5
14,18
18,10
10,11
16,16
16,15
10,18
17,16
10,11
10,11
19,19
13,15
17,17
16,16
16,10
15,11
12,13
16,11
12,10
19,18
13,19
19,15
13,13
16,16
19,13
15,18
14,11
18,10
16,11
13,15
18,14

        运行结果:

cluster centroid 1: 4.17, 5.97
cluster centroid 2: 15.00, 14.03
2.000000 7.000000 0
4.000000 5.000000 0
7.000000 8.000000 0
1.000000 3.000000 0
8.000000 6.000000 0
2.000000 9.000000 0
3.000000 8.000000 0
8.000000 7.000000 0
2.000000 4.000000 0
4.000000 3.000000 0
6.000000 7.000000 0
1.000000 7.000000 0
9.000000 1.000000 0
2.000000 6.000000 0
3.000000 5.000000 0
3.000000 9.000000 0
4.000000 8.000000 0
8.000000 9.000000 0
5.000000 3.000000 0
2.000000 9.000000 0
8.000000 3.000000 0
1.000000 6.000000 0
4.000000 3.000000 0
7.000000 9.000000 0
3.000000 8.000000 0
6.000000 3.000000 0
1.000000 9.000000 0
1.000000 9.000000 0
5.000000 0.000000 0
5.000000 5.000000 0
14.000000 18.000000 1
18.000000 10.000000 1
10.000000 11.000000 1
16.000000 16.000000 1
16.000000 15.000000 1
10.000000 18.000000 1
17.000000 16.000000 1
10.000000 11.000000 1
10.000000 11.000000 1
19.000000 19.000000 1
13.000000 15.000000 1
17.000000 17.000000 1
16.000000 16.000000 1
16.000000 10.000000 1
15.000000 11.000000 1
12.000000 13.000000 1
16.000000 11.000000 1
12.000000 10.000000 1
19.000000 18.000000 1
13.000000 19.000000 1
19.000000 15.000000 1
13.000000 13.000000 1
16.000000 16.000000 1
19.000000 13.000000 1
15.000000 18.000000 1
14.000000 11.000000 1
18.000000 10.000000 1
16.000000 11.000000 1
13.000000 15.000000 1
18.000000 14.000000 1

 

    返回首页    返回顶部
  看不清?点击刷新

 

  Copyright © 2015-2018 问渠网 辽ICP备15013245号