机器学习笔记

    返回首页    发表留言
本文作者:李德强
          第三节 程序实现
 
 
// One node of a singly linked list; each node records one distinct value
// observed for a feature, together with its count and conditional probability.
typedef struct s_NBFeatureNode
{
	// The feature value this node stands for
	int nbf_val;
	// Conditional probability of this value given the class, P(Fn|C)
	double nbf_pfc;
	// Number of times this value has been observed
	int nbf_count;
	// Next distinct value of the same feature
	struct s_NBFeatureNode *next;
} s_NBFeatureNode;

// Value list of a single feature: a linked list of s_NBFeatureNode.
typedef struct s_NBFeature
{
	// First node of the value list
	struct s_NBFeatureNode *header;
} s_NBFeature;

// Per-class part of the model: the class label, its probability and the
// value lists of every feature.
typedef struct s_NBayes
{
	// Class label value
	int y;
	// Class probability P(C)
	double pc;
	// Array of per-feature value lists
	s_NBFeature *nbfeatures;

} s_NBayes;


// Array of per-class models, one entry per distinct class label; allocated by
// nb_probability() and freed by nb_distory().
s_NBayes *nbayes = NULL;
// Number of entries in nbayes (number of distinct class labels).
int nbayes_count = 0;
// Number of columns per training sample as passed to nb_probability(); the
// last column is the class label, so each class has nbayes_countfeature - 1
// feature lists.
int nbayes_countfeature = 0;

// Record one occurrence of feature value x in the value list of nbf.
//
// If a node for x already exists its counter is incremented; otherwise a new
// node with a count of 1 (and a probability of 0, to be filled in later) is
// appended at the tail of the list.
//
// nbf: feature whose value list is updated.
// x:   observed feature value.
//
// Returns 0 on success, -1 if nbf is NULL or the new node cannot be allocated
// (the list is left unchanged in that case).
int nb_feature_pc_insert(s_NBFeature *nbf, int x)
{
	if (nbf == NULL)
	{
		return -1;
	}

	// Walk the list through a pointer to the current link, so that an empty
	// list, a match on the head node and an append at the tail all share the
	// same code path.
	s_NBFeatureNode **link = &nbf->header;
	while (*link != NULL)
	{
		if ((*link)->nbf_val == x)
		{
			// Value already known: just count the occurrence
			(*link)->nbf_count++;
			return 0;
		}
		link = &(*link)->next;
	}

	// Value not seen before: create a node and hook it onto the tail link
	s_NBFeatureNode *pnew = malloc(sizeof *pnew);
	if (pnew == NULL)
	{
		return -1;
	}
	pnew->nbf_val = x;
	pnew->nbf_pfc = 0;
	pnew->nbf_count = 1;
	pnew->next = NULL;
	*link = pnew;

	return 0;
}

// Count the values of one feature over all training samples of class y and
// turn the counts into conditional probabilities P(F = value | C = y).
//
// nbf:      value list of the feature; matching samples are added to it.
// y:        class label to select samples by.
// ycount:   number of training samples whose class is y (the denominator).
// sample_x: training data, csample rows of cfeature ints each, row-major;
//           the last column of every row is the class label.
// csample:  number of rows in sample_x.
// cfeature: number of columns per row (features plus the class column).
// feature:  index of the column to process, 0 <= feature < cfeature.
//
// Returns 0 on success. Returns -1 if a pointer is NULL, if feature/cfeature
// would index outside a row, if ycount is not positive, or if recording a
// value fails.
int nb_feature_pc(s_NBFeature *nbf, int y, int ycount, int *sample_x, int csample, int cfeature, int feature)
{
	if (nbf == NULL || sample_x == NULL)
	{
		return -1;
	}

	// Reject column indices that would read outside a sample row
	if (cfeature < 1 || feature < 0 || feature >= cfeature)
	{
		return -1;
	}

	// ycount is the divisor below; a non-positive value cannot yield a probability
	if (ycount <= 0)
	{
		return -1;
	}

	// Scan every training sample
	for (int i = 0; i < csample; i++)
	{
		// Index in size_t so that i * cfeature cannot overflow int
		const int *row = sample_x + (size_t) i * (size_t) cfeature;

		// Only samples of the requested class contribute
		if (row[cfeature - 1] != y)
		{
			continue;
		}

		if (nb_feature_pc_insert(nbf, row[feature]) != 0)
		{
			return -1;
		}
	}

	// Convert the per-value counts into probabilities
	for (s_NBFeatureNode *p = nbf->header; p != NULL; p = p->next)
	{
		p->nbf_pfc = (double) p->nbf_count / (double) ycount;
	}

	return 0;
}

//相互贝叶斯分类器
int nb_probability(int *sample_x, int csample, int cfeature)
{
	if (sample_x == NULL)
	{
		return -1;
	}

	//计算每一种取值有多少个样本
	s_List list;
	list_init(&list);
	for (int i = 0; i < csample; i++)
	{
		list_insert(&list, sample_x[i * cfeature + (cfeature - 1)]);
	}

	s_ListNode *p = list.header;
	//计算出有多少种分类
	while (p != NULL)
	{
		nbayes_count++;
		p = p->next;
	}

	//特征数
	nbayes_countfeature = cfeature;
	//多种分类取值
	nbayes = malloc(sizeof(s_NBayes) * nbayes_count);
	//计算每种分类概率P(C)
	p = list.header;
	for (int i = 0; i < nbayes_count && p != NULL; i++)
	{
		//分类取值
		nbayes[i].y = p->key;
		//概率P(C)为相同取值样本数除以样本总数
		nbayes[i].pc = (double) p->count / (double) csample;
		//构建每一个分类的特征取值概率
		nbayes[i].nbfeatures = malloc(sizeof(s_NBFeature) * (cfeature - 1));

		//计算此分类的每一个特征的每一种取值数及概率
		for (int j = 0; j < cfeature - 1; j++)
		{
			nbayes[i].nbfeatures[j].header = NULL;
			nb_feature_pc(&nbayes[i].nbfeatures[j], p->key, p->count, sample_x, csample, cfeature, j);
		}

		p = p->next;
	}

	list_destroy(&list);

	return 0;
}

// Free the trained model held in the globals nbayes / nbayes_count /
// nbayes_countfeature and reset them to their empty state.
//
// Returns 0 when a model was released, -1 when there was nothing to release
// (nbayes is NULL). Because the globals are reset, calling this twice in a row
// is safe: the second call simply returns -1.
int nb_distory()
{
	if (nbayes == NULL)
	{
		return -1;
	}

	for (int i = 0; i < nbayes_count; i++)
	{
		if (nbayes[i].nbfeatures == NULL)
		{
			continue;
		}

		// Each class owns nbayes_countfeature - 1 value lists (the last
		// sample column is the class label and has no list)
		for (int j = 0; j < nbayes_countfeature - 1; j++)
		{
			s_NBFeatureNode *p = nbayes[i].nbfeatures[j].header;
			while (p != NULL)
			{
				s_NBFeatureNode *pdel = p;
				p = p->next;
				free(pdel);
			}
		}
		free(nbayes[i].nbfeatures);
	}
	free(nbayes);

	// Clear the globals so that no dangling pointer or stale count survives
	nbayes = NULL;
	nbayes_count = 0;
	nbayes_countfeature = 0;

	return 0;
}

//计算目标样本的概率
double nb_probability_px(int *x, int cfeature)
{
	if (x == NULL)
	{
		return -1;
	}

	//从所有分类中找出目标分类,并计算概率
	for (int i = 0; i < nbayes_count; i++)
	{
		//计算分类概率
		if (x[cfeature - 1] == nbayes[i].y)
		{
			//分类概率
			double px = nbayes[i].pc;
			//处理每一个特征
			for (int j = 0; j < cfeature - 1; j++)
			{
				//处理特征的某一个取值
				s_NBFeatureNode *p = nbayes[i].nbfeatures[j].header;
				int find = 0;
				while (p != NULL)
				{
					//找出特征值并计算概率
					if (p->nbf_val == x[j])
					{
						px *= p->nbf_pfc;
						find = 1;
						break;
					}
					p = p->next;
				}
				//如果没找到概率为0
				if (!find)
				{
					px *= 0;
				}
			}
			return px;
		}
	}

	return -1;
}

 

    返回首页    返回顶部
  看不清?点击刷新

 

  Copyright © 2015-2018 问渠网 辽ICP备15013245号