typedef struct s_NBFeatureNode { //特征取值 int nbf_val; //特征值概率P(Fn|C) double nbf_pfc; //特征值出现的个数 int nbf_count; //下一个特征取值 struct s_NBFeatureNode *next; } s_NBFeatureNode; typedef struct s_NBFeature { struct s_NBFeatureNode *header; } s_NBFeature; typedef struct s_NBayes { //样本分类取值 int y; //样本分类概率 double pc; //所有特征列表 s_NBFeature *nbfeatures; } s_NBayes; s_NBayes *nbayes = NULL; int nbayes_count = 0; int nbayes_countfeature = 0; //计算特征值出现于某分类中的概率 int nb_feature_pc_insert(s_NBFeature *nbf, int x) { if (nbf == NULL) { return -1; } //如果没有取值则作为头节点 if (nbf->header == NULL) { s_NBFeatureNode *pnew = malloc(sizeof(s_NBFeatureNode)); //概率值 pnew->nbf_pfc = 0; //特征取值 pnew->nbf_val = x; //出现次数 pnew->nbf_count = 1; pnew->next = NULL; nbf->header = pnew; return 0; } s_NBFeatureNode *p = nbf->header; //如果特征值相同 if (p->nbf_val == x) { //出现次数加1 p->nbf_count++; return 0; } while (p->next != NULL) { //如果特征值相同 if (p->next->nbf_val == x) { //出现次数加1 p->next->nbf_count++; return 0; } p = p->next; } //没有找到特征值,创建新节点到链表尾 s_NBFeatureNode *pnew = malloc(sizeof(s_NBFeatureNode)); //概率值 pnew->nbf_pfc = 0; //特征值 pnew->nbf_val = x; //出现次数 pnew->nbf_count = 1; pnew->next = NULL; p->next = pnew; return 0; } //为每个分类的特征计算不同取值的概率 int nb_feature_pc(s_NBFeature *nbf, int y, int ycount, int *sample_x, int csample, int cfeature, int feature) { if (nbf == NULL) { return -1; } if (sample_x == NULL) { return -1; } //在所有训练样本中计算 for (int i = 0; i < csample; i++) { //找到分类值相同的样本 if (sample_x[i * cfeature + (cfeature - 1)] == y) { //计算此取值的个数 int x = sample_x[i * cfeature + feature]; nb_feature_pc_insert(nbf, x); } } //通过取值的个数计算概率 s_NBFeatureNode *p = nbf->header; while (p != NULL) { //计算概率 p->nbf_pfc = (double) p->nbf_count / (double) ycount; p = p->next; } return 0; } //相互贝叶斯分类器 int nb_probability(int *sample_x, int csample, int cfeature) { if (sample_x == NULL) { return -1; } //计算每一种取值有多少个样本 s_List list; list_init(&list); for (int i = 0; i < csample; i++) { list_insert(&list, sample_x[i * cfeature + (cfeature - 1)]); } s_ListNode *p = list.header; //计算出有多少种分类 while (p != NULL) { nbayes_count++; p = p->next; } //特征数 nbayes_countfeature = cfeature; //多种分类取值 nbayes = malloc(sizeof(s_NBayes) * nbayes_count); //计算每种分类概率P(C) p = list.header; for (int i = 0; i < nbayes_count && p != NULL; i++) { //分类取值 nbayes[i].y = p->key; //概率P(C)为相同取值样本数除以样本总数 nbayes[i].pc = (double) p->count / (double) csample; //构建每一个分类的特征取值概率 nbayes[i].nbfeatures = malloc(sizeof(s_NBFeature) * (cfeature - 1)); //计算此分类的每一个特征的每一种取值数及概率 for (int j = 0; j < cfeature - 1; j++) { nbayes[i].nbfeatures[j].header = NULL; nb_feature_pc(&nbayes[i].nbfeatures[j], p->key, p->count, sample_x, csample, cfeature, j); } p = p->next; } list_destroy(&list); return 0; } //释放资源内存 int nb_distory() { if (nbayes == NULL) { return -1; } for (int i = 0; i < nbayes_count; i++) { if (nbayes[i].nbfeatures != NULL) { for (int j = 0; j < nbayes_countfeature - 1; j++) { if (nbayes[i].nbfeatures[j].header != NULL) { s_NBFeatureNode *p = nbayes[i].nbfeatures[j].header; while (p != NULL) { s_NBFeatureNode *pdel = p; p = p->next; free(pdel); } } } free(nbayes[i].nbfeatures); } } free(nbayes); } //计算目标样本的概率 double nb_probability_px(int *x, int cfeature) { if (x == NULL) { return -1; } //从所有分类中找出目标分类,并计算概率 for (int i = 0; i < nbayes_count; i++) { //计算分类概率 if (x[cfeature - 1] == nbayes[i].y) { //分类概率 double px = nbayes[i].pc; //处理每一个特征 for (int j = 0; j < cfeature - 1; j++) { //处理特征的某一个取值 s_NBFeatureNode *p = nbayes[i].nbfeatures[j].header; int find = 0; while (p != NULL) { //找出特征值并计算概率 if (p->nbf_val == x[j]) { px *= p->nbf_pfc; find = 1; break; } p = p->next; } //如果没找到概率为0 if (!find) { px *= 0; } } return px; } } return -1; }
Copyright © 2015-2023 问渠网 辽ICP备15013245号