有了数学公式和SMO算法我们就可以编写出支持向量机的程序代码了,这里采用的核函数是原始核,即:内积。有兴趣的读者可以使用其它核函数,比如高斯核等等:
#define C (1.0) #define T (0.001) #define EPS (1.0E-12) //核函数——内积 double svm_kernel(int n, double* x1, double* x2) { double s = 0; for (int i = 0; i < n; i++) { s += x1[i] * x2[i]; } return s; } //平价函数 double svm_parity(s_Sample *sample, double *alpha, double b, double *x) { double s = 0; for (int i = 0; i < sample->countx; i++) { if (alpha[i] > 0) { double *x_i = &sample->x[i * sample->countf]; double y_i = sample->y[i]; s += alpha[i] * y_i * svm_kernel(sample->countf, x_i, x); } } s -= b; return s; } //寻找maximum fabs(e1-e2)的样本 int svm_alpha_choice_first(s_Sample *sample, double *alpha, double *b, int ind1, double e1, double *svm_e) { double e2 = 0; double tmax = 0; double temp = 0; int ind2 = -1; for (int i = 0; i < sample->countx; i++) { if (alpha[i] > 0 && alpha[i] < C) { e2 = svm_e[i]; temp = fabs(e1 - e2); if (temp > tmax) { tmax = temp; ind2 = i; } } } if (ind2 >= 0 && svm_smo(sample, alpha, b, ind1, ind2, svm_e)) { return 1; } return 0; } //如果没找到alpha2,那么从随机位置查找不满足条件的样本 int svm_alpha_choice_second(s_Sample *sample, double *alpha, double *b, int ind1, double *svm_e) { int t = rand() % sample->countx; int ind2 = -1; for (int i = 0; i < sample->countx; i++) { ind2 = (i + t) % sample->countx; if ((alpha[ind2] > 0 && alpha[ind2] < C) && svm_smo(sample, alpha, b, ind1, ind2, svm_e)) { return 1; } } return 0; } //如果没找到alpha2,则从随机位置查找整个样本 int svm_alpha_choice_random(s_Sample *sample, double *alpha, double *b, int ind1, double *svm_e) { int t = rand() % sample->countx; int ind2 = -1; for (int i = 0; i < sample->countx; i++) { ind2 = (i + t) % sample->countx; if (svm_smo(sample, alpha, b, ind1, ind2, svm_e)) { return 1; } } return 0; } //根据是否违背KKT条件,选择两个alpha。 int svm_alpha_choice(s_Sample *sample, double *alpha, double *b, int ind1, double *svm_e) { double y1 = sample->y[ind1]; double alpha1 = alpha[ind1]; double e1 = 0; if (alpha1 > 0 && alpha1 < C) { e1 = svm_e[ind1]; } else { //计算误差 double *x1 = &sample->x[ind1 * sample->countf]; e1 = svm_parity(sample, alpha, *b, x1) - y1; } double r1 = y1 * e1; //违反KKT条件的判断 if ((r1 > T && alpha1 > 0) || (r1 < -T && alpha1 < C)) { if (svm_alpha_choice_first(sample, alpha, b, ind1, e1, svm_e)) { return 1; } if (svm_alpha_choice_second(sample, alpha, b, ind1, svm_e)) { return 1; } if (svm_alpha_choice_random(sample, alpha, b, ind1, svm_e)) { return 1; } } return 0; } int svm_smo(s_Sample *sample, double *alpha, double *b, int ind1, int ind2, double *svm_e) { if (ind1 == ind2) { return 0; } double alpha1 = alpha[ind1]; double alpha2 = alpha[ind2]; int y1 = (int) sample->y[ind1]; int y2 = (int) sample->y[ind2]; double e1 = 0; double e2 = 0; if (alpha1 > 0 && alpha1 < C) { e1 = svm_e[ind1]; } else { double *x_1 = &sample->x[ind1 * sample->countf]; e1 = svm_parity(sample, alpha, *b, x_1) - y1; } if (alpha2 > 0 && alpha2 < C) { e2 = svm_e[ind2]; } else { double *x_2 = &sample->x[ind2 * sample->countf]; e2 = svm_parity(sample, alpha, *b, x_2) - y2; } int s = y1 * y2; double L = 0; double H = C; //计算乘子的上下限 if (s == 1) { double gamma = alpha1 + alpha2; if (gamma > C) { L = gamma - C; H = C; } else { L = 0; H = gamma; } } else { double gamma = alpha1 - alpha2; if (gamma > 0) { L = 0; H = C - gamma; } else { L = -gamma; H = C; } } if (L == H) { return 0; } //计算eta double *x_1 = &sample->x[ind1 * sample->countf]; double *x_2 = &sample->x[ind2 * sample->countf]; double k11 = svm_kernel(sample->countf, x_1, x_1); double k22 = svm_kernel(sample->countf, x_2, x_2); double k12 = svm_kernel(sample->countf, x_1, x_2); double eta = 2 * k12 - k11 - k22; //两个乘子的新值 double a1 = 0; double a2 = 0; if (eta < -T) { //计算新的alpha2 a2 = alpha2 + y2 * (e2 - e1) / eta; //调整a2,使其处于可行域 if (a2 < L) { a2 = L; } else if (a2 > H) { a2 = H; } } //分别从端点H,L求目标函数值Lobj,Hobj,然后设a2为所求得最大目标函数值 else { double c1 = eta / 2; double c2 = y2 * (e1 - e2) - eta * alpha2; double Lobj = c1 * L * L + c2 * L; double Hobj = c1 * H * H + c2 * H; if (Lobj > Hobj + EPS) { a2 = L; } else if (Hobj > Lobj + EPS) { a2 = H; } else { a2 = alpha2; } } if (fabs(a2 - alpha2) < EPS) { return 0; } //计算新的a1 a1 = alpha1 - s * (a2 - alpha2); //调整a1,使其符合条件 if (a1 < 0) { a2 += s * a1; a1 = 0; } else if (a1 > C) { a2 += s * (a1 - C); a1 = C; } //更新阀值b double b1, b2, bnew; if (a1 > 0 && a1 < C) { bnew = (*b) + e1 + y1 * (a1 - alpha1) * k11 + y2 * (a2 - alpha2) * k12; } else { if (a2 > 0 && a2 < C) { bnew = (*b) + e2 + y1 * (a1 - alpha1) * k12 + y2 * (a2 - alpha2) * k22; } else { b1 = (*b) + e1 + y1 * (a1 - alpha1) * k11 + y2 * (a2 - alpha2) * k12; b2 = (*b) + e2 + y1 * (a1 - alpha1) * k12 + y2 * (a2 - alpha2) * k22; bnew = (b1 + b2) / 2; } } double delta_b = bnew - (*b); *b = bnew; //对于线性情况,要更新权向量,这里不用了 //更新error_cache,对于更新后的a1,a2,对应的位置i1,i2的svm_e[i1] = svm_e[i2] = 0 double t1 = y1 * (a1 - alpha1); double t2 = y2 * (a2 - alpha2); for (int i = 0; i < sample->countx; i++) { if (alpha[i] > 0 && alpha[i] < C) { double *xi_1 = &sample->x[ind1 * sample->countf]; double *xi_2 = &sample->x[ind2 * sample->countf]; double *xi = &sample->x[i * sample->countf]; svm_e[i] += t1 * svm_kernel(sample->countf, xi_1, xi) + t2 * (svm_kernel(sample->countf, xi_2, xi)) - delta_b; } } svm_e[ind1] = 0; svm_e[ind2] = 0; alpha[ind1] = a1; alpha[ind2] = a2; return 1; } void svm(s_Sample *sample) { srand((int) time(0)); double alpha[sample->countx]; double svm_e[sample->countx]; memset(alpha, 0, sizeof(alpha)); memset(svm_e, 0, sizeof(svm_e)); double b = 0; int num = 0; int exam = 1; while (num > 0 || exam == 1) { num = 0; if (exam == 1) { for (int i = 0; i < sample->countx; i++) { //检查所有样本 num += svm_alpha_choice(sample, alpha, &b, i, svm_e); } } else { for (int i = 0; i < sample->countx; i++) { if (alpha[i] != 0 && alpha[i] != C) { //寻找所有非边界样本的lagrange乘子 num += svm_alpha_choice(sample, alpha, &b, i, svm_e); } } } if (exam == 1) { exam = 0; } else if (!num) { exam = 1; } } for (int i = 0; i < sample->countx; i++) { if (alpha[i] > 0 && alpha[i] <= C) { printf("alpha%d = %lf\n", i, alpha[i]); } } double x[2][2] = { { 0, 0 }, { 5, 5 } }; double y[2] = { -1, 1 }; double y0 = svm_parity(sample, alpha, b, x[0]); double y1 = svm_parity(sample, alpha, b, x[1]); printf("%lf %lf\n", y0, y1); }
我们简单的给程序提供几个训练数据(当然,在实际使用过程中我们的训练数据可能非常多,我们还需要优化程序的计算性能):
8,3 0,0,-1 0,2,-1 1,1,-1 2,0,-1 1,3,1 2,2,1 3,1,1 3,3,1
运行结果:
alpha1 = 0.420000 alpha2 = 0.200000 alpha3 = 0.380000 alpha4 = 0.400000 alpha5 = 0.240000 alpha6 = 0.360000 -3 7
通过判断计算结果的符号可以得出其分类,即在超平面两侧(如果我们采用了非原始核函数,此超平面可能不是平面越直线)。
Copyright © 2015-2023 问渠网 辽ICP备15013245号