PCA算法本身并不复杂:
int main(int argc, char* argv[]) { //读取训练样本 s_Sample* sample = sample_load("data/data2.txt"); printf("原始样本:\n"); sample_display(sample); //定义中间变量 s_Matrix mcov; s_Matrix eigen_values; s_Matrix eigen_vectors; s_Matrix eigen_vectors_sub; s_Matrix mat_sample; s_Matrix mat_sample_avg; s_Matrix mat_result; matrix_init(&mat_sample, sample->countx, sample->countf); matrix_init(&mat_sample_avg, sample->countx, sample->countf); matrix_init(&mcov, sample->countf, sample->countf); //构造样本矩阵 for (int i = 0; i < sample->countx; i++) { for (int j = 0; j < sample->countf; j++) { mat_sample.v[i * mat_sample.n + j] = sample->x[i * sample->countf + j]; } } printf("样本矩阵:\n"); matrix_display(&mat_sample); //计算样本平均值 pca_avg(&mat_sample_avg, &mat_sample); printf("样本减去平均值:\n"); matrix_display(&mat_sample_avg); //计算协方差矩阵 pca_cov_matrix(&mcov, &mat_sample_avg); printf("协方差矩阵:\n"); matrix_display(&mcov); //计算特征值 matrix_eigen_values(&mcov, &eigen_values); matrix_shell_sort(eigen_values.v, eigen_values.m * eigen_values.n, 1); printf("特征值降序:\n"); matrix_display(&eigen_values); //计算特征向量 matrix_eigen_vectors(&eigen_vectors, &mcov, &eigen_values); printf("特征向量:\n"); matrix_display(&eigen_vectors); //选取最大特征值对应的特征向量(此处选取的列数为原列数减1) matrix_init(&eigen_vectors_sub, eigen_vectors.m, eigen_vectors.n - 1); for (int i = 0; i < eigen_vectors_sub.m; i++) { for (int j = 0; j < eigen_vectors_sub.n; j++) { eigen_vectors_sub.v[i * eigen_vectors_sub.n + j] = eigen_vectors.v[i * eigen_vectors.n + j]; } } printf("选取特征向量:\n"); matrix_display(&eigen_vectors_sub); //矩阵相乘 matrix_init(&mat_result, sample->countx, eigen_vectors_sub.n); matrix_mult(&mat_result, &mat_sample_avg, &eigen_vectors_sub); printf("处理后样本矩阵:\n"); matrix_display(&mat_result); //释放内存 matrix_destory(&mat_result); matrix_destory(&mat_sample); matrix_destory(&mat_sample_avg); matrix_destory(&eigen_vectors_sub); matrix_destory(&eigen_vectors); matrix_destory(&eigen_values); matrix_destory(&mcov); sample_free(sample); return 0; }
运行结果:
原始样本:
-2.61 -2.75 -2.77
-0.05 -0.36 -0.14
+4.91 +4.94 +4.53
-1.39 -1.01 -1.11
+3.84 +3.59 +3.72
+2.25 +1.74 +1.95
+0.37 +0.38 -0.04
-2.44 -2.78 -2.85
+1.70 +1.25 +1.46
+2.69 +2.42 +2.72
+4.49 +3.87 +4.33
+3.69 +4.29 +3.98
+2.98 +2.83 +3.26
-0.11 -0.32 -0.35
+1.56 +1.64 +1.59
-0.62 -0.95 -0.79
+3.62 +4.24 +4.02
-3.61 -3.69 -3.64
-3.70 -3.27 -3.19
-3.38 -2.70 -3.10
样本减去平均值:
-3.32 -3.42 -3.45
-0.76 -1.03 -0.82
+4.20 +4.27 +3.85
-2.10 -1.68 -1.79
+3.13 +2.92 +3.04
+1.54 +1.07 +1.27
-0.34 -0.29 -0.72
-3.15 -3.45 -3.53
+0.99 +0.58 +0.78
+1.98 +1.75 +2.04
+3.78 +3.20 +3.65
+2.98 +3.62 +3.30
+2.27 +2.16 +2.58
-0.82 -0.99 -1.03
+0.85 +0.97 +0.91
-1.33 -1.62 -1.47
+2.91 +3.57 +3.34
-4.32 -4.36 -4.32
-4.41 -3.94 -3.87
-4.09 -3.37 -3.78
协方差矩阵:
+8.19 +7.97 +8.09
+7.97 +7.90 +7.95
+8.09 +7.95 +8.06
特征值降序:
+24.05
+0.08
+0.02
特征向量:
+0.58 -0.72 -0.37
+0.57 +0.69 -0.45
+0.58 +0.05 +0.81
选取特征向量:
+0.58 -0.72
+0.57 +0.69
+0.58 +0.05
处理后样本矩阵:
-5.88 -0.12
-1.50 -0.20
+7.11 +0.08
-3.22 +0.28
+5.25 -0.11
+2.24 -0.32
-0.78 +0.01
-5.85 -0.26
+1.36 -0.28
+3.33 -0.13
+6.14 -0.36
+5.71 +0.49
+4.05 -0.03
-1.64 -0.14
+1.58 +0.10
-2.55 -0.22
+5.67 +0.51
-7.50 -0.08
-7.06 +0.30
-6.49 +0.46
本节的程序涉及了矩阵的相关计算、特征值和特征向量的实现。最后附上矩阵计算的相关算法,有兴趣的读者可以参考:
//初始化矩阵 int matrix_init(s_Matrix* matrix, int m, int n) { if (matrix == NULL) { return -1; } if (m <= 0 || n <= 0) { return -1; } //初始化矩阵大小 matrix->m = m; matrix->n = n; //申请存放矩阵数据的内存空间 matrix->v = malloc(sizeof(num) * matrix->m * matrix->n); if (matrix->v == NULL) { return -1; } //将矩阵中所有的元素置为0 matrix_zero(matrix); return 0; } //销毁矩阵 int matrix_destory(s_Matrix* matrix) { if (matrix == NULL) { return -1; } //释放矩阵元素占用的内存空间 if (matrix->v != NULL) { free(matrix->v); } //清除矩阵大小 matrix->m = 0; matrix->n = 0; return 0; } //置空矩阵 int matrix_zero(s_Matrix* matrix) { if (matrix == NULL) { return -1; } if (matrix->v == NULL) { return -1; } if (matrix->m <= 0 || matrix->n <= 0) { return -1; } //将所有元素的值置为0 for (int i = 0; i < matrix->m; i++) { for (int j = 0; j < matrix->n; j++) { matrix->v[i * matrix->n + j] = 0; } } return 0; } //矩阵相加 int matrix_add(s_Matrix* result, s_Matrix* src, s_Matrix* src1) { if (result == NULL) { return -1; } if (result->v == NULL) { return -1; } if (src == NULL) { return -1; } if (src->v == NULL) { return -1; } if (src->m <= 0 || src->n <= 0) { return -1; } if (src1 == NULL) { return -1; } if (src1->v == NULL) { return -1; } if (src1->m <= 0 || src1->n <= 0) { return -1; } //矩阵加法行列数必须相等 if (src->m != src1->m || src->n != src1->n) { return -1; } //循环数组 for (int i = 0; i < src->m; i++) { for (int j = 0; j < src->n; j++) { //元素相加,结果存入result矩阵中 result->v[i * src->n + j] = src->v[i * src->n + j] + src1->v[i * src->n + j]; } } //结果矩阵与相加矩阵大小相等 result->m = src->m; result->n = src->n; return 0; } //矩阵加上一个数 int matrix_add_num(s_Matrix* result, s_Matrix* src, num v) { if (result == NULL) { return -1; } if (result->v == NULL) { return -1; } if (src == NULL) { return -1; } if (src->v == NULL) { return -1; } if (src->m <= 0 || src->n <= 0) { return -1; } //每一个元素都加上这个数 for (int i = 0; i < src->m; i++) { for (int j = 0; j < src->n; j++) { result->v[i * src->n + j] = src->v[i * src->n + j] + v; } } result->m = src->m; result->n = src->n; return 0; } int matrix_add_num_slef(s_Matrix* self, num v) { if (self == NULL) { return -1; } if (self->v == NULL) { return -1; } if (self->m <= 0 || self->n <= 0) { return -1; } for (int i = 0; i < self->m; i++) { for (int j = 0; j < self->n; j++) { self->v[i * self->n + j] += v; } } return 0; } //矩阵相乘 int matrix_mult(s_Matrix* result, s_Matrix* src, s_Matrix* src1) { if (result == NULL) { return -1; } if (result->v == NULL) { return -1; } if (src == NULL) { return -1; } if (src->v == NULL) { return -1; } if (src->m <= 0 || src->n <= 0) { return -1; } if (src1 == NULL) { return -1; } if (src1->v == NULL) { return -1; } if (src1->m <= 0 || src1->n <= 0) { return -1; } //只有当第一个矩阵(左侧矩阵)的列数等于第二个矩阵(右侧矩阵)的行数时,两个矩阵才能相乘 if (src->n != src1->m) { return -1; } //结果矩阵的行数为第一个矩阵的行数 result->m = src->m; //结果矩阵的列数为第二个矩阵的列数 result->n = src1->n; //循环每行 for (int i = 0; i < result->m; i++) { //循环每列 for (int j = 0; j < result->n; j++) { //计算第一个矩阵与第二个矩阵的乘法结果的累加和 num v = 0; for (int k = 0; k < src->n; k++) { //乘法结果的累加和 v += src->v[i * src->n + k] * src1->v[k * src1->n + j]; } //得到结果 result->v[i * result->n + j] = v; } } return 0; } //转置矩阵 int matrix_transposition(s_Matrix* result, s_Matrix* src) { if (result == NULL) { return -1; } if (result->v == NULL) { return -1; } if (src == NULL) { return -1; } if (src->v == NULL) { return -1; } if (src->m <= 0 || src->n <= 0) { return -1; } //行数等于原列数 result->m = src->n; //列数等于原行数 result->n = src->m; //循环所有元素 for (int i = 0; i < src->m; i++) { for (int j = 0; j < src->n; j++) { //列号变列号,列号变行号 result->v[j * result->n + i] = src->v[i * src->n + j]; } } return 0; } int matrix_mult_num(s_Matrix* result, s_Matrix* src, num v) { if (result == NULL) { return -1; } if (result->v == NULL) { return -1; } if (src == NULL) { return -1; } if (src->v == NULL) { return -1; } if (src->m <= 0 || src->n <= 0) { return -1; } result->m = src->m; result->n = src->n; for (int i = 0; i < src->m; i++) { for (int j = 0; j < src->n; j++) { result->v[i * result->n + j] = src->v[i * src->n + j] * v; } } return 0; } int matrix_mult_num_self(s_Matrix* self, num v) { if (self == NULL) { return -1; } if (self->v == NULL) { return -1; } if (self->m <= 0 || self->n <= 0) { return -1; } for (int i = 0; i < self->m; i++) { for (int j = 0; j < self->n; j++) { self->v[i * self->n + j] *= v; } } return 0; } //计算行列式,直接计算法 int matrix_deter(num* result, s_Matrix* src) { if (src == NULL) { return -1; } if (src->v == NULL) { return -1; } if (src->m <= 0 || src->n <= 0) { return -1; } //行列式的行数与列数必须相同 if (src->m != src->n) { return -1; } if (result == NULL) { return -1; } if (src->m == 1) { *result = src->v[0]; return 0; } //如果是2阶行列式 if (src->m == 2) { //直接计算 *result = src->v[0] * src->v[3] - src->v[1] * src->v[2]; return 0; } num s = 0; for (int i = 0; i < src->m; i++) { num s1 = 1; for (int j = 0, k = i; j < src->m; j++, k++) { int ind = k * src->m + k; s1 *= src->v[ind % src->m]; } s += s1; } for (int i = 0; i < src->m; i++) { num s1 = 1; for (int j = 0, k = i + src->m; j < src->m; j++, k--) { int ind = k * src->m + k; s1 *= src->v[ind % src->m]; } s -= s1; } return s; } //计算行列式,伴随矩阵法 int matrix_determinant(num* result, s_Matrix* src) { if (src == NULL) { return -1; } if (src->v == NULL) { return -1; } if (src->m <= 0 || src->n <= 0) { return -1; } //行列式的行数与列数必须相同 if (src->m != src->n) { return -1; } if (result == NULL) { return -1; } if (src->m == 1) { *result = src->v[0]; return 0; } //如果是2阶行列式 if (src->m == 2) { //直接计算 *result = src->v[0] * src->v[3] - src->v[1] * src->v[2]; return 0; } num s = 0; //计算其中一行 for (int k = 0; k < src->m; k++) { num det = 0; s_Matrix cofactor; matrix_init(&cofactor, src->m - 1, src->n - 1); //计算代数余子式 for (int i = 1, ci = 0; i < src->m; i++, ci++) { for (int j = 0, cj = 0; j < src->n; j++) { if (j != k) { cofactor.v[ci * cofactor.n + cj] = src->v[i * src->n + j]; cj++; } } } //递归计算代数余子式 matrix_determinant(&det, &cofactor); matrix_destory(&cofactor); //当前元素乘以其代数余子式的累加和 s += src->v[k] * pow(-1, 0 + k) * det; } //返回行列式的值 *result = s; return 0; } //伴随矩阵 int matrix_adjoint(s_Matrix* result, s_Matrix* src) { if (result == NULL) { return -1; } if (result->v == NULL) { return -1; } if (result->m <= 0 || result->n <= 0) { return -1; } if (src == NULL) { return -1; } if (src->v == NULL) { return -1; } if (src->m <= 0 || src->n <= 0) { return -1; } //必须是方阵 if (src->m != src->n) { return -1; } result->m = src->m; result->n = src->n; if (src->m == 1) { result->v[0] = 1; return 0; } int cm = src->m - 1; int cn = src->n - 1; s_Matrix cofactor; matrix_init(&cofactor, cm, cn); //计算每一个元素的代数余子式并作为这个元素所在位置的值 for (int i = 0; i < src->m; i++) { for (int j = 0; j < src->n; j++) { for (int k = 0, ck = 0; k < src->m; k++) { //抹去第i行 if (k != i) { for (int l = 0, cl = 0; l < src->n; l++) { //抹去第j列 if (l != j) { //计算值 cofactor.v[ck * cofactor.n + cl] = src->v[k * src->n + l]; cl++; } } ck++; } } num det = 0; //计算代数余子式 matrix_determinant(&det, &cofactor); //用代数余子式做为新元素 result->v[j * result->n + i] = pow(-1, i + j) * det; } } matrix_destory(&cofactor); return 0; } int matrix_inv(s_Matrix* result, s_Matrix* src) { if (result == NULL) { return -1; } if (result->v == NULL) { return -1; } if (result->m <= 0 || result->n <= 0) { return -1; } if (src == NULL) { return -1; } if (src->v == NULL) { return -1; } if (src->m <= 0 || src->n <= 0) { return -1; } if (src->m != src->n) { return -1; } num det = 0; matrix_determinant(&det, src); if (det == 0) { return -1; } s_Matrix adjoint; matrix_init(&adjoint, src->m, src->n); matrix_adjoint(&adjoint, src); matrix_mult_num(result, &adjoint, 1.0 / det); matrix_destory(&adjoint); return 0; } //逆矩阵 int matrix_inverse(s_Matrix* result, s_Matrix* src) { if (src == NULL) { return -1; } if (src->v == NULL) { return -1; } if (src->m <= 0 || src->n <= 0) { return -1; } if (src->m != src->n) { return -1; } if (result == NULL) { return -1; } if (result->v == NULL) { return -1; } if (result->m <= 0 || result->n <= 0) { return -1; } //必须是方阵 if (result->m != src->n) { return -1; } //一阶方阵 if (src->m == 1) { result->v[0] = 1.0 / src->v[0]; return 0; } int err = 0; int n = src->m; s_Matrix temp; matrix_init(&temp, n, n * 2); //(A, E),矩阵大小由m,n变为m,2n for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { temp.v[i * (n * 2) + j] = src->v[i * n + j]; } temp.v[i * (n * 2) + (i + n)] = 1; } //对首行做初等变换 for (int i = 0; i < n; i++) { for (int row = 0; row < n; row++) { //将当前对角线上的元素值保留,其它行上的值为0 if (row != i) { if (temp.v[i * (n * 2) + i] == 0) { err = 1; goto _label_inf; } //计算初等变换参数 num a = -temp.v[row * (n * 2) + i] / temp.v[i * (n * 2) + i]; //初等变换 for (int j = i; j < (n * 2); j++) { temp.v[row * (n * 2) + j] += temp.v[i * (n * 2) + j] * a; } } } } //除以对角线的值,将当前对角线上的元素变为1 for (int i = 0; i < n; i++) { num a = temp.v[i * (n * 2) + i]; if (a == 0) { err = 1; goto _label_inf; } //将当前对角线上的元素变为1 for (int j = i; j < (n * 2); j++) { temp.v[i * (n * 2) + j] /= a; } } //取得逆矩阵 for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { result->v[i * n + j] = temp.v[i * (n * 2) + (j + n)]; } } _label_inf:; matrix_destory(&temp); //如果初等变换失败 if (err) { //采用伴随矩阵法递归计算 return matrix_inv(result, src); } return 0; } //显示矩阵内容 int matrix_display(s_Matrix* matrix) { if (matrix == NULL) { return -1; } if (matrix->v == NULL) { return -1; } if (matrix->m <= 0 || matrix->n <= 0) { return -1; } //显示所有元素值 for (int i = 0; i < matrix->m; i++) { for (int j = 0; j < matrix->n; j++) { printf("%+e\t\t", matrix->v[i * matrix->n + j]); } printf("\n"); } printf("\n"); return 0; } //求A矩阵的2阶范数 num matrix_norm2(s_Matrix* matrix) { int size = matrix->m * matrix->n; num norm = 0; for (int i = 0; i < size; i++) { norm += (matrix->v[i]) * (matrix->v[i]); } return sqrt(norm); } //将A分解为Q和R int matrix_QR(s_Matrix* A, s_Matrix* Q, s_Matrix* R) { if (A == NULL) { return -1; } if (Q == NULL) { return -1; } if (R == NULL) { return -1; } if (A->m != A->n) { return -1; } num temp; s_Matrix a, b; matrix_init(&a, A->m, A->n); matrix_init(&b, A->m, A->n); matrix_init(Q, A->m, A->n); matrix_init(R, A->m, A->n); for (int j = 0; j < A->m; j++) { for (int i = 0; i < A->m; i++) { a.v[i] = b.v[i] = A->v[i * A->n + j]; } for (int k = 0; k < j; k++) { R->v[k * R->n + j] = 0; for (int m = 0; m < A->m; m++) { R->v[k * R->n + j] += a.v[m] * Q->v[m * Q->n + k]; } for (int m = 0; m < A->m; m++) { b.v[m] -= R->v[k * R->n + j] * Q->v[m * Q->n + k]; } } temp = matrix_norm2(&b); R->v[j * R->n + j] = temp; for (int i = 0; i < A->m; i++) { Q->v[i * Q->n + j] = b.v[i] / temp; } } matrix_destory(&a); matrix_destory(&b); } //复制矩阵 int matrix_copy(s_Matrix* tar, s_Matrix* src) { if (tar == NULL) { return -1; } if (src == NULL) { return -1; } matrix_init(tar, src->m, src->n); for (int i = 0; i < src->m * src->n; i++) { tar->v[i] = src->v[i]; } return 0; } //计算特征值 int matrix_eigen_values(s_Matrix* mcov, s_Matrix* eigen_values) { if (mcov == NULL) { return -1; } if (eigen_values == NULL) { return -1; } //迭代次数 int times = 50; s_Matrix temp; s_Matrix R; s_Matrix Q; matrix_init(eigen_values, mcov->m, 1); matrix_init(&R, mcov->m, mcov->n); matrix_init(&Q, mcov->m, mcov->n); matrix_copy(&temp, mcov); //使用QR分解求矩阵特征值 for (int k = 0; k < times; k++) { matrix_QR(&temp, &Q, &R); matrix_mult(&temp, &R, &Q); } //获取特征值,将之存储于eValue for (int k = 0; k < temp.n; k++) { eigen_values->v[k] = temp.v[k * temp.n + k]; } matrix_destory(&temp); matrix_destory(&R); matrix_destory(&Q); } //子表插入排序 void matrix_shell_insert(num* array, int size, int i, int m, int type) { // 子表循环 for (int j = i + m; j < size; j += m) { // 找到插入位置 if ((type == 0 && array[j] < array[j - m]) || (type == 1 && array[j] > array[j - m])) { // 互换 num t = array[j]; int k = j - m; // 后移 for (; k >= 0 && array[k] > t; k -= m) { array[k + m] = array[k]; } array[k + m] = t; } } } //希尔排序 void matrix_shell_sort(num* array, int size, int type) { // 半步长递增 for (int i = size / 2; i > 0; i /= 2) { // 执行size / 2趟子表插入排序 for (int j = 0; j < i; j++) { matrix_shell_insert(array, size, j, i, type); } } } //由特征值计算特征向量 void matrix_eigen_vectors(s_Matrix* eigen_vectors, s_Matrix* A, s_Matrix* eigen_values) { int count = 0; num eValue, sum, midSum, mid; s_Matrix temp; matrix_init(eigen_vectors, A->m, A->n); matrix_init(&temp, A->m, A->n); for (count = 0; count < A->m; count++) { //计算特征值,求解特征向量时的系数矩阵 eValue = eigen_values->v[count]; matrix_copy(&temp, A); for (int i = 0; i < temp.n; ++i) { temp.v[i * temp.n + i] -= eValue; } //经过初等变换,将temp化为阶梯型矩阵 for (int i = 0; i < temp.m - 1; i++) { mid = temp.v[i * temp.n + i]; for (int j = i; j < temp.n; j++) { temp.v[i * temp.n + j] /= mid; } for (int j = i + 1; j < temp.m; j++) { mid = temp.v[j * temp.n + i]; for (int q = i; q < temp.n; q++) { temp.v[j * temp.n + q] -= mid * temp.v[i * temp.n + q]; } } } midSum = eigen_vectors->v[(eigen_vectors->m - 1) * eigen_vectors->n + count] = 1; for (int m = temp.m - 2; m >= 0; m--) { sum = 0; for (int j = m + 1; j < temp.n; j++) { sum += temp.v[m * temp.n + j] * eigen_vectors->v[j * eigen_vectors->n + count]; } sum = -sum / temp.v[m * temp.n + m]; midSum += sum * sum; eigen_vectors->v[m * eigen_vectors->n + count] = sum; } midSum = sqrt(midSum); for (int i = 0; i < eigen_vectors->m; ++i) { eigen_vectors->v[i * eigen_vectors->n + count] /= midSum; } } matrix_destory(&temp); }
Copyright © 2015-2023 问渠网 辽ICP备15013245号