You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

242 lines
6.2KB

  1. /*
  2. * Principal component analysis
  3. * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. *
  19. */
  20. /**
  21. * @file pca.c
  22. * Principal component analysis
  23. */
  24. #include "common.h"
  25. #include "pca.h"
  26. typedef struct PCA{ /**< running statistics from which ff_pca() derives the principal components */
  27. int count; /**< number of vectors accumulated so far; incremented once per ff_pca_add() call */
  28. int n; /**< dimension of every input vector, fixed by ff_pca_init() */
  29. double *covariance; /**< n*n array; ff_pca_add() sums v[i]*v[j] into [j + i*n] for j >= i, ff_pca() converts it to covariances in place */
  30. double *mean; /**< n entries; ff_pca_add() sums v[i] into [i], ff_pca() divides by count in place */
  31. }PCA;
  32. PCA *ff_pca_init(int n){
  33. PCA *pca;
  34. if(n<=0)
  35. return NULL;
  36. pca= av_mallocz(sizeof(PCA));
  37. pca->n= n;
  38. pca->count=0;
  39. pca->covariance= av_mallocz(sizeof(double)*n*n);
  40. pca->mean= av_mallocz(sizeof(double)*n);
  41. return pca;
  42. }
  43. void ff_pca_free(PCA *pca){
  44. av_freep(&pca->covariance);
  45. av_freep(&pca->mean);
  46. av_free(pca);
  47. }
  48. void ff_pca_add(PCA *pca, double *v){
  49. int i, j;
  50. const int n= pca->n;
  51. for(i=0; i<n; i++){
  52. pca->mean[i] += v[i];
  53. for(j=i; j<n; j++)
  54. pca->covariance[j + i*n] += v[i]*v[j];
  55. }
  56. pca->count++;
  57. }
  58. int ff_pca(PCA *pca, double *eigenvector, double *eigenvalue){
  59. int i, j, k, pass;
  60. const int n= pca->n;
  61. double z[n];
  62. memset(eigenvector, 0, sizeof(double)*n*n);
  63. for(j=0; j<n; j++){
  64. pca->mean[j] /= pca->count;
  65. eigenvector[j + j*n] = 1.0;
  66. for(i=0; i<=j; i++){
  67. pca->covariance[j + i*n] /= pca->count;
  68. pca->covariance[j + i*n] -= pca->mean[i] * pca->mean[j];
  69. pca->covariance[i + j*n] = pca->covariance[j + i*n];
  70. }
  71. eigenvalue[j]= pca->covariance[j + j*n];
  72. z[j]= 0;
  73. }
  74. for(pass=0; pass < 50; pass++){
  75. double sum=0;
  76. for(i=0; i<n; i++)
  77. for(j=i+1; j<n; j++)
  78. sum += fabs(pca->covariance[j + i*n]);
  79. if(sum == 0){
  80. for(i=0; i<n; i++){
  81. double maxvalue= -1;
  82. for(j=i; j<n; j++){
  83. if(eigenvalue[j] > maxvalue){
  84. maxvalue= eigenvalue[j];
  85. k= j;
  86. }
  87. }
  88. eigenvalue[k]= eigenvalue[i];
  89. eigenvalue[i]= maxvalue;
  90. for(j=0; j<n; j++){
  91. double tmp= eigenvector[k + j*n];
  92. eigenvector[k + j*n]= eigenvector[i + j*n];
  93. eigenvector[i + j*n]= tmp;
  94. }
  95. }
  96. return pass;
  97. }
  98. for(i=0; i<n; i++){
  99. for(j=i+1; j<n; j++){
  100. double covar= pca->covariance[j + i*n];
  101. double t,c,s,tau,theta, h;
  102. if(pass < 3 && fabs(covar) < sum / (5*n*n)) //FIXME why pass < 3
  103. continue;
  104. if(fabs(covar) == 0.0) //FIXME shouldnt be needed
  105. continue;
  106. if(pass >=3 && fabs((eigenvalue[j]+z[j])/covar) > (1LL<<32) && fabs((eigenvalue[i]+z[i])/covar) > (1LL<<32)){
  107. pca->covariance[j + i*n]=0.0;
  108. continue;
  109. }
  110. h= (eigenvalue[j]+z[j]) - (eigenvalue[i]+z[i]);
  111. theta=0.5*h/covar;
  112. t=1.0/(fabs(theta)+sqrt(1.0+theta*theta));
  113. if(theta < 0.0) t = -t;
  114. c=1.0/sqrt(1+t*t);
  115. s=t*c;
  116. tau=s/(1.0+c);
  117. z[i] -= t*covar;
  118. z[j] += t*covar;
  119. #define ROTATE(a,i,j,k,l) {\
  120. double g=a[j + i*n];\
  121. double h=a[l + k*n];\
  122. a[j + i*n]=g-s*(h+g*tau);\
  123. a[l + k*n]=h+s*(g-h*tau); }
  124. for(k=0; k<n; k++) {
  125. if(k!=i && k!=j){
  126. ROTATE(pca->covariance,FFMIN(k,i),FFMAX(k,i),FFMIN(k,j),FFMAX(k,j))
  127. }
  128. ROTATE(eigenvector,k,i,k,j)
  129. }
  130. pca->covariance[j + i*n]=0.0;
  131. }
  132. }
  133. for (i=0; i<n; i++) {
  134. eigenvalue[i] += z[i];
  135. z[i]=0.0;
  136. }
  137. }
  138. return -1;
  139. }
  140. #ifdef TEST
  141. #undef printf
  142. #undef random
  143. #include <stdio.h>
  144. #include <stdlib.h>
  145. int main(){
  146. PCA *pca;
  147. int i, j, k;
  148. #define LEN 8
  149. double eigenvector[LEN*LEN];
  150. double eigenvalue[LEN];
  151. pca= ff_pca_init(LEN);
  152. for(i=0; i<9000000; i++){
  153. double v[2*LEN+100];
  154. double sum=0;
  155. int pos= random()%LEN;
  156. int v2= (random()%101) - 50;
  157. v[0]= (random()%101) - 50;
  158. for(j=1; j<8; j++){
  159. if(j<=pos) v[j]= v[0];
  160. else v[j]= v2;
  161. sum += v[j];
  162. }
  163. /* for(j=0; j<LEN; j++){
  164. v[j] -= v[pos];
  165. }*/
  166. // sum += random()%10;
  167. /* for(j=0; j<LEN; j++){
  168. v[j] -= sum/LEN;
  169. }*/
  170. // lbt1(v+100,v+100,LEN);
  171. ff_pca_add(pca, v);
  172. }
  173. ff_pca(pca, eigenvector, eigenvalue);
  174. for(i=0; i<LEN; i++){
  175. pca->count= 1;
  176. pca->mean[i]= 0;
  177. // (0.5^|x|)^2 = 0.5^2|x| = 0.25^|x|
  178. // pca.covariance[i + i*LEN]= pow(0.5, fabs
  179. for(j=i; j<LEN; j++){
  180. printf("%f ", pca->covariance[i + j*LEN]);
  181. }
  182. printf("\n");
  183. }
  184. #if 1
  185. for(i=0; i<LEN; i++){
  186. double v[LEN];
  187. double error=0;
  188. memset(v, 0, sizeof(v));
  189. for(j=0; j<LEN; j++){
  190. for(k=0; k<LEN; k++){
  191. v[j] += pca->covariance[FFMIN(k,j) + FFMAX(k,j)*LEN] * eigenvector[i + k*LEN];
  192. }
  193. v[j] /= eigenvalue[i];
  194. error += fabs(v[j] - eigenvector[i + j*LEN]);
  195. }
  196. printf("%f ", error);
  197. }
  198. printf("\n");
  199. #endif
  200. for(i=0; i<LEN; i++){
  201. for(j=0; j<LEN; j++){
  202. printf("%9.6f ", eigenvector[i + j*LEN]);
  203. }
  204. printf(" %9.1f %f\n", eigenvalue[i], eigenvalue[i]/eigenvalue[0]);
  205. }
  206. return 0;
  207. }
  208. #endif