You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

232 lines
7.1KB

  1. /*
  2. Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
  3. This program is free software; you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program; if not, write to the Free Software
  13. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  14. */
  15. /*
  16. Based on some commented-out C code from mpeg2dec (idct_mmx.c, written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>).
  17. */
  18. #include <inttypes.h>
  19. #include "simple_idct.h"
  /*
   * Fixed-point constants for the inverse DCT.
   *
   * W1..W7 are scaled cosine coefficients. ROW_SHIFT and COL_SHIFT are the
   * right-shift amounts applied to the results of the row pass and the
   * column pass respectively.
   */
  #if 1
  /* Active set: Wk = cos(k*pi/16) * sqrt(2) * (1 << 14), rounded to nearest. */
  #define W1 22725
  #define W2 21407
  #define W3 19266
  #define W4 16384
  #define W5 12873
  #define W6 8867
  #define W7 4520
  #define ROW_SHIFT 11
  #define COL_SHIFT 20
  #else
  /* Disabled lower-precision set: Wk = 2048 * sqrt(2) * cos(k*pi/16). */
  #define W1 2841
  #define W2 2676
  #define W3 2408
  #define W4 2048
  #define W5 1609
  #define W6 1108
  #define W7 565
  #define ROW_SHIFT 8
  #define COL_SHIFT 17
  #endif
  41. #if 1
  42. static void inline idctRow (int16_t * row)
  43. {
  44. int a0, a1, a2, a3, b0, b1, b2, b3;
  45. const int C1 =W1;
  46. const int C2 =W2;
  47. const int C3 =W3;
  48. const int C4 =W4;
  49. const int C5 =W5;
  50. const int C6 =W6;
  51. const int C7 =W7;
  52. if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7])) {
  53. row[0] = row[1] = row[2] = row[3] = row[4] =
  54. row[5] = row[6] = row[7] = row[0]<<3;
  55. return;
  56. }
  57. a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1));
  58. a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1));
  59. a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1));
  60. a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1));
  61. b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
  62. b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
  63. b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
  64. b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
  65. row[0] = (a0 + b0) >> ROW_SHIFT;
  66. row[1] = (a1 + b1) >> ROW_SHIFT;
  67. row[2] = (a2 + b2) >> ROW_SHIFT;
  68. row[3] = (a3 + b3) >> ROW_SHIFT;
  69. row[4] = (a3 - b3) >> ROW_SHIFT;
  70. row[5] = (a2 - b2) >> ROW_SHIFT;
  71. row[6] = (a1 - b1) >> ROW_SHIFT;
  72. row[7] = (a0 - b0) >> ROW_SHIFT;
  73. }
  74. static void inline idctCol (int16_t * col)
  75. {
  76. int a0, a1, a2, a3, b0, b1, b2, b3;
  77. const int C1 =W1;
  78. const int C2 =W2;
  79. const int C3 =W3;
  80. const int C4 =W4;
  81. const int C5 =W5;
  82. const int C6 =W6;
  83. const int C7 =W7;
  84. /*
  85. if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
  86. col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
  87. col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
  88. return;
  89. }*/
  90. col[0] += (1<<(COL_SHIFT-1))/W4;
  91. a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6];
  92. a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6];
  93. a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6];
  94. a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6];
  95. b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7];
  96. b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7];
  97. b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7];
  98. b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7];
  99. col[8*0] = (a0 + b0) >> COL_SHIFT;
  100. col[8*1] = (a1 + b1) >> COL_SHIFT;
  101. col[8*2] = (a2 + b2) >> COL_SHIFT;
  102. col[8*3] = (a3 + b3) >> COL_SHIFT;
  103. col[8*4] = (a3 - b3) >> COL_SHIFT;
  104. col[8*5] = (a2 - b2) >> COL_SHIFT;
  105. col[8*6] = (a1 - b1) >> COL_SHIFT;
  106. col[8*7] = (a0 - b0) >> COL_SHIFT;
  107. }
  /**
   * 2-D inverse DCT of a block of 64 coefficients, performed in place.
   *
   * The block is treated as eight rows of eight contiguous elements. All
   * rows are transformed first, then all columns.
   *
   * @param block  64 coefficients, overwritten with the result.
   */
  void simple_idct (short *block)
  {
      short *p;

      /* Row pass: each row starts 8 elements after the previous one. */
      for (p = block; p != block + 64; p += 8) {
          idctRow(p);
      }

      /* Column pass: column c starts at element c of the first row. */
      for (p = block; p != block + 8; p++) {
          idctCol(p);
      }
  }
  116. #else
  /*
   * Constants for the alternative implementation that keeps full-precision
   * 32-bit row results and does all scaling in the column pass.
   *
   * These names are already defined earlier in this file, so the previous
   * definitions are dropped first; redefining a macro with a different
   * replacement list (as happens for COL_SHIFT) is otherwise a constraint
   * violation.
   */
  #undef W1
  #undef W2
  #undef W3
  #undef W4
  #undef W5
  #undef W6
  #undef W7
  #undef COL_SHIFT

  /* Wk = cos(k*pi/16) * sqrt(2) * (1 << 14), rounded to nearest. */
  #define W1 22725
  #define W2 21407
  #define W3 19266
  #define W4 16384
  #define W5 12873
  #define W6 8867
  #define W7 4520

  /*
   * Single shift applied after the column pass; the row pass in this
   * variant does not shift at all.
   */
  #define COL_SHIFT 31
  125. static void inline idctRow (int32_t *out, int16_t * row)
  126. {
  127. int a0, a1, a2, a3, b0, b1, b2, b3;
  128. const int C1 =W1;
  129. const int C2 =W2;
  130. const int C3 =W3;
  131. const int C4 =W4;
  132. const int C5 =W5;
  133. const int C6 =W6;
  134. const int C7 =W7;
  135. /*
  136. if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7])) {
  137. row[0] = row[1] = row[2] = row[3] = row[4] =
  138. row[5] = row[6] = row[7] = row[0]<<14;
  139. return;
  140. }
  141. */
  142. a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6];
  143. a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6];
  144. a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6];
  145. a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6];
  146. b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
  147. b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
  148. b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
  149. b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
  150. out[0] = (a0 + b0);
  151. out[1] = (a1 + b1);
  152. out[2] = (a2 + b2);
  153. out[3] = (a3 + b3);
  154. out[4] = (a3 - b3);
  155. out[5] = (a2 - b2);
  156. out[6] = (a1 - b1);
  157. out[7] = (a0 - b0);
  158. }
  159. static void inline idctCol (int32_t *in, int16_t * col)
  160. {
  161. int64_t a0, a1, a2, a3, b0, b1, b2, b3;
  162. const int64_t C1 =W1;
  163. const int64_t C2 =W2;
  164. const int64_t C3 =W3;
  165. const int64_t C4 =W4;
  166. const int64_t C5 =W5;
  167. const int64_t C6 =W6;
  168. const int64_t C7 =W7;
  169. /*
  170. if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
  171. col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
  172. col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
  173. return;
  174. }*/
  175. in[0] += (1<<(COL_SHIFT-1))/W4;
  176. a0 = C4*in[8*0] + C2*in[8*2] + C4*in[8*4] + C6*in[8*6];
  177. a1 = C4*in[8*0] + C6*in[8*2] - C4*in[8*4] - C2*in[8*6];
  178. a2 = C4*in[8*0] - C6*in[8*2] - C4*in[8*4] + C2*in[8*6];
  179. a3 = C4*in[8*0] - C2*in[8*2] + C4*in[8*4] - C6*in[8*6];
  180. b0 = C1*in[8*1] + C3*in[8*3] + C5*in[8*5] + C7*in[8*7];
  181. b1 = C3*in[8*1] - C7*in[8*3] - C1*in[8*5] - C5*in[8*7];
  182. b2 = C5*in[8*1] - C1*in[8*3] + C7*in[8*5] + C3*in[8*7];
  183. b3 = C7*in[8*1] - C5*in[8*3] + C3*in[8*5] - C1*in[8*7];
  184. col[8*0] = (a0 + b0) >> COL_SHIFT;
  185. col[8*1] = (a1 + b1) >> COL_SHIFT;
  186. col[8*2] = (a2 + b2) >> COL_SHIFT;
  187. col[8*3] = (a3 + b3) >> COL_SHIFT;
  188. col[8*4] = (a3 - b3) >> COL_SHIFT;
  189. col[8*5] = (a2 - b2) >> COL_SHIFT;
  190. col[8*6] = (a1 - b1) >> COL_SHIFT;
  191. col[8*7] = (a0 - b0) >> COL_SHIFT;
  192. }
  /**
   * 2-D inverse DCT of a block of 64 coefficients.
   *
   * The row pass writes 32-bit intermediates into a local scratch buffer;
   * the column pass then reads that buffer and stores the final 16-bit
   * values back into the caller's block.
   *
   * @param block  64 coefficients, overwritten with the result.
   */
  void simple_idct (short *block)
  {
      int32_t scratch[64];
      int row_off;
      int c;

      /* Row pass: rows start every 8 elements in both buffers. */
      for (row_off = 0; row_off < 64; row_off += 8) {
          idctRow(scratch + row_off, block + row_off);
      }

      /* Column pass: column c starts at element c in both buffers. */
      for (c = 0; c < 8; c++) {
          idctCol(scratch + c, block + c);
      }
  }
  202. #endif