You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

260 lines
6.7KB

  1. /*
  2. * Copyright (C) 2004 the ffmpeg project
  3. *
  4. * This library is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2 of the License, or (at your option) any later version.
  8. *
  9. * This library is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with this library; if not, write to the Free Software
  16. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17. */
  18. /**
  19. * @file vp3dsp.c
  20. * Standard C DSP-oriented functions cribbed from the original VP3
  21. * source code.
  22. */
  23. #include "common.h"
  24. #include "avcodec.h"
  25. #include "dsputil.h"
  26. #include "vp3data.h"
  27. #define IdctAdjustBeforeShift 8
  28. #define xC1S7 64277
  29. #define xC2S6 60547
  30. #define xC3S5 54491
  31. #define xC4S4 46341
  32. #define xC5S3 36410
  33. #define xC6S2 25080
  34. #define xC7S1 12785
  35. void vp3_dsp_init_c(void)
  36. {
  37. /* nop */
  38. }
  39. void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix,
  40. int coeff_count, int16_t *output_data)
  41. {
  42. int32_t dequantized_data[64];
  43. int32_t *ip = dequantized_data;
  44. int16_t *op = output_data;
  45. int32_t A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_;
  46. int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
  47. int32_t t1, t2;
  48. int i, j;
  49. /* de-zigzag and dequantize */
  50. for (i = 0; i < coeff_count; i++) {
  51. j = dezigzag_index[i];
  52. dequantized_data[j] = dequant_matrix[i] * input_data[i];
  53. }
  54. /* Inverse DCT on the rows now */
  55. for (i = 0; i < 8; i++) {
  56. /* Check for non-zero values */
  57. if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
  58. t1 = (int32_t)(xC1S7 * ip[1]);
  59. t2 = (int32_t)(xC7S1 * ip[7]);
  60. t1 >>= 16;
  61. t2 >>= 16;
  62. A_ = t1 + t2;
  63. t1 = (int32_t)(xC7S1 * ip[1]);
  64. t2 = (int32_t)(xC1S7 * ip[7]);
  65. t1 >>= 16;
  66. t2 >>= 16;
  67. B_ = t1 - t2;
  68. t1 = (int32_t)(xC3S5 * ip[3]);
  69. t2 = (int32_t)(xC5S3 * ip[5]);
  70. t1 >>= 16;
  71. t2 >>= 16;
  72. C_ = t1 + t2;
  73. t1 = (int32_t)(xC3S5 * ip[5]);
  74. t2 = (int32_t)(xC5S3 * ip[3]);
  75. t1 >>= 16;
  76. t2 >>= 16;
  77. D_ = t1 - t2;
  78. t1 = (int32_t)(xC4S4 * (A_ - C_));
  79. t1 >>= 16;
  80. _Ad = t1;
  81. t1 = (int32_t)(xC4S4 * (B_ - D_));
  82. t1 >>= 16;
  83. _Bd = t1;
  84. _Cd = A_ + C_;
  85. _Dd = B_ + D_;
  86. t1 = (int32_t)(xC4S4 * (ip[0] + ip[4]));
  87. t1 >>= 16;
  88. E_ = t1;
  89. t1 = (int32_t)(xC4S4 * (ip[0] - ip[4]));
  90. t1 >>= 16;
  91. F_ = t1;
  92. t1 = (int32_t)(xC2S6 * ip[2]);
  93. t2 = (int32_t)(xC6S2 * ip[6]);
  94. t1 >>= 16;
  95. t2 >>= 16;
  96. G_ = t1 + t2;
  97. t1 = (int32_t)(xC6S2 * ip[2]);
  98. t2 = (int32_t)(xC2S6 * ip[6]);
  99. t1 >>= 16;
  100. t2 >>= 16;
  101. H_ = t1 - t2;
  102. _Ed = E_ - G_;
  103. _Gd = E_ + G_;
  104. _Add = F_ + _Ad;
  105. _Bdd = _Bd - H_;
  106. _Fd = F_ - _Ad;
  107. _Hd = _Bd + H_;
  108. /* Final sequence of operations over-write original inputs. */
  109. ip[0] = (int16_t)((_Gd + _Cd ) >> 0);
  110. ip[7] = (int16_t)((_Gd - _Cd ) >> 0);
  111. ip[1] = (int16_t)((_Add + _Hd ) >> 0);
  112. ip[2] = (int16_t)((_Add - _Hd ) >> 0);
  113. ip[3] = (int16_t)((_Ed + _Dd ) >> 0);
  114. ip[4] = (int16_t)((_Ed - _Dd ) >> 0);
  115. ip[5] = (int16_t)((_Fd + _Bdd ) >> 0);
  116. ip[6] = (int16_t)((_Fd - _Bdd ) >> 0);
  117. }
  118. ip += 8; /* next row */
  119. }
  120. ip = dequantized_data;
  121. for ( i = 0; i < 8; i++) {
  122. /* Check for non-zero values (bitwise or faster than ||) */
  123. if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
  124. ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
  125. t1 = (int32_t)(xC1S7 * ip[1*8]);
  126. t2 = (int32_t)(xC7S1 * ip[7*8]);
  127. t1 >>= 16;
  128. t2 >>= 16;
  129. A_ = t1 + t2;
  130. t1 = (int32_t)(xC7S1 * ip[1*8]);
  131. t2 = (int32_t)(xC1S7 * ip[7*8]);
  132. t1 >>= 16;
  133. t2 >>= 16;
  134. B_ = t1 - t2;
  135. t1 = (int32_t)(xC3S5 * ip[3*8]);
  136. t2 = (int32_t)(xC5S3 * ip[5*8]);
  137. t1 >>= 16;
  138. t2 >>= 16;
  139. C_ = t1 + t2;
  140. t1 = (int32_t)(xC3S5 * ip[5*8]);
  141. t2 = (int32_t)(xC5S3 * ip[3*8]);
  142. t1 >>= 16;
  143. t2 >>= 16;
  144. D_ = t1 - t2;
  145. t1 = (int32_t)(xC4S4 * (A_ - C_));
  146. t1 >>= 16;
  147. _Ad = t1;
  148. t1 = (int32_t)(xC4S4 * (B_ - D_));
  149. t1 >>= 16;
  150. _Bd = t1;
  151. _Cd = A_ + C_;
  152. _Dd = B_ + D_;
  153. t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8]));
  154. t1 >>= 16;
  155. E_ = t1;
  156. t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8]));
  157. t1 >>= 16;
  158. F_ = t1;
  159. t1 = (int32_t)(xC2S6 * ip[2*8]);
  160. t2 = (int32_t)(xC6S2 * ip[6*8]);
  161. t1 >>= 16;
  162. t2 >>= 16;
  163. G_ = t1 + t2;
  164. t1 = (int32_t)(xC6S2 * ip[2*8]);
  165. t2 = (int32_t)(xC2S6 * ip[6*8]);
  166. t1 >>= 16;
  167. t2 >>= 16;
  168. H_ = t1 - t2;
  169. _Ed = E_ - G_;
  170. _Gd = E_ + G_;
  171. _Add = F_ + _Ad;
  172. _Bdd = _Bd - H_;
  173. _Fd = F_ - _Ad;
  174. _Hd = _Bd + H_;
  175. _Gd += IdctAdjustBeforeShift;
  176. _Add += IdctAdjustBeforeShift;
  177. _Ed += IdctAdjustBeforeShift;
  178. _Fd += IdctAdjustBeforeShift;
  179. /* Final sequence of operations over-write original inputs. */
  180. op[0*8] = (int16_t)((_Gd + _Cd ) >> 4);
  181. op[7*8] = (int16_t)((_Gd - _Cd ) >> 4);
  182. op[1*8] = (int16_t)((_Add + _Hd ) >> 4);
  183. op[2*8] = (int16_t)((_Add - _Hd ) >> 4);
  184. op[3*8] = (int16_t)((_Ed + _Dd ) >> 4);
  185. op[4*8] = (int16_t)((_Ed - _Dd ) >> 4);
  186. op[5*8] = (int16_t)((_Fd + _Bdd ) >> 4);
  187. op[6*8] = (int16_t)((_Fd - _Bdd ) >> 4);
  188. } else {
  189. op[0*8] = 0;
  190. op[7*8] = 0;
  191. op[1*8] = 0;
  192. op[2*8] = 0;
  193. op[3*8] = 0;
  194. op[4*8] = 0;
  195. op[5*8] = 0;
  196. op[6*8] = 0;
  197. }
  198. ip++; /* next column */
  199. op++;
  200. }
  201. }