You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

312 lines
8.8KB

  1. /*
  2. * Copyright (C) 2004 the ffmpeg project
  3. *
  4. * This library is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2 of the License, or (at your option) any later version.
  8. *
  9. * This library is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with this library; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. /**
  19. * @file vp3dsp.c
  20. * Standard C DSP-oriented functions cribbed from the original VP3
  21. * source code.
  22. */
  23. #include "common.h"
  24. #include "avcodec.h"
  25. #include "dsputil.h"
  26. #define IdctAdjustBeforeShift 8
  27. #define xC1S7 64277
  28. #define xC2S6 60547
  29. #define xC3S5 54491
  30. #define xC4S4 46341
  31. #define xC5S3 36410
  32. #define xC6S2 25080
  33. #define xC7S1 12785
  34. static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
  35. {
  36. int16_t *ip = input;
  37. uint8_t *cm = cropTbl + MAX_NEG_CROP;
  38. int A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_;
  39. int _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
  40. int t1, t2;
  41. int i;
  42. /* Inverse DCT on the rows now */
  43. for (i = 0; i < 8; i++) {
  44. /* Check for non-zero values */
  45. if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
  46. t1 = (int32_t)(xC1S7 * ip[1]);
  47. t2 = (int32_t)(xC7S1 * ip[7]);
  48. t1 >>= 16;
  49. t2 >>= 16;
  50. A_ = t1 + t2;
  51. t1 = (int32_t)(xC7S1 * ip[1]);
  52. t2 = (int32_t)(xC1S7 * ip[7]);
  53. t1 >>= 16;
  54. t2 >>= 16;
  55. B_ = t1 - t2;
  56. t1 = (int32_t)(xC3S5 * ip[3]);
  57. t2 = (int32_t)(xC5S3 * ip[5]);
  58. t1 >>= 16;
  59. t2 >>= 16;
  60. C_ = t1 + t2;
  61. t1 = (int32_t)(xC3S5 * ip[5]);
  62. t2 = (int32_t)(xC5S3 * ip[3]);
  63. t1 >>= 16;
  64. t2 >>= 16;
  65. D_ = t1 - t2;
  66. t1 = (int32_t)(xC4S4 * (A_ - C_));
  67. t1 >>= 16;
  68. _Ad = t1;
  69. t1 = (int32_t)(xC4S4 * (B_ - D_));
  70. t1 >>= 16;
  71. _Bd = t1;
  72. _Cd = A_ + C_;
  73. _Dd = B_ + D_;
  74. t1 = (int32_t)(xC4S4 * (ip[0] + ip[4]));
  75. t1 >>= 16;
  76. E_ = t1;
  77. t1 = (int32_t)(xC4S4 * (ip[0] - ip[4]));
  78. t1 >>= 16;
  79. F_ = t1;
  80. t1 = (int32_t)(xC2S6 * ip[2]);
  81. t2 = (int32_t)(xC6S2 * ip[6]);
  82. t1 >>= 16;
  83. t2 >>= 16;
  84. G_ = t1 + t2;
  85. t1 = (int32_t)(xC6S2 * ip[2]);
  86. t2 = (int32_t)(xC2S6 * ip[6]);
  87. t1 >>= 16;
  88. t2 >>= 16;
  89. H_ = t1 - t2;
  90. _Ed = E_ - G_;
  91. _Gd = E_ + G_;
  92. _Add = F_ + _Ad;
  93. _Bdd = _Bd - H_;
  94. _Fd = F_ - _Ad;
  95. _Hd = _Bd + H_;
  96. /* Final sequence of operations over-write original inputs. */
  97. ip[0] = _Gd + _Cd ;
  98. ip[7] = _Gd - _Cd ;
  99. ip[1] = _Add + _Hd;
  100. ip[2] = _Add - _Hd;
  101. ip[3] = _Ed + _Dd ;
  102. ip[4] = _Ed - _Dd ;
  103. ip[5] = _Fd + _Bdd;
  104. ip[6] = _Fd - _Bdd;
  105. }
  106. ip += 8; /* next row */
  107. }
  108. ip = input;
  109. for ( i = 0; i < 8; i++) {
  110. /* Check for non-zero values (bitwise or faster than ||) */
  111. if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
  112. ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
  113. t1 = (int32_t)(xC1S7 * ip[1*8]);
  114. t2 = (int32_t)(xC7S1 * ip[7*8]);
  115. t1 >>= 16;
  116. t2 >>= 16;
  117. A_ = t1 + t2;
  118. t1 = (int32_t)(xC7S1 * ip[1*8]);
  119. t2 = (int32_t)(xC1S7 * ip[7*8]);
  120. t1 >>= 16;
  121. t2 >>= 16;
  122. B_ = t1 - t2;
  123. t1 = (int32_t)(xC3S5 * ip[3*8]);
  124. t2 = (int32_t)(xC5S3 * ip[5*8]);
  125. t1 >>= 16;
  126. t2 >>= 16;
  127. C_ = t1 + t2;
  128. t1 = (int32_t)(xC3S5 * ip[5*8]);
  129. t2 = (int32_t)(xC5S3 * ip[3*8]);
  130. t1 >>= 16;
  131. t2 >>= 16;
  132. D_ = t1 - t2;
  133. t1 = (int32_t)(xC4S4 * (A_ - C_));
  134. t1 >>= 16;
  135. _Ad = t1;
  136. t1 = (int32_t)(xC4S4 * (B_ - D_));
  137. t1 >>= 16;
  138. _Bd = t1;
  139. _Cd = A_ + C_;
  140. _Dd = B_ + D_;
  141. t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8]));
  142. t1 >>= 16;
  143. E_ = t1;
  144. t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8]));
  145. t1 >>= 16;
  146. F_ = t1;
  147. t1 = (int32_t)(xC2S6 * ip[2*8]);
  148. t2 = (int32_t)(xC6S2 * ip[6*8]);
  149. t1 >>= 16;
  150. t2 >>= 16;
  151. G_ = t1 + t2;
  152. t1 = (int32_t)(xC6S2 * ip[2*8]);
  153. t2 = (int32_t)(xC2S6 * ip[6*8]);
  154. t1 >>= 16;
  155. t2 >>= 16;
  156. H_ = t1 - t2;
  157. _Ed = E_ - G_;
  158. _Gd = E_ + G_;
  159. _Add = F_ + _Ad;
  160. _Bdd = _Bd - H_;
  161. _Fd = F_ - _Ad;
  162. _Hd = _Bd + H_;
  163. if(type==1){ //HACK
  164. _Gd += 16*128;
  165. _Add+= 16*128;
  166. _Ed += 16*128;
  167. _Fd += 16*128;
  168. }
  169. _Gd += IdctAdjustBeforeShift;
  170. _Add += IdctAdjustBeforeShift;
  171. _Ed += IdctAdjustBeforeShift;
  172. _Fd += IdctAdjustBeforeShift;
  173. /* Final sequence of operations over-write original inputs. */
  174. if(type==0){
  175. ip[0*8] = (_Gd + _Cd ) >> 4;
  176. ip[7*8] = (_Gd - _Cd ) >> 4;
  177. ip[1*8] = (_Add + _Hd ) >> 4;
  178. ip[2*8] = (_Add - _Hd ) >> 4;
  179. ip[3*8] = (_Ed + _Dd ) >> 4;
  180. ip[4*8] = (_Ed - _Dd ) >> 4;
  181. ip[5*8] = (_Fd + _Bdd ) >> 4;
  182. ip[6*8] = (_Fd - _Bdd ) >> 4;
  183. }else if(type==1){
  184. dst[0*stride] = cm[(_Gd + _Cd ) >> 4];
  185. dst[7*stride] = cm[(_Gd - _Cd ) >> 4];
  186. dst[1*stride] = cm[(_Add + _Hd ) >> 4];
  187. dst[2*stride] = cm[(_Add - _Hd ) >> 4];
  188. dst[3*stride] = cm[(_Ed + _Dd ) >> 4];
  189. dst[4*stride] = cm[(_Ed - _Dd ) >> 4];
  190. dst[5*stride] = cm[(_Fd + _Bdd ) >> 4];
  191. dst[6*stride] = cm[(_Fd - _Bdd ) >> 4];
  192. }else{
  193. dst[0*stride] = cm[dst[0*stride] + ((_Gd + _Cd ) >> 4)];
  194. dst[7*stride] = cm[dst[7*stride] + ((_Gd - _Cd ) >> 4)];
  195. dst[1*stride] = cm[dst[1*stride] + ((_Add + _Hd ) >> 4)];
  196. dst[2*stride] = cm[dst[2*stride] + ((_Add - _Hd ) >> 4)];
  197. dst[3*stride] = cm[dst[3*stride] + ((_Ed + _Dd ) >> 4)];
  198. dst[4*stride] = cm[dst[4*stride] + ((_Ed - _Dd ) >> 4)];
  199. dst[5*stride] = cm[dst[5*stride] + ((_Fd + _Bdd ) >> 4)];
  200. dst[6*stride] = cm[dst[6*stride] + ((_Fd - _Bdd ) >> 4)];
  201. }
  202. } else {
  203. if(type==0){
  204. ip[0*8] =
  205. ip[1*8] =
  206. ip[2*8] =
  207. ip[3*8] =
  208. ip[4*8] =
  209. ip[5*8] =
  210. ip[6*8] =
  211. ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
  212. }else if(type==1){
  213. dst[0*stride]=
  214. dst[1*stride]=
  215. dst[2*stride]=
  216. dst[3*stride]=
  217. dst[4*stride]=
  218. dst[5*stride]=
  219. dst[6*stride]=
  220. dst[7*stride]= 128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
  221. }else{
  222. if(ip[0*8]){
  223. int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
  224. dst[0*stride] = cm[dst[0*stride] + v];
  225. dst[1*stride] = cm[dst[1*stride] + v];
  226. dst[2*stride] = cm[dst[2*stride] + v];
  227. dst[3*stride] = cm[dst[3*stride] + v];
  228. dst[4*stride] = cm[dst[4*stride] + v];
  229. dst[5*stride] = cm[dst[5*stride] + v];
  230. dst[6*stride] = cm[dst[6*stride] + v];
  231. dst[7*stride] = cm[dst[7*stride] + v];
  232. }
  233. }
  234. }
  235. ip++; /* next column */
  236. dst++;
  237. }
  238. }
  239. void ff_vp3_idct_c(DCTELEM *block/* align 16*/){
  240. idct(NULL, 0, block, 0);
  241. }
  242. void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
  243. idct(dest, line_size, block, 1);
  244. }
  245. void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
  246. idct(dest, line_size, block, 2);
  247. }