You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

460 lines
13KB

  1. /*
  2. * VC-1 and WMV3 decoder - DSP functions
  3. * Copyright (c) 2006 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. */
  22. /**
  23. * @file vc1dsp.c
  24. * VC-1 and WMV3 decoder
  25. *
  26. */
  27. #include "dsputil.h"
  /**
   * Apply the overlap smoothing filter across a horizontal block edge.
   *
   * For each of 8 horizontally adjacent columns, the two pixels above the edge
   * (src[-2*stride], src[-stride]) and the two pixels below it (src[0],
   * src[stride]) are filtered in place.
   *
   * @param src    pointer to the first pixel of the row directly below the edge
   * @param stride offset between vertically adjacent pixels
   * @param rnd    rounding control: added to the outer-pair bias and subtracted
   *               from the inner-pair bias (presumably 0 or 1 - confirm with caller)
   */
  static void vc1_v_overlap_c(uint8_t* src, int stride, int rnd)
  {
      int i;

      for (i = 0; i < 8; i++) {
          const int a = src[-2 * stride];
          const int b = src[-stride];
          const int c = src[0];
          const int d = src[stride];
          const int d1 = (a - d + 3 + rnd) >> 3;
          const int d2 = (a - d + b - c + 4 - rnd) >> 3;
          int above = b - d2;
          int below = c + d2;

          /* The inner pair can leave the 0..255 range (e.g. a=255, b=c=d=0
           * gives b - d2 = -32); saturate instead of letting the uint8_t
           * store wrap around. */
          if (above < 0)
              above = 0;
          else if (above > 255)
              above = 255;
          if (below < 0)
              below = 0;
          else if (below > 255)
              below = 255;

          /* With rnd equal to 0 or 1 the outer pair always stays within
           * 0..255, so it is stored without clamping. */
          src[-2 * stride] = a - d1;
          src[-stride]     = above;
          src[0]           = below;
          src[stride]      = d + d1;
          src++;
      }
  }
  /**
   * Apply the overlap smoothing filter across a vertical block edge.
   *
   * For each of 8 vertically adjacent rows, the two pixels left of the edge
   * (src[-2], src[-1]) and the two pixels right of it (src[0], src[1]) are
   * filtered in place.
   *
   * @param src    pointer to the first pixel directly right of the edge
   * @param stride offset between vertically adjacent pixels
   * @param rnd    rounding control: added to the outer-pair bias and subtracted
   *               from the inner-pair bias (presumably 0 or 1 - confirm with caller)
   */
  static void vc1_h_overlap_c(uint8_t* src, int stride, int rnd)
  {
      int i;

      for (i = 0; i < 8; i++) {
          const int a = src[-2];
          const int b = src[-1];
          const int c = src[0];
          const int d = src[1];
          const int d1 = (a - d + 3 + rnd) >> 3;
          const int d2 = (a - d + b - c + 4 - rnd) >> 3;
          int left  = b - d2;
          int right = c + d2;

          /* The inner pair can leave the 0..255 range (e.g. a=255, b=c=d=0
           * gives b - d2 = -32); saturate instead of letting the uint8_t
           * store wrap around. */
          if (left < 0)
              left = 0;
          else if (left > 255)
              left = 255;
          if (right < 0)
              right = 0;
          else if (right > 255)
              right = 255;

          /* With rnd equal to 0 or 1 the outer pair always stays within
           * 0..255, so it is stored without clamping. */
          src[-2] = a - d1;
          src[-1] = left;
          src[0]  = right;
          src[1]  = d + d1;
          src += stride;
      }
  }
  70. /** Do inverse transform on 8x8 block
  71. */
  72. static void vc1_inv_trans_8x8_c(DCTELEM block[64])
  73. {
  74. int i;
  75. register int t1,t2,t3,t4,t5,t6,t7,t8;
  76. DCTELEM *src, *dst;
  77. src = block;
  78. dst = block;
  79. for(i = 0; i < 8; i++){
  80. t1 = 12 * (src[0] + src[4]);
  81. t2 = 12 * (src[0] - src[4]);
  82. t3 = 16 * src[2] + 6 * src[6];
  83. t4 = 6 * src[2] - 16 * src[6];
  84. t5 = t1 + t3;
  85. t6 = t2 + t4;
  86. t7 = t2 - t4;
  87. t8 = t1 - t3;
  88. t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
  89. t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
  90. t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
  91. t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
  92. dst[0] = (t5 + t1 + 4) >> 3;
  93. dst[1] = (t6 + t2 + 4) >> 3;
  94. dst[2] = (t7 + t3 + 4) >> 3;
  95. dst[3] = (t8 + t4 + 4) >> 3;
  96. dst[4] = (t8 - t4 + 4) >> 3;
  97. dst[5] = (t7 - t3 + 4) >> 3;
  98. dst[6] = (t6 - t2 + 4) >> 3;
  99. dst[7] = (t5 - t1 + 4) >> 3;
  100. src += 8;
  101. dst += 8;
  102. }
  103. src = block;
  104. dst = block;
  105. for(i = 0; i < 8; i++){
  106. t1 = 12 * (src[ 0] + src[32]);
  107. t2 = 12 * (src[ 0] - src[32]);
  108. t3 = 16 * src[16] + 6 * src[48];
  109. t4 = 6 * src[16] - 16 * src[48];
  110. t5 = t1 + t3;
  111. t6 = t2 + t4;
  112. t7 = t2 - t4;
  113. t8 = t1 - t3;
  114. t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
  115. t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
  116. t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
  117. t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
  118. dst[ 0] = (t5 + t1 + 64) >> 7;
  119. dst[ 8] = (t6 + t2 + 64) >> 7;
  120. dst[16] = (t7 + t3 + 64) >> 7;
  121. dst[24] = (t8 + t4 + 64) >> 7;
  122. dst[32] = (t8 - t4 + 64 + 1) >> 7;
  123. dst[40] = (t7 - t3 + 64 + 1) >> 7;
  124. dst[48] = (t6 - t2 + 64 + 1) >> 7;
  125. dst[56] = (t5 - t1 + 64 + 1) >> 7;
  126. src++;
  127. dst++;
  128. }
  129. }
  130. /** Do inverse transform on 8x4 part of block
  131. */
  132. static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n)
  133. {
  134. int i;
  135. register int t1,t2,t3,t4,t5,t6,t7,t8;
  136. DCTELEM *src, *dst;
  137. int off;
  138. off = n * 32;
  139. src = block + off;
  140. dst = block + off;
  141. for(i = 0; i < 4; i++){
  142. t1 = 12 * (src[0] + src[4]);
  143. t2 = 12 * (src[0] - src[4]);
  144. t3 = 16 * src[2] + 6 * src[6];
  145. t4 = 6 * src[2] - 16 * src[6];
  146. t5 = t1 + t3;
  147. t6 = t2 + t4;
  148. t7 = t2 - t4;
  149. t8 = t1 - t3;
  150. t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
  151. t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
  152. t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
  153. t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
  154. dst[0] = (t5 + t1 + 4) >> 3;
  155. dst[1] = (t6 + t2 + 4) >> 3;
  156. dst[2] = (t7 + t3 + 4) >> 3;
  157. dst[3] = (t8 + t4 + 4) >> 3;
  158. dst[4] = (t8 - t4 + 4) >> 3;
  159. dst[5] = (t7 - t3 + 4) >> 3;
  160. dst[6] = (t6 - t2 + 4) >> 3;
  161. dst[7] = (t5 - t1 + 4) >> 3;
  162. src += 8;
  163. dst += 8;
  164. }
  165. src = block + off;
  166. dst = block + off;
  167. for(i = 0; i < 8; i++){
  168. t1 = 17 * (src[ 0] + src[16]);
  169. t2 = 17 * (src[ 0] - src[16]);
  170. t3 = 22 * src[ 8];
  171. t4 = 22 * src[24];
  172. t5 = 10 * src[ 8];
  173. t6 = 10 * src[24];
  174. dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
  175. dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
  176. dst[16] = (t2 + t4 - t5 + 64) >> 7;
  177. dst[24] = (t1 - t3 - t6 + 64) >> 7;
  178. src ++;
  179. dst ++;
  180. }
  181. }
  182. /** Do inverse transform on 4x8 parts of block
  183. */
  184. static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n)
  185. {
  186. int i;
  187. register int t1,t2,t3,t4,t5,t6,t7,t8;
  188. DCTELEM *src, *dst;
  189. int off;
  190. off = n * 4;
  191. src = block + off;
  192. dst = block + off;
  193. for(i = 0; i < 8; i++){
  194. t1 = 17 * (src[0] + src[2]);
  195. t2 = 17 * (src[0] - src[2]);
  196. t3 = 22 * src[1];
  197. t4 = 22 * src[3];
  198. t5 = 10 * src[1];
  199. t6 = 10 * src[3];
  200. dst[0] = (t1 + t3 + t6 + 4) >> 3;
  201. dst[1] = (t2 - t4 + t5 + 4) >> 3;
  202. dst[2] = (t2 + t4 - t5 + 4) >> 3;
  203. dst[3] = (t1 - t3 - t6 + 4) >> 3;
  204. src += 8;
  205. dst += 8;
  206. }
  207. src = block + off;
  208. dst = block + off;
  209. for(i = 0; i < 4; i++){
  210. t1 = 12 * (src[ 0] + src[32]);
  211. t2 = 12 * (src[ 0] - src[32]);
  212. t3 = 16 * src[16] + 6 * src[48];
  213. t4 = 6 * src[16] - 16 * src[48];
  214. t5 = t1 + t3;
  215. t6 = t2 + t4;
  216. t7 = t2 - t4;
  217. t8 = t1 - t3;
  218. t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
  219. t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
  220. t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
  221. t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
  222. dst[ 0] = (t5 + t1 + 64) >> 7;
  223. dst[ 8] = (t6 + t2 + 64) >> 7;
  224. dst[16] = (t7 + t3 + 64) >> 7;
  225. dst[24] = (t8 + t4 + 64) >> 7;
  226. dst[32] = (t8 - t4 + 64 + 1) >> 7;
  227. dst[40] = (t7 - t3 + 64 + 1) >> 7;
  228. dst[48] = (t6 - t2 + 64 + 1) >> 7;
  229. dst[56] = (t5 - t1 + 64 + 1) >> 7;
  230. src++;
  231. dst++;
  232. }
  233. }
  234. /** Do inverse transform on 4x4 part of block
  235. */
  236. static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n)
  237. {
  238. int i;
  239. register int t1,t2,t3,t4,t5,t6;
  240. DCTELEM *src, *dst;
  241. int off;
  242. off = (n&1) * 4 + (n&2) * 16;
  243. src = block + off;
  244. dst = block + off;
  245. for(i = 0; i < 4; i++){
  246. t1 = 17 * (src[0] + src[2]);
  247. t2 = 17 * (src[0] - src[2]);
  248. t3 = 22 * src[1];
  249. t4 = 22 * src[3];
  250. t5 = 10 * src[1];
  251. t6 = 10 * src[3];
  252. dst[0] = (t1 + t3 + t6 + 4) >> 3;
  253. dst[1] = (t2 - t4 + t5 + 4) >> 3;
  254. dst[2] = (t2 + t4 - t5 + 4) >> 3;
  255. dst[3] = (t1 - t3 - t6 + 4) >> 3;
  256. src += 8;
  257. dst += 8;
  258. }
  259. src = block + off;
  260. dst = block + off;
  261. for(i = 0; i < 4; i++){
  262. t1 = 17 * (src[ 0] + src[16]);
  263. t2 = 17 * (src[ 0] - src[16]);
  264. t3 = 22 * src[ 8];
  265. t4 = 22 * src[24];
  266. t5 = 10 * src[ 8];
  267. t6 = 10 * src[24];
  268. dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
  269. dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
  270. dst[16] = (t2 + t4 - t5 + 64) >> 7;
  271. dst[24] = (t1 - t3 - t6 + 64) >> 7;
  272. src ++;
  273. dst ++;
  274. }
  275. }
  276. /* motion compensation functions */
  277. /** Filter used to interpolate fractional pel values
  278. */
  279. static always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
  280. {
  281. switch(mode){
  282. case 0: //no shift
  283. return src[0];
  284. case 1: // 1/4 shift
  285. return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6;
  286. case 2: // 1/2 shift
  287. return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4;
  288. case 3: // 3/4 shift
  289. return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6;
  290. }
  291. return 0; //should not occur
  292. }
  /**
   * Motion compensation of an 8x8 block using the 4-tap interpolation filter.
   *
   * Runs in two passes through an 8x11 temporary buffer: first the filter is
   * applied along each row (tap spacing 1) for source rows -1..9, then along
   * each column of the temporary buffer (tap spacing 8) to produce the 8
   * output rows. Both passes clip their results to 8 bits.
   *
   * @param dst    destination, 8 rows of 8 pixels, rows stride apart
   * @param src    source pixel corresponding to dst[0]
   * @param stride row offset used for both src and dst
   * @param mode   bits 0-1: filter mode of the first (row) pass,
   *               bits 2-3: filter mode of the second (column) pass
   * @param rnd    rounding control: the first pass uses rnd, the second 1 - rnd
   */
  static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int mode, int rnd)
  {
      uint8_t tmp[8*11];
      const int row_mode = mode & 3;
      const int col_mode = (mode >> 2) & 3;
      int x, y;

      /* first pass: one extra row above and two below feed the column filter */
      for (y = 0; y < 11; y++) {
          const uint8_t *in = src + (y - 1) * stride;
          uint8_t *out = tmp + 8 * y;

          for (x = 0; x < 8; x++)
              out[x] = clip_uint8(vc1_mspel_filter(in + x, 1, row_mode, rnd));
      }

      /* second pass: tmp row y + 1 corresponds to output row y */
      for (y = 0; y < 8; y++) {
          const uint8_t *in = tmp + 8 * (y + 1);
          uint8_t *out = dst + y * stride;

          for (x = 0; x < 8; x++)
              out[x] = clip_uint8(vc1_mspel_filter(in + x, 8, col_mode, 1 - rnd));
      }
  }
  /* pixel functions - thin entry points into vc1_mspel_mc() */

  /* the mc00 variant is defined in dsputil.c */
  void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);

  /**
   * Define the static entry point ff_put_vc1_mspel_mc<X><Y>_c(), which calls
   * vc1_mspel_mc() with mode = X | (Y << 2).
   */
  #define VC1_PUT_MSPEL(X, Y)                                                        \
  static void ff_put_vc1_mspel_mc ## X ## Y ## _c(uint8_t *dst, const uint8_t *src,  \
                                                  int stride, int rnd)               \
  {                                                                                  \
      vc1_mspel_mc(dst, src, stride, (X) | ((Y) << 2), rnd);                         \
  }

  VC1_PUT_MSPEL(1, 0)
  VC1_PUT_MSPEL(2, 0)
  VC1_PUT_MSPEL(3, 0)

  VC1_PUT_MSPEL(0, 1)
  VC1_PUT_MSPEL(1, 1)
  VC1_PUT_MSPEL(2, 1)
  VC1_PUT_MSPEL(3, 1)

  VC1_PUT_MSPEL(0, 2)
  VC1_PUT_MSPEL(1, 2)
  VC1_PUT_MSPEL(2, 2)
  VC1_PUT_MSPEL(3, 2)

  VC1_PUT_MSPEL(0, 3)
  VC1_PUT_MSPEL(1, 3)
  VC1_PUT_MSPEL(2, 3)
  VC1_PUT_MSPEL(3, 3)

  #undef VC1_PUT_MSPEL
  368. void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
  369. dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
  370. dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
  371. dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
  372. dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
  373. dsp->vc1_h_overlap = vc1_h_overlap_c;
  374. dsp->vc1_v_overlap = vc1_v_overlap_c;
  375. dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c;
  376. dsp->put_vc1_mspel_pixels_tab[ 1] = ff_put_vc1_mspel_mc10_c;
  377. dsp->put_vc1_mspel_pixels_tab[ 2] = ff_put_vc1_mspel_mc20_c;
  378. dsp->put_vc1_mspel_pixels_tab[ 3] = ff_put_vc1_mspel_mc30_c;
  379. dsp->put_vc1_mspel_pixels_tab[ 4] = ff_put_vc1_mspel_mc01_c;
  380. dsp->put_vc1_mspel_pixels_tab[ 5] = ff_put_vc1_mspel_mc11_c;
  381. dsp->put_vc1_mspel_pixels_tab[ 6] = ff_put_vc1_mspel_mc21_c;
  382. dsp->put_vc1_mspel_pixels_tab[ 7] = ff_put_vc1_mspel_mc31_c;
  383. dsp->put_vc1_mspel_pixels_tab[ 8] = ff_put_vc1_mspel_mc02_c;
  384. dsp->put_vc1_mspel_pixels_tab[ 9] = ff_put_vc1_mspel_mc12_c;
  385. dsp->put_vc1_mspel_pixels_tab[10] = ff_put_vc1_mspel_mc22_c;
  386. dsp->put_vc1_mspel_pixels_tab[11] = ff_put_vc1_mspel_mc32_c;
  387. dsp->put_vc1_mspel_pixels_tab[12] = ff_put_vc1_mspel_mc03_c;
  388. dsp->put_vc1_mspel_pixels_tab[13] = ff_put_vc1_mspel_mc13_c;
  389. dsp->put_vc1_mspel_pixels_tab[14] = ff_put_vc1_mspel_mc23_c;
  390. dsp->put_vc1_mspel_pixels_tab[15] = ff_put_vc1_mspel_mc33_c;
  391. }