You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

454 lines
13KB

  1. /*
  2. * VC-1 and WMV3 decoder - DSP functions
  3. * Copyright (c) 2006 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. */
  22. /**
  23. * @file vc1dsp.c
  24. * VC-1 and WMV3 decoder
  25. *
  26. */
  27. #include "dsputil.h"
  /**
   * Apply the overlap transform to a vertical edge.
   *
   * For each of 8 adjacent columns, the four vertically neighbouring pixels
   * on rows -2, -1, 0 and +1 (relative to src) are read and then replaced by
   * weighted, rounded and clipped combinations of one another.
   *
   * @param src    pixel on row 0 of the first column to process
   * @param stride distance in bytes between vertically adjacent pixels
   * @param rnd    rounding control: added to the bias of rows -1/+1 and
   *               subtracted from the bias of rows -2/0
   *               (presumably 0 or 1 -- confirm against the caller)
   */
  static void vc1_v_overlap_c(uint8_t* src, int stride, int rnd)
  {
      const int bias_down = 4 - rnd;
      const int bias_up   = 3 + rnd;
      int col;

      for (col = 0; col < 8; col++) {
          uint8_t *px = src + col;
          /* read all four taps first: every output depends on the old values */
          const int p0 = px[-2 * stride];
          const int p1 = px[-stride];
          const int p2 = px[0];
          const int p3 = px[stride];

          px[-2 * stride] = clip_uint8((7 * p0 + p3 + bias_down) >> 3);
          px[-stride]     = clip_uint8((-p0 + 7 * p1 + p2 + p3 + bias_up) >> 3);
          px[0]           = clip_uint8((p0 + p1 + 7 * p2 - p3 + bias_down) >> 3);
          px[stride]      = clip_uint8((p0 + 7 * p3 + bias_up) >> 3);
      }
  }
  /**
   * Apply the overlap transform to a horizontal edge.
   *
   * For each of 8 consecutive rows, the four horizontally neighbouring pixels
   * at columns -2, -1, 0 and +1 (relative to src) are read and then replaced
   * by weighted, rounded and clipped combinations of one another.
   *
   * @param src    pixel at column 0 of the first row to process
   * @param stride distance in bytes between the starts of consecutive rows
   * @param rnd    rounding control: added to the bias of columns -1/+1 and
   *               subtracted from the bias of columns -2/0
   *               (presumably 0 or 1 -- confirm against the caller)
   */
  static void vc1_h_overlap_c(uint8_t* src, int stride, int rnd)
  {
      const int bias_down = 4 - rnd;
      const int bias_up   = 3 + rnd;
      int row;

      for (row = 0; row < 8; row++) {
          uint8_t *px = src + row * stride;
          /* read all four taps first: every output depends on the old values */
          const int p0 = px[-2];
          const int p1 = px[-1];
          const int p2 = px[0];
          const int p3 = px[1];

          px[-2] = clip_uint8((7 * p0 + p3 + bias_down) >> 3);
          px[-1] = clip_uint8((-p0 + 7 * p1 + p2 + p3 + bias_up) >> 3);
          px[0]  = clip_uint8((p0 + p1 + 7 * p2 - p3 + bias_down) >> 3);
          px[1]  = clip_uint8((p0 + 7 * p3 + bias_up) >> 3);
      }
  }
  64. /** Do inverse transform on 8x8 block
  65. */
  66. static void vc1_inv_trans_8x8_c(DCTELEM block[64])
  67. {
  68. int i;
  69. register int t1,t2,t3,t4,t5,t6,t7,t8;
  70. DCTELEM *src, *dst;
  71. src = block;
  72. dst = block;
  73. for(i = 0; i < 8; i++){
  74. t1 = 12 * (src[0] + src[4]);
  75. t2 = 12 * (src[0] - src[4]);
  76. t3 = 16 * src[2] + 6 * src[6];
  77. t4 = 6 * src[2] - 16 * src[6];
  78. t5 = t1 + t3;
  79. t6 = t2 + t4;
  80. t7 = t2 - t4;
  81. t8 = t1 - t3;
  82. t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
  83. t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
  84. t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
  85. t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
  86. dst[0] = (t5 + t1 + 4) >> 3;
  87. dst[1] = (t6 + t2 + 4) >> 3;
  88. dst[2] = (t7 + t3 + 4) >> 3;
  89. dst[3] = (t8 + t4 + 4) >> 3;
  90. dst[4] = (t8 - t4 + 4) >> 3;
  91. dst[5] = (t7 - t3 + 4) >> 3;
  92. dst[6] = (t6 - t2 + 4) >> 3;
  93. dst[7] = (t5 - t1 + 4) >> 3;
  94. src += 8;
  95. dst += 8;
  96. }
  97. src = block;
  98. dst = block;
  99. for(i = 0; i < 8; i++){
  100. t1 = 12 * (src[ 0] + src[32]);
  101. t2 = 12 * (src[ 0] - src[32]);
  102. t3 = 16 * src[16] + 6 * src[48];
  103. t4 = 6 * src[16] - 16 * src[48];
  104. t5 = t1 + t3;
  105. t6 = t2 + t4;
  106. t7 = t2 - t4;
  107. t8 = t1 - t3;
  108. t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
  109. t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
  110. t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
  111. t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
  112. dst[ 0] = (t5 + t1 + 64) >> 7;
  113. dst[ 8] = (t6 + t2 + 64) >> 7;
  114. dst[16] = (t7 + t3 + 64) >> 7;
  115. dst[24] = (t8 + t4 + 64) >> 7;
  116. dst[32] = (t8 - t4 + 64 + 1) >> 7;
  117. dst[40] = (t7 - t3 + 64 + 1) >> 7;
  118. dst[48] = (t6 - t2 + 64 + 1) >> 7;
  119. dst[56] = (t5 - t1 + 64 + 1) >> 7;
  120. src++;
  121. dst++;
  122. }
  123. }
  124. /** Do inverse transform on 8x4 part of block
  125. */
  126. static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n)
  127. {
  128. int i;
  129. register int t1,t2,t3,t4,t5,t6,t7,t8;
  130. DCTELEM *src, *dst;
  131. int off;
  132. off = n * 32;
  133. src = block + off;
  134. dst = block + off;
  135. for(i = 0; i < 4; i++){
  136. t1 = 12 * (src[0] + src[4]);
  137. t2 = 12 * (src[0] - src[4]);
  138. t3 = 16 * src[2] + 6 * src[6];
  139. t4 = 6 * src[2] - 16 * src[6];
  140. t5 = t1 + t3;
  141. t6 = t2 + t4;
  142. t7 = t2 - t4;
  143. t8 = t1 - t3;
  144. t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
  145. t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
  146. t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
  147. t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
  148. dst[0] = (t5 + t1 + 4) >> 3;
  149. dst[1] = (t6 + t2 + 4) >> 3;
  150. dst[2] = (t7 + t3 + 4) >> 3;
  151. dst[3] = (t8 + t4 + 4) >> 3;
  152. dst[4] = (t8 - t4 + 4) >> 3;
  153. dst[5] = (t7 - t3 + 4) >> 3;
  154. dst[6] = (t6 - t2 + 4) >> 3;
  155. dst[7] = (t5 - t1 + 4) >> 3;
  156. src += 8;
  157. dst += 8;
  158. }
  159. src = block + off;
  160. dst = block + off;
  161. for(i = 0; i < 8; i++){
  162. t1 = 17 * (src[ 0] + src[16]);
  163. t2 = 17 * (src[ 0] - src[16]);
  164. t3 = 22 * src[ 8];
  165. t4 = 22 * src[24];
  166. t5 = 10 * src[ 8];
  167. t6 = 10 * src[24];
  168. dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
  169. dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
  170. dst[16] = (t2 + t4 - t5 + 64) >> 7;
  171. dst[24] = (t1 - t3 - t6 + 64) >> 7;
  172. src ++;
  173. dst ++;
  174. }
  175. }
  176. /** Do inverse transform on 4x8 parts of block
  177. */
  178. static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n)
  179. {
  180. int i;
  181. register int t1,t2,t3,t4,t5,t6,t7,t8;
  182. DCTELEM *src, *dst;
  183. int off;
  184. off = n * 4;
  185. src = block + off;
  186. dst = block + off;
  187. for(i = 0; i < 8; i++){
  188. t1 = 17 * (src[0] + src[2]);
  189. t2 = 17 * (src[0] - src[2]);
  190. t3 = 22 * src[1];
  191. t4 = 22 * src[3];
  192. t5 = 10 * src[1];
  193. t6 = 10 * src[3];
  194. dst[0] = (t1 + t3 + t6 + 4) >> 3;
  195. dst[1] = (t2 - t4 + t5 + 4) >> 3;
  196. dst[2] = (t2 + t4 - t5 + 4) >> 3;
  197. dst[3] = (t1 - t3 - t6 + 4) >> 3;
  198. src += 8;
  199. dst += 8;
  200. }
  201. src = block + off;
  202. dst = block + off;
  203. for(i = 0; i < 4; i++){
  204. t1 = 12 * (src[ 0] + src[32]);
  205. t2 = 12 * (src[ 0] - src[32]);
  206. t3 = 16 * src[16] + 6 * src[48];
  207. t4 = 6 * src[16] - 16 * src[48];
  208. t5 = t1 + t3;
  209. t6 = t2 + t4;
  210. t7 = t2 - t4;
  211. t8 = t1 - t3;
  212. t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
  213. t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
  214. t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
  215. t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
  216. dst[ 0] = (t5 + t1 + 64) >> 7;
  217. dst[ 8] = (t6 + t2 + 64) >> 7;
  218. dst[16] = (t7 + t3 + 64) >> 7;
  219. dst[24] = (t8 + t4 + 64) >> 7;
  220. dst[32] = (t8 - t4 + 64 + 1) >> 7;
  221. dst[40] = (t7 - t3 + 64 + 1) >> 7;
  222. dst[48] = (t6 - t2 + 64 + 1) >> 7;
  223. dst[56] = (t5 - t1 + 64 + 1) >> 7;
  224. src++;
  225. dst++;
  226. }
  227. }
  228. /** Do inverse transform on 4x4 part of block
  229. */
  230. static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n)
  231. {
  232. int i;
  233. register int t1,t2,t3,t4,t5,t6;
  234. DCTELEM *src, *dst;
  235. int off;
  236. off = (n&1) * 4 + (n&2) * 16;
  237. src = block + off;
  238. dst = block + off;
  239. for(i = 0; i < 4; i++){
  240. t1 = 17 * (src[0] + src[2]);
  241. t2 = 17 * (src[0] - src[2]);
  242. t3 = 22 * src[1];
  243. t4 = 22 * src[3];
  244. t5 = 10 * src[1];
  245. t6 = 10 * src[3];
  246. dst[0] = (t1 + t3 + t6 + 4) >> 3;
  247. dst[1] = (t2 - t4 + t5 + 4) >> 3;
  248. dst[2] = (t2 + t4 - t5 + 4) >> 3;
  249. dst[3] = (t1 - t3 - t6 + 4) >> 3;
  250. src += 8;
  251. dst += 8;
  252. }
  253. src = block + off;
  254. dst = block + off;
  255. for(i = 0; i < 4; i++){
  256. t1 = 17 * (src[ 0] + src[16]);
  257. t2 = 17 * (src[ 0] - src[16]);
  258. t3 = 22 * src[ 8];
  259. t4 = 22 * src[24];
  260. t5 = 10 * src[ 8];
  261. t6 = 10 * src[24];
  262. dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
  263. dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
  264. dst[16] = (t2 + t4 - t5 + 64) >> 7;
  265. dst[24] = (t1 - t3 - t6 + 64) >> 7;
  266. src ++;
  267. dst ++;
  268. }
  269. }
  270. /* motion compensation functions */
  271. /** Filter used to interpolate fractional pel values
  272. */
  273. static always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
  274. {
  275. switch(mode){
  276. case 0: //no shift
  277. return src[0];
  278. case 1: // 1/4 shift
  279. return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6;
  280. case 2: // 1/2 shift
  281. return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4;
  282. case 3: // 3/4 shift
  283. return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6;
  284. }
  285. return 0; //should not occur
  286. }
  /**
   * Do motion compensation of an 8x8 block with bicubic interpolation.
   *
   * Works in two passes: the source is first filtered along each row into an
   * 8-bit temporary buffer (clipped), then that buffer is filtered down each
   * column into dst (clipped again).
   *
   * @param dst    destination, 8x8 pixels, rows stride bytes apart
   * @param src    source; rows -1 .. 9 relative to it are read
   * @param stride distance in bytes between rows, used for both src and dst
   * @param mode   bits 0-1: shift used by the row pass,
   *               bits 2-3: shift used by the column pass
   * @param rnd    rounding control: the row pass uses rnd, the column pass
   *               uses 1 - rnd
   */
  static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int mode, int rnd)
  {
      const int row_mode = mode & 3;
      const int col_mode = (mode >> 2) & 3;
      /* 11 rows: the column filter needs 1 row above and 2 below the 8 outputs */
      uint8_t tmp[8*11];
      int x, y;

      /* pass 1: filter along rows, starting one row above src */
      for (y = 0; y < 11; y++) {
          const uint8_t *in  = src + (y - 1) * stride;
          uint8_t       *out = tmp + 8 * y;

          for (x = 0; x < 8; x++)
              out[x] = clip_uint8(vc1_mspel_filter(in + x, 1, row_mode, rnd));
      }

      /* pass 2: filter down the columns of tmp; tmp row 1 lines up with src row 0 */
      for (y = 0; y < 8; y++) {
          const uint8_t *in  = tmp + 8 * (y + 1);
          uint8_t       *out = dst + y * stride;

          for (x = 0; x < 8; x++)
              out[x] = clip_uint8(vc1_mspel_filter(in + x, 8, col_mode, 1 - rnd));
      }
  }
  314. /* pixel functions - really are entry points to vc1_mspel_mc */
  315. /* this one is defined in dsputil.c */
  void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);

  /**
   * Define the entry point ff_put_vc1_mspel_mcXY_c(), which forwards to
   * vc1_mspel_mc() with mode X | (Y << 2): X lands in mode bits 0-1 and
   * Y in mode bits 2-3.
   */
  #define PUT_VC1_MSPEL(X, Y)                                                  \
  static void ff_put_vc1_mspel_mc ## X ## Y ## _c(uint8_t *dst,                \
                                                  const uint8_t *src,          \
                                                  int stride, int rnd)         \
  {                                                                            \
      vc1_mspel_mc(dst, src, stride, (X) | ((Y) << 2), rnd);                   \
  }

  /* mc00 is not generated here: only its prototype is declared above */
  PUT_VC1_MSPEL(1, 0)
  PUT_VC1_MSPEL(2, 0)
  PUT_VC1_MSPEL(3, 0)

  PUT_VC1_MSPEL(0, 1)
  PUT_VC1_MSPEL(1, 1)
  PUT_VC1_MSPEL(2, 1)
  PUT_VC1_MSPEL(3, 1)

  PUT_VC1_MSPEL(0, 2)
  PUT_VC1_MSPEL(1, 2)
  PUT_VC1_MSPEL(2, 2)
  PUT_VC1_MSPEL(3, 2)

  PUT_VC1_MSPEL(0, 3)
  PUT_VC1_MSPEL(1, 3)
  PUT_VC1_MSPEL(2, 3)
  PUT_VC1_MSPEL(3, 3)

  #undef PUT_VC1_MSPEL
  362. void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
  363. dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
  364. dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
  365. dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
  366. dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
  367. dsp->vc1_h_overlap = vc1_h_overlap_c;
  368. dsp->vc1_v_overlap = vc1_v_overlap_c;
  369. dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c;
  370. dsp->put_vc1_mspel_pixels_tab[ 1] = ff_put_vc1_mspel_mc10_c;
  371. dsp->put_vc1_mspel_pixels_tab[ 2] = ff_put_vc1_mspel_mc20_c;
  372. dsp->put_vc1_mspel_pixels_tab[ 3] = ff_put_vc1_mspel_mc30_c;
  373. dsp->put_vc1_mspel_pixels_tab[ 4] = ff_put_vc1_mspel_mc01_c;
  374. dsp->put_vc1_mspel_pixels_tab[ 5] = ff_put_vc1_mspel_mc11_c;
  375. dsp->put_vc1_mspel_pixels_tab[ 6] = ff_put_vc1_mspel_mc21_c;
  376. dsp->put_vc1_mspel_pixels_tab[ 7] = ff_put_vc1_mspel_mc31_c;
  377. dsp->put_vc1_mspel_pixels_tab[ 8] = ff_put_vc1_mspel_mc02_c;
  378. dsp->put_vc1_mspel_pixels_tab[ 9] = ff_put_vc1_mspel_mc12_c;
  379. dsp->put_vc1_mspel_pixels_tab[10] = ff_put_vc1_mspel_mc22_c;
  380. dsp->put_vc1_mspel_pixels_tab[11] = ff_put_vc1_mspel_mc32_c;
  381. dsp->put_vc1_mspel_pixels_tab[12] = ff_put_vc1_mspel_mc03_c;
  382. dsp->put_vc1_mspel_pixels_tab[13] = ff_put_vc1_mspel_mc13_c;
  383. dsp->put_vc1_mspel_pixels_tab[14] = ff_put_vc1_mspel_mc23_c;
  384. dsp->put_vc1_mspel_pixels_tab[15] = ff_put_vc1_mspel_mc33_c;
  385. }