You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

452 lines
13KB

  1. /*
  2. * VC-1 and WMV3 decoder - DSP functions
  3. * Copyright (c) 2006 Konstantin Shishkov
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  18. *
  19. */
  20. /**
  21. * @file vc1dsp.c
  22. * VC-1 and WMV3 decoder
  23. *
  24. */
  25. #include "dsputil.h"
  26. /** Apply overlap transform to vertical edge
  27. */
  28. static void vc1_v_overlap_c(uint8_t* src, int stride, int rnd)
  29. {
  30. int i;
  31. int a, b, c, d;
  32. for(i = 0; i < 8; i++) {
  33. a = src[-2*stride];
  34. b = src[-stride];
  35. c = src[0];
  36. d = src[stride];
  37. src[-2*stride] = clip_uint8((7*a + d + 4 - rnd) >> 3);
  38. src[-stride] = clip_uint8((-a + 7*b + c + d + 3 + rnd) >> 3);
  39. src[0] = clip_uint8((a + b + 7*c - d + 4 - rnd) >> 3);
  40. src[stride] = clip_uint8((a + 7*d + 3 + rnd) >> 3);
  41. src++;
  42. }
  43. }
  44. /** Apply overlap transform to horizontal edge
  45. */
  46. static void vc1_h_overlap_c(uint8_t* src, int stride, int rnd)
  47. {
  48. int i;
  49. int a, b, c, d;
  50. for(i = 0; i < 8; i++) {
  51. a = src[-2];
  52. b = src[-1];
  53. c = src[0];
  54. d = src[1];
  55. src[-2] = clip_uint8((7*a + d + 4 - rnd) >> 3);
  56. src[-1] = clip_uint8((-a + 7*b + c + d + 3 + rnd) >> 3);
  57. src[0] = clip_uint8((a + b + 7*c - d + 4 - rnd) >> 3);
  58. src[1] = clip_uint8((a + 7*d + 3 + rnd) >> 3);
  59. src += stride;
  60. }
  61. }
  62. /** Do inverse transform on 8x8 block
  63. */
  64. static void vc1_inv_trans_8x8_c(DCTELEM block[64])
  65. {
  66. int i;
  67. register int t1,t2,t3,t4,t5,t6,t7,t8;
  68. DCTELEM *src, *dst;
  69. src = block;
  70. dst = block;
  71. for(i = 0; i < 8; i++){
  72. t1 = 12 * (src[0] + src[4]);
  73. t2 = 12 * (src[0] - src[4]);
  74. t3 = 16 * src[2] + 6 * src[6];
  75. t4 = 6 * src[2] - 16 * src[6];
  76. t5 = t1 + t3;
  77. t6 = t2 + t4;
  78. t7 = t2 - t4;
  79. t8 = t1 - t3;
  80. t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
  81. t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
  82. t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
  83. t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
  84. dst[0] = (t5 + t1 + 4) >> 3;
  85. dst[1] = (t6 + t2 + 4) >> 3;
  86. dst[2] = (t7 + t3 + 4) >> 3;
  87. dst[3] = (t8 + t4 + 4) >> 3;
  88. dst[4] = (t8 - t4 + 4) >> 3;
  89. dst[5] = (t7 - t3 + 4) >> 3;
  90. dst[6] = (t6 - t2 + 4) >> 3;
  91. dst[7] = (t5 - t1 + 4) >> 3;
  92. src += 8;
  93. dst += 8;
  94. }
  95. src = block;
  96. dst = block;
  97. for(i = 0; i < 8; i++){
  98. t1 = 12 * (src[ 0] + src[32]);
  99. t2 = 12 * (src[ 0] - src[32]);
  100. t3 = 16 * src[16] + 6 * src[48];
  101. t4 = 6 * src[16] - 16 * src[48];
  102. t5 = t1 + t3;
  103. t6 = t2 + t4;
  104. t7 = t2 - t4;
  105. t8 = t1 - t3;
  106. t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
  107. t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
  108. t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
  109. t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
  110. dst[ 0] = (t5 + t1 + 64) >> 7;
  111. dst[ 8] = (t6 + t2 + 64) >> 7;
  112. dst[16] = (t7 + t3 + 64) >> 7;
  113. dst[24] = (t8 + t4 + 64) >> 7;
  114. dst[32] = (t8 - t4 + 64 + 1) >> 7;
  115. dst[40] = (t7 - t3 + 64 + 1) >> 7;
  116. dst[48] = (t6 - t2 + 64 + 1) >> 7;
  117. dst[56] = (t5 - t1 + 64 + 1) >> 7;
  118. src++;
  119. dst++;
  120. }
  121. }
  122. /** Do inverse transform on 8x4 part of block
  123. */
  124. static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n)
  125. {
  126. int i;
  127. register int t1,t2,t3,t4,t5,t6,t7,t8;
  128. DCTELEM *src, *dst;
  129. int off;
  130. off = n * 32;
  131. src = block + off;
  132. dst = block + off;
  133. for(i = 0; i < 4; i++){
  134. t1 = 12 * (src[0] + src[4]);
  135. t2 = 12 * (src[0] - src[4]);
  136. t3 = 16 * src[2] + 6 * src[6];
  137. t4 = 6 * src[2] - 16 * src[6];
  138. t5 = t1 + t3;
  139. t6 = t2 + t4;
  140. t7 = t2 - t4;
  141. t8 = t1 - t3;
  142. t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
  143. t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
  144. t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
  145. t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
  146. dst[0] = (t5 + t1 + 4) >> 3;
  147. dst[1] = (t6 + t2 + 4) >> 3;
  148. dst[2] = (t7 + t3 + 4) >> 3;
  149. dst[3] = (t8 + t4 + 4) >> 3;
  150. dst[4] = (t8 - t4 + 4) >> 3;
  151. dst[5] = (t7 - t3 + 4) >> 3;
  152. dst[6] = (t6 - t2 + 4) >> 3;
  153. dst[7] = (t5 - t1 + 4) >> 3;
  154. src += 8;
  155. dst += 8;
  156. }
  157. src = block + off;
  158. dst = block + off;
  159. for(i = 0; i < 8; i++){
  160. t1 = 17 * (src[ 0] + src[16]);
  161. t2 = 17 * (src[ 0] - src[16]);
  162. t3 = 22 * src[ 8];
  163. t4 = 22 * src[24];
  164. t5 = 10 * src[ 8];
  165. t6 = 10 * src[24];
  166. dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
  167. dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
  168. dst[16] = (t2 + t4 - t5 + 64) >> 7;
  169. dst[24] = (t1 - t3 - t6 + 64) >> 7;
  170. src ++;
  171. dst ++;
  172. }
  173. }
  174. /** Do inverse transform on 4x8 parts of block
  175. */
  176. static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n)
  177. {
  178. int i;
  179. register int t1,t2,t3,t4,t5,t6,t7,t8;
  180. DCTELEM *src, *dst;
  181. int off;
  182. off = n * 4;
  183. src = block + off;
  184. dst = block + off;
  185. for(i = 0; i < 8; i++){
  186. t1 = 17 * (src[0] + src[2]);
  187. t2 = 17 * (src[0] - src[2]);
  188. t3 = 22 * src[1];
  189. t4 = 22 * src[3];
  190. t5 = 10 * src[1];
  191. t6 = 10 * src[3];
  192. dst[0] = (t1 + t3 + t6 + 4) >> 3;
  193. dst[1] = (t2 - t4 + t5 + 4) >> 3;
  194. dst[2] = (t2 + t4 - t5 + 4) >> 3;
  195. dst[3] = (t1 - t3 - t6 + 4) >> 3;
  196. src += 8;
  197. dst += 8;
  198. }
  199. src = block + off;
  200. dst = block + off;
  201. for(i = 0; i < 4; i++){
  202. t1 = 12 * (src[ 0] + src[32]);
  203. t2 = 12 * (src[ 0] - src[32]);
  204. t3 = 16 * src[16] + 6 * src[48];
  205. t4 = 6 * src[16] - 16 * src[48];
  206. t5 = t1 + t3;
  207. t6 = t2 + t4;
  208. t7 = t2 - t4;
  209. t8 = t1 - t3;
  210. t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
  211. t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
  212. t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
  213. t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
  214. dst[ 0] = (t5 + t1 + 64) >> 7;
  215. dst[ 8] = (t6 + t2 + 64) >> 7;
  216. dst[16] = (t7 + t3 + 64) >> 7;
  217. dst[24] = (t8 + t4 + 64) >> 7;
  218. dst[32] = (t8 - t4 + 64 + 1) >> 7;
  219. dst[40] = (t7 - t3 + 64 + 1) >> 7;
  220. dst[48] = (t6 - t2 + 64 + 1) >> 7;
  221. dst[56] = (t5 - t1 + 64 + 1) >> 7;
  222. src++;
  223. dst++;
  224. }
  225. }
  226. /** Do inverse transform on 4x4 part of block
  227. */
  228. static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n)
  229. {
  230. int i;
  231. register int t1,t2,t3,t4,t5,t6;
  232. DCTELEM *src, *dst;
  233. int off;
  234. off = (n&1) * 4 + (n&2) * 16;
  235. src = block + off;
  236. dst = block + off;
  237. for(i = 0; i < 4; i++){
  238. t1 = 17 * (src[0] + src[2]);
  239. t2 = 17 * (src[0] - src[2]);
  240. t3 = 22 * src[1];
  241. t4 = 22 * src[3];
  242. t5 = 10 * src[1];
  243. t6 = 10 * src[3];
  244. dst[0] = (t1 + t3 + t6 + 4) >> 3;
  245. dst[1] = (t2 - t4 + t5 + 4) >> 3;
  246. dst[2] = (t2 + t4 - t5 + 4) >> 3;
  247. dst[3] = (t1 - t3 - t6 + 4) >> 3;
  248. src += 8;
  249. dst += 8;
  250. }
  251. src = block + off;
  252. dst = block + off;
  253. for(i = 0; i < 4; i++){
  254. t1 = 17 * (src[ 0] + src[16]);
  255. t2 = 17 * (src[ 0] - src[16]);
  256. t3 = 22 * src[ 8];
  257. t4 = 22 * src[24];
  258. t5 = 10 * src[ 8];
  259. t6 = 10 * src[24];
  260. dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
  261. dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
  262. dst[16] = (t2 + t4 - t5 + 64) >> 7;
  263. dst[24] = (t1 - t3 - t6 + 64) >> 7;
  264. src ++;
  265. dst ++;
  266. }
  267. }
  268. /* motion compensation functions */
  269. /** Filter used to interpolate fractional pel values
  270. */
  271. static always_inline uint8_t vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
  272. {
  273. switch(mode){
  274. case 0: //no shift
  275. return src[0];
  276. case 1: // 1/4 shift
  277. return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6;
  278. case 2: // 1/2 shift
  279. return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4;
  280. case 3: // 3/4 shift
  281. return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6;
  282. }
  283. return 0; //should not occur
  284. }
  285. /** Function used to do motion compensation with bicubic interpolation
  286. */
  287. static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int mode, int rnd)
  288. {
  289. int i, j;
  290. uint8_t tmp[8*11], *tptr;
  291. int m, r;
  292. m = (mode & 3);
  293. r = rnd;
  294. src -= stride;
  295. tptr = tmp;
  296. for(j = 0; j < 11; j++) {
  297. for(i = 0; i < 8; i++)
  298. tptr[i] = vc1_mspel_filter(src + i, 1, m, r);
  299. src += stride;
  300. tptr += 8;
  301. }
  302. r = 1 - rnd;
  303. m = (mode >> 2) & 3;
  304. tptr = tmp + 8;
  305. for(j = 0; j < 8; j++) {
  306. for(i = 0; i < 8; i++)
  307. dst[i] = vc1_mspel_filter(tptr + i, 8, m, r);
  308. dst += stride;
  309. tptr += 8;
  310. }
  311. }
  312. /* pixel functions - really are entry points to vc1_mspel_mc */
  313. /* this one is defined in dsputil.c */
  314. void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
  315. static void ff_put_vc1_mspel_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  316. vc1_mspel_mc(dst, src, stride, 0x1, rnd);
  317. }
  318. static void ff_put_vc1_mspel_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  319. vc1_mspel_mc(dst, src, stride, 0x2, rnd);
  320. }
  321. static void ff_put_vc1_mspel_mc30_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  322. vc1_mspel_mc(dst, src, stride, 0x3, rnd);
  323. }
  324. static void ff_put_vc1_mspel_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  325. vc1_mspel_mc(dst, src, stride, 0x4, rnd);
  326. }
  327. static void ff_put_vc1_mspel_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  328. vc1_mspel_mc(dst, src, stride, 0x5, rnd);
  329. }
  330. static void ff_put_vc1_mspel_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  331. vc1_mspel_mc(dst, src, stride, 0x6, rnd);
  332. }
  333. static void ff_put_vc1_mspel_mc31_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  334. vc1_mspel_mc(dst, src, stride, 0x7, rnd);
  335. }
  336. static void ff_put_vc1_mspel_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  337. vc1_mspel_mc(dst, src, stride, 0x8, rnd);
  338. }
  339. static void ff_put_vc1_mspel_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  340. vc1_mspel_mc(dst, src, stride, 0x9, rnd);
  341. }
  342. static void ff_put_vc1_mspel_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  343. vc1_mspel_mc(dst, src, stride, 0xA, rnd);
  344. }
  345. static void ff_put_vc1_mspel_mc32_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  346. vc1_mspel_mc(dst, src, stride, 0xB, rnd);
  347. }
  348. static void ff_put_vc1_mspel_mc03_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  349. vc1_mspel_mc(dst, src, stride, 0xC, rnd);
  350. }
  351. static void ff_put_vc1_mspel_mc13_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  352. vc1_mspel_mc(dst, src, stride, 0xD, rnd);
  353. }
  354. static void ff_put_vc1_mspel_mc23_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  355. vc1_mspel_mc(dst, src, stride, 0xE, rnd);
  356. }
  357. static void ff_put_vc1_mspel_mc33_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
  358. vc1_mspel_mc(dst, src, stride, 0xF, rnd);
  359. }
  360. void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
  361. dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
  362. dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
  363. dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
  364. dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
  365. dsp->vc1_h_overlap = vc1_h_overlap_c;
  366. dsp->vc1_v_overlap = vc1_v_overlap_c;
  367. dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c;
  368. dsp->put_vc1_mspel_pixels_tab[ 1] = ff_put_vc1_mspel_mc10_c;
  369. dsp->put_vc1_mspel_pixels_tab[ 2] = ff_put_vc1_mspel_mc20_c;
  370. dsp->put_vc1_mspel_pixels_tab[ 3] = ff_put_vc1_mspel_mc30_c;
  371. dsp->put_vc1_mspel_pixels_tab[ 4] = ff_put_vc1_mspel_mc01_c;
  372. dsp->put_vc1_mspel_pixels_tab[ 5] = ff_put_vc1_mspel_mc11_c;
  373. dsp->put_vc1_mspel_pixels_tab[ 6] = ff_put_vc1_mspel_mc21_c;
  374. dsp->put_vc1_mspel_pixels_tab[ 7] = ff_put_vc1_mspel_mc31_c;
  375. dsp->put_vc1_mspel_pixels_tab[ 8] = ff_put_vc1_mspel_mc02_c;
  376. dsp->put_vc1_mspel_pixels_tab[ 9] = ff_put_vc1_mspel_mc12_c;
  377. dsp->put_vc1_mspel_pixels_tab[10] = ff_put_vc1_mspel_mc22_c;
  378. dsp->put_vc1_mspel_pixels_tab[11] = ff_put_vc1_mspel_mc32_c;
  379. dsp->put_vc1_mspel_pixels_tab[12] = ff_put_vc1_mspel_mc03_c;
  380. dsp->put_vc1_mspel_pixels_tab[13] = ff_put_vc1_mspel_mc13_c;
  381. dsp->put_vc1_mspel_pixels_tab[14] = ff_put_vc1_mspel_mc23_c;
  382. dsp->put_vc1_mspel_pixels_tab[15] = ff_put_vc1_mspel_mc33_c;
  383. }