You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

881 lines
34KB

  1. /*
  2. * VC-1 and WMV3 decoder
  3. * Copyright (c) 2011 Mashiat Sarker Shakkhar
  4. * Copyright (c) 2006-2007 Konstantin Shishkov
  5. * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
  6. *
  7. * This file is part of FFmpeg.
  8. *
  9. * FFmpeg is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * FFmpeg is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with FFmpeg; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. /**
  24. * @file
  25. * VC-1 and WMV3 block decoding routines
  26. */
  27. #include "avcodec.h"
  28. #include "h264chroma.h"
  29. #include "mathops.h"
  30. #include "mpegvideo.h"
  31. #include "vc1.h"
  32. static av_always_inline void vc1_scale_luma(uint8_t *srcY,
  33. int k, int linesize)
  34. {
  35. int i, j;
  36. for (j = 0; j < k; j++) {
  37. for (i = 0; i < k; i++)
  38. srcY[i] = ((srcY[i] - 128) >> 1) + 128;
  39. srcY += linesize;
  40. }
  41. }
  42. static av_always_inline void vc1_scale_chroma(uint8_t *srcU, uint8_t *srcV,
  43. int k, int uvlinesize)
  44. {
  45. int i, j;
  46. for (j = 0; j < k; j++) {
  47. for (i = 0; i < k; i++) {
  48. srcU[i] = ((srcU[i] - 128) >> 1) + 128;
  49. srcV[i] = ((srcV[i] - 128) >> 1) + 128;
  50. }
  51. srcU += uvlinesize;
  52. srcV += uvlinesize;
  53. }
  54. }
  55. static av_always_inline void vc1_lut_scale_luma(uint8_t *srcY,
  56. uint8_t *lut1, uint8_t *lut2,
  57. int k, int linesize)
  58. {
  59. int i, j;
  60. for (j = 0; j < k; j += 2) {
  61. for (i = 0; i < k; i++)
  62. srcY[i] = lut1[srcY[i]];
  63. srcY += linesize;
  64. if (j + 1 == k)
  65. break;
  66. for (i = 0; i < k; i++)
  67. srcY[i] = lut2[srcY[i]];
  68. srcY += linesize;
  69. }
  70. }
  71. static av_always_inline void vc1_lut_scale_chroma(uint8_t *srcU, uint8_t *srcV,
  72. uint8_t *lut1, uint8_t *lut2,
  73. int k, int uvlinesize)
  74. {
  75. int i, j;
  76. for (j = 0; j < k; j += 2) {
  77. for (i = 0; i < k; i++) {
  78. srcU[i] = lut1[srcU[i]];
  79. srcV[i] = lut1[srcV[i]];
  80. }
  81. srcU += uvlinesize;
  82. srcV += uvlinesize;
  83. if (j + 1 == k)
  84. break;
  85. for (i = 0; i < k; i++) {
  86. srcU[i] = lut2[srcU[i]];
  87. srcV[i] = lut2[srcV[i]];
  88. }
  89. srcU += uvlinesize;
  90. srcV += uvlinesize;
  91. }
  92. }
/* number of set bits in a 4-bit value, indexed by the value itself */
static const uint8_t popcount4[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
/**
 * Predict the luma MV of a field-interlaced MB from its four block MVs
 * and report how many of those MVs reference the opposite field.
 *
 * idx is a 4-bit mask with bit n set when block n's MV (mv_f) points to
 * the opposite field.  Selection rule:
 *   - 0 or 4 opposite-field MVs: median of all four MVs;
 *   - 1/3 or 3/1 split: median of the three majority-field MVs;
 *   - 2/2 split: average of the two same-field MVs (index2 packs the two
 *     contributing block numbers, one per nibble, for each 2-bit mask).
 *
 * @param v    VC-1 decoder context
 * @param dir  prediction direction (0 = forward, 1 = backward)
 * @param tx   receives the predicted horizontal MV component
 * @param ty   receives the predicted vertical MV component
 * @return     count of blocks whose MV references the opposite field (0..4)
 */
static av_always_inline int get_luma_mv(VC1Context *v, int dir, int16_t *tx, int16_t *ty)
{
    MpegEncContext *s = &v->s;
    /* one bit per luma block, set when the block uses the opposite field */
    int idx = v->mv_f[dir][s->block_index[0] + v->blocks_off] |
             (v->mv_f[dir][s->block_index[1] + v->blocks_off] << 1) |
             (v->mv_f[dir][s->block_index[2] + v->blocks_off] << 2) |
             (v->mv_f[dir][s->block_index[3] + v->blocks_off] << 3);
    /* for 2/2 splits: the two same-field block numbers, one per nibble */
    static const uint8_t index2[16] = { 0, 0, 0, 0x23, 0, 0x13, 0x03, 0, 0, 0x12, 0x02, 0, 0x01, 0, 0, 0 };
    int opp_count = popcount4[idx];

    switch (opp_count) {
    case 0:
    case 4:
        /* all four blocks agree on the field: plain 4-way median */
        *tx = median4(s->mv[dir][0][0], s->mv[dir][1][0], s->mv[dir][2][0], s->mv[dir][3][0]);
        *ty = median4(s->mv[dir][0][1], s->mv[dir][1][1], s->mv[dir][2][1], s->mv[dir][3][1]);
        break;
    case 1:
        /* one outlier: median of the three same-field MVs; the comparisons
         * skip the single block whose bit is set in idx */
        *tx = mid_pred(s->mv[dir][idx < 2][0], s->mv[dir][1 + (idx < 4)][0], s->mv[dir][2 + (idx < 8)][0]);
        *ty = mid_pred(s->mv[dir][idx < 2][1], s->mv[dir][1 + (idx < 4)][1], s->mv[dir][2 + (idx < 8)][1]);
        break;
    case 3:
        /* mirror of case 1: median of the three opposite-field MVs */
        *tx = mid_pred(s->mv[dir][idx > 0xd][0], s->mv[dir][1 + (idx > 0xb)][0], s->mv[dir][2 + (idx > 0x7)][0]);
        *ty = mid_pred(s->mv[dir][idx > 0xd][1], s->mv[dir][1 + (idx > 0xb)][1], s->mv[dir][2 + (idx > 0x7)][1]);
        break;
    case 2:
        /* 2/2 tie: average the two same-field MVs chosen via index2 */
        *tx = (s->mv[dir][index2[idx] >> 4][0] + s->mv[dir][index2[idx] & 0xf][0]) / 2;
        *ty = (s->mv[dir][index2[idx] >> 4][1] + s->mv[dir][index2[idx] & 0xf][1]) / 2;
        break;
    }
    return opp_count;
}
/**
 * Derive the chroma MV of a MB from the MVs of its non-intra luma blocks.
 *
 * idx is a 4-bit mask with bit n set when luma block n is inter coded
 * (mb_type == 0) and thus contributes a valid MV.  With 4 valid MVs the
 * 4-way median is used, with 3 the median of those three, with 2 their
 * average (index2 packs the two contributing block numbers, one per
 * nibble).  Fewer than 2 valid MVs: no chroma MV is derived.
 *
 * @param v    VC-1 decoder context
 * @param dir  prediction direction (0 = forward, 1 = backward)
 * @param tx   receives the derived horizontal MV component
 * @param ty   receives the derived vertical MV component
 * @return     number of valid (inter) blocks used, or 0 if none derived
 */
static av_always_inline int get_chroma_mv(VC1Context *v, int dir, int16_t *tx, int16_t *ty)
{
    MpegEncContext *s = &v->s;
    /* one bit per luma block, set when the block is inter (has a valid MV) */
    int idx = !v->mb_type[0][s->block_index[0]] |
             (!v->mb_type[0][s->block_index[1]] << 1) |
             (!v->mb_type[0][s->block_index[2]] << 2) |
             (!v->mb_type[0][s->block_index[3]] << 3);
    /* for exactly two valid blocks: their block numbers, one per nibble */
    static const uint8_t index2[16] = { 0, 0, 0, 0x01, 0, 0x02, 0x12, 0, 0, 0x03, 0x13, 0, 0x23, 0, 0, 0 };
    int valid_count = popcount4[idx];

    switch (valid_count) {
    case 4:
        *tx = median4(s->mv[dir][0][0], s->mv[dir][1][0], s->mv[dir][2][0], s->mv[dir][3][0]);
        *ty = median4(s->mv[dir][0][1], s->mv[dir][1][1], s->mv[dir][2][1], s->mv[dir][3][1]);
        break;
    case 3:
        /* the comparisons select the three set bits of idx as block numbers */
        *tx = mid_pred(s->mv[dir][idx > 0xd][0], s->mv[dir][1 + (idx > 0xb)][0], s->mv[dir][2 + (idx > 0x7)][0]);
        *ty = mid_pred(s->mv[dir][idx > 0xd][1], s->mv[dir][1 + (idx > 0xb)][1], s->mv[dir][2 + (idx > 0x7)][1]);
        break;
    case 2:
        *tx = (s->mv[dir][index2[idx] >> 4][0] + s->mv[dir][index2[idx] & 0xf][0]) / 2;
        *ty = (s->mv[dir][index2[idx] >> 4][1] + s->mv[dir][index2[idx] & 0xf][1]) / 2;
        break;
    default:
        /* 0 or 1 valid blocks: not enough data, caller treats MB as intra */
        return 0;
    }
    return valid_count;
}
/** Do motion compensation over 1 macroblock
 * Mostly adapted hpel_motion and qpel_motion from mpegvideo.c
 *
 * Performs luma (16x16) and chroma (8x8) MC for a 1-MV macroblock:
 * selects the reference picture and its intensity-compensation LUTs,
 * derives and clips the chroma MV from the luma MV, emulates picture
 * edges when the source block overlaps them, applies range reduction /
 * intensity compensation on the emulated copy, then runs the DSP
 * put routines into s->dest[].
 *
 * @param v    VC-1 decoder context
 * @param dir  prediction direction: 0 = forward (last/current picture),
 *             1 = backward (next picture)
 */
void ff_vc1_mc_1mv(VC1Context *v, int dir)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcY, *srcU, *srcV;
    int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
    int v_edge_pos = s->v_edge_pos >> v->field_mode; /* per-field height when field coded */
    int i;
    uint8_t (*luty)[256], (*lutuv)[256]; /* intensity-compensation LUTs, one per field */
    int use_ic;

    /* bail out when the needed forward reference does not exist */
    if ((!v->field_mode ||
         (v->ref_field_type[dir] == 1 && v->cur_field_type == 1)) &&
        !v->s.last_picture.f->data[0])
        return;

    mx = s->mv[dir][0][0];
    my = s->mv[dir][0][1];

    // store motion vectors for further use in B frames
    if (s->pict_type == AV_PICTURE_TYPE_P) {
        for (i = 0; i < 4; i++) {
            s->current_picture.motion_val[1][s->block_index[i] + v->blocks_off][0] = mx;
            s->current_picture.motion_val[1][s->block_index[i] + v->blocks_off][1] = my;
        }
    }

    /* derive chroma MV: quarter-pel luma MV halved, biased so that 3/4-pel
     * positions round up (VC-1 chroma rounding) */
    uvmx = (mx + ((mx & 3) == 3)) >> 1;
    uvmy = (my + ((my & 3) == 3)) >> 1;
    v->luma_mv[s->mb_x][0] = uvmx;
    v->luma_mv[s->mb_x][1] = uvmy;

    /* cross-field reference: shift the vertical MV by half a line pair */
    if (v->field_mode &&
        v->cur_field_type != v->ref_field_type[dir]) {
        my   = my   - 2 + 4 * v->cur_field_type;
        uvmy = uvmy - 2 + 4 * v->cur_field_type;
    }

    // fastuvmc shall be ignored for interlaced frame picture
    if (v->fastuvmc && (v->fcm != ILACE_FRAME)) {
        /* round odd chroma MV components toward zero (to half-pel) */
        uvmx = uvmx + ((uvmx < 0) ? (uvmx & 1) : -(uvmx & 1));
        uvmy = uvmy + ((uvmy < 0) ? (uvmy & 1) : -(uvmy & 1));
    }

    /* select reference planes + the matching IC LUTs for that reference */
    if (!dir) {
        if (v->field_mode && (v->cur_field_type != v->ref_field_type[dir]) && v->second_field) {
            /* second field predicting from the first field of the same frame */
            srcY = s->current_picture.f->data[0];
            srcU = s->current_picture.f->data[1];
            srcV = s->current_picture.f->data[2];
            luty   = v->curr_luty;
            lutuv  = v->curr_lutuv;
            use_ic = *v->curr_use_ic;
        } else {
            srcY = s->last_picture.f->data[0];
            srcU = s->last_picture.f->data[1];
            srcV = s->last_picture.f->data[2];
            luty   = v->last_luty;
            lutuv  = v->last_lutuv;
            use_ic = v->last_use_ic;
        }
    } else {
        srcY = s->next_picture.f->data[0];
        srcU = s->next_picture.f->data[1];
        srcV = s->next_picture.f->data[2];
        luty   = v->next_luty;
        lutuv  = v->next_lutuv;
        use_ic = v->next_use_ic;
    }

    if (!srcY || !srcU) {
        av_log(v->s.avctx, AV_LOG_ERROR, "Referenced frame missing.\n");
        return;
    }

    /* integer-pel source position of the block's top-left corner */
    src_x   = s->mb_x * 16 + (mx   >> 2);
    src_y   = s->mb_y * 16 + (my   >> 2);
    uvsrc_x = s->mb_x *  8 + (uvmx >> 2);
    uvsrc_y = s->mb_y *  8 + (uvmy >> 2);

    /* MV pull-back: simple/main clip to MB grid, advanced to coded size */
    if (v->profile != PROFILE_ADVANCED) {
        src_x   = av_clip(  src_x, -16, s->mb_width  * 16);
        src_y   = av_clip(  src_y, -16, s->mb_height * 16);
        uvsrc_x = av_clip(uvsrc_x,  -8, s->mb_width  *  8);
        uvsrc_y = av_clip(uvsrc_y,  -8, s->mb_height *  8);
    } else {
        src_x   = av_clip(  src_x, -17, s->avctx->coded_width);
        src_y   = av_clip(  src_y, -18, s->avctx->coded_height + 1);
        uvsrc_x = av_clip(uvsrc_x,  -8, s->avctx->coded_width  >> 1);
        uvsrc_y = av_clip(uvsrc_y,  -8, s->avctx->coded_height >> 1);
    }

    srcY += src_y   * s->linesize   + src_x;
    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;

    /* bottom field: step one line into the interleaved reference */
    if (v->field_mode && v->ref_field_type[dir]) {
        srcY += s->current_picture_ptr->f->linesize[0];
        srcU += s->current_picture_ptr->f->linesize[1];
        srcV += s->current_picture_ptr->f->linesize[2];
    }

    /* for grayscale we should not try to read from unknown area */
    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY) {
        srcU = s->sc.edge_emu_buffer + 18 * s->linesize;
        srcV = s->sc.edge_emu_buffer + 18 * s->linesize;
    }

    /* copy through the edge-emulation buffer when the block crosses the
     * picture border, or when the pixels must be rescaled (range
     * reduction / intensity compensation) before interpolation */
    if (v->rangeredfrm || use_ic
        || s->h_edge_pos < 22 || v_edge_pos < 22
        || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx & 3) - 16 - s->mspel * 3
        || (unsigned)(src_y - 1)        > v_edge_pos    - (my & 3) - 16 - 3) {
        uint8_t *ubuf = s->sc.edge_emu_buffer + 19 * s->linesize;
        uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
        const int k = 17 + s->mspel * 2; /* luma block + filter margin */

        srcY -= s->mspel * (1 + s->linesize); /* include top/left filter taps */
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcY,
                                 s->linesize, s->linesize,
                                 k, k,
                                 src_x - s->mspel, src_y - s->mspel,
                                 s->h_edge_pos, v_edge_pos);
        srcY = s->sc.edge_emu_buffer;
        s->vdsp.emulated_edge_mc(ubuf, srcU,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        s->vdsp.emulated_edge_mc(vbuf, srcV,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        srcU = ubuf;
        srcV = vbuf;
        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_luma(srcY, k, s->linesize);
            vc1_scale_chroma(srcU, srcV, 9, s->uvlinesize);
        }
        /* if we deal with intensity compensation we need to scale source blocks */
        if (use_ic) {
            /* field mode: one LUT for the referenced field; frame mode:
             * alternate the two field LUTs by source-line parity */
            vc1_lut_scale_luma(srcY,
                               luty[v->field_mode ? v->ref_field_type[dir] : ((0 + src_y - s->mspel) & 1)],
                               luty[v->field_mode ? v->ref_field_type[dir] : ((1 + src_y - s->mspel) & 1)],
                               k, s->linesize);
            vc1_lut_scale_chroma(srcU, srcV,
                                 lutuv[v->field_mode ? v->ref_field_type[dir] : ((0 + uvsrc_y) & 1)],
                                 lutuv[v->field_mode ? v->ref_field_type[dir] : ((1 + uvsrc_y) & 1)],
                                 9, s->uvlinesize);
        }
        srcY += s->mspel * (1 + s->linesize); /* back to the block origin */
    }

    if (s->mspel) {
        /* quarter-pel MC with VC-1's own bicubic filters */
        dxy = ((my & 3) << 2) | (mx & 3);
        v->vc1dsp.put_vc1_mspel_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, v->rnd);
    } else { // hpel mc - always used for luma
        dxy = (my & 2) | ((mx & 2) >> 1);
        if (!v->rnd)
            s->hdsp.put_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
        else
            s->hdsp.put_no_rnd_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
    }

    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY)
        return;
    /* Chroma MC always uses qpel bilinear */
    uvmx = (uvmx & 3) << 1;
    uvmy = (uvmy & 3) << 1;
    if (!v->rnd) {
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    } else {
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    }
}
/** Do motion compensation for 4-MV macroblock - luminance block
 *
 * Performs MC for one 8x8 luma block (n = 0..3) of a 4-MV macroblock.
 * Handles frame-interlaced field MVs (fieldmv doubles the vertical
 * stride), MV pull-back for interlaced frames, edge emulation, range
 * reduction and intensity compensation.
 *
 * @param v    VC-1 decoder context
 * @param n    luma block number within the MB (0..3, raster order)
 * @param dir  prediction direction (0 = forward, 1 = backward)
 * @param avg  nonzero to average into s->dest (B-frame interpolation)
 */
void ff_vc1_mc_4mv_luma(VC1Context *v, int n, int dir, int avg)
{
    MpegEncContext *s = &v->s;
    uint8_t *srcY;
    int dxy, mx, my, src_x, src_y;
    int off;
    /* fieldmv: block uses a field MV in an interlaced frame picture */
    int fieldmv = (v->fcm == ILACE_FRAME) ? v->blk_mv_type[s->block_index[n]] : 0;
    int v_edge_pos = s->v_edge_pos >> v->field_mode;
    uint8_t (*luty)[256];
    int use_ic;

    if ((!v->field_mode ||
         (v->ref_field_type[dir] == 1 && v->cur_field_type == 1)) &&
        !v->s.last_picture.f->data[0])
        return;

    mx = s->mv[dir][n][0];
    my = s->mv[dir][n][1];

    /* pick reference plane and the matching intensity-compensation LUT */
    if (!dir) {
        if (v->field_mode && (v->cur_field_type != v->ref_field_type[dir]) && v->second_field) {
            srcY = s->current_picture.f->data[0];
            luty = v->curr_luty;
            use_ic = *v->curr_use_ic;
        } else {
            srcY = s->last_picture.f->data[0];
            luty = v->last_luty;
            use_ic = v->last_use_ic;
        }
    } else {
        srcY = s->next_picture.f->data[0];
        luty = v->next_luty;
        use_ic = v->next_use_ic;
    }

    if (!srcY) {
        av_log(v->s.avctx, AV_LOG_ERROR, "Referenced frame missing.\n");
        return;
    }

    /* cross-field reference: shift vertical MV by half a line pair */
    if (v->field_mode) {
        if (v->cur_field_type != v->ref_field_type[dir])
            my = my - 2 + 4 * v->cur_field_type;
    }

    /* after the last block of a field P-MB, derive and store the MB-level
     * MV prediction and field flags for use by future B frames */
    if (s->pict_type == AV_PICTURE_TYPE_P && n == 3 && v->field_mode) {
        int opp_count = get_luma_mv(v, 0,
                                    &s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0],
                                    &s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1]);
        int k, f = opp_count > 2; /* majority of blocks use the opposite field */
        for (k = 0; k < 4; k++)
            v->mv_f[1][s->block_index[k] + v->blocks_off] = f;
    }

    if (v->fcm == ILACE_FRAME) { // not sure if needed for other types of picture
        int qx, qy;
        int width  = s->avctx->coded_width;
        int height = s->avctx->coded_height >> 1;

        if (s->pict_type == AV_PICTURE_TYPE_P) {
            s->current_picture.motion_val[1][s->block_index[n] + v->blocks_off][0] = mx;
            s->current_picture.motion_val[1][s->block_index[n] + v->blocks_off][1] = my;
        }

        /* MV pull-back: clamp the MV so the source block stays within the
         * allowed margin around the coded field */
        qx = (s->mb_x * 16) + (mx >> 2);
        qy = (s->mb_y *  8) + (my >> 3);

        if (qx < -17)
            mx -= 4 * (qx + 17);
        else if (qx > width)
            mx -= 4 * (qx - width);
        if (qy < -18)
            my -= 8 * (qy + 18);
        else if (qy > height + 1)
            my -= 8 * (qy - height - 1);
    }

    /* destination offset of this 8x8 block inside the 16x16 MB; with
     * field MVs the two bottom blocks start one line down instead */
    if ((v->fcm == ILACE_FRAME) && fieldmv)
        off = ((n > 1) ? s->linesize : 0) + (n & 1) * 8;
    else
        off = s->linesize * 4 * (n & 2) + (n & 1) * 8;

    src_x = s->mb_x * 16 + (n & 1) * 8 + (mx >> 2);
    if (!fieldmv)
        src_y = s->mb_y * 16 + (n & 2) * 4 + (my >> 2);
    else
        src_y = s->mb_y * 16 + ((n > 1) ? 1 : 0) + (my >> 2);

    if (v->profile != PROFILE_ADVANCED) {
        src_x = av_clip(src_x, -16, s->mb_width  * 16);
        src_y = av_clip(src_y, -16, s->mb_height * 16);
    } else {
        src_x = av_clip(src_x, -17, s->avctx->coded_width);
        if (v->fcm == ILACE_FRAME) {
            /* keep the source line on the same field parity */
            if (src_y & 1)
                src_y = av_clip(src_y, -17, s->avctx->coded_height + 1);
            else
                src_y = av_clip(src_y, -18, s->avctx->coded_height);
        } else {
            src_y = av_clip(src_y, -18, s->avctx->coded_height + 1);
        }
    }

    srcY += src_y * s->linesize + src_x;
    if (v->field_mode && v->ref_field_type[dir])
        srcY += s->current_picture_ptr->f->linesize[0]; /* bottom field */

    if (fieldmv) {
        if (!(src_y & 1))
            v_edge_pos--;
        else
            src_y -= (src_y < 4);
    }
    /* go through the edge-emulation buffer when crossing the border or
     * when the pixels must be rescaled before interpolation */
    if (v->rangeredfrm || use_ic
        || s->h_edge_pos < 13 || v_edge_pos < 23
        || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx & 3) - 8 - s->mspel * 2
        || (unsigned)(src_y - (s->mspel << fieldmv)) > v_edge_pos - (my & 3) - ((8 + s->mspel * 2) << fieldmv)) {
        const int k = 9 + s->mspel * 2; /* 8x8 block + filter margin */

        srcY -= s->mspel * (1 + (s->linesize << fieldmv));
        /* check emulate edge stride and offset */
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcY,
                                 s->linesize, s->linesize,
                                 k, k << fieldmv,
                                 src_x - s->mspel, src_y - (s->mspel << fieldmv),
                                 s->h_edge_pos, v_edge_pos);
        srcY = s->sc.edge_emu_buffer;
        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_luma(srcY, k, s->linesize << fieldmv);
        }
        /* if we deal with intensity compensation we need to scale source blocks */
        if (use_ic) {
            vc1_lut_scale_luma(srcY,
                               luty[v->field_mode ? v->ref_field_type[dir] : (((0 << fieldmv) + src_y - (s->mspel << fieldmv)) & 1)],
                               luty[v->field_mode ? v->ref_field_type[dir] : (((1 << fieldmv) + src_y - (s->mspel << fieldmv)) & 1)],
                               k, s->linesize << fieldmv);
        }
        srcY += s->mspel * (1 + (s->linesize << fieldmv));
    }

    if (s->mspel) {
        dxy = ((my & 3) << 2) | (mx & 3);
        if (avg)
            v->vc1dsp.avg_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
        else
            v->vc1dsp.put_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
    } else { // hpel mc - always used for luma
        dxy = (my & 2) | ((mx & 2) >> 1);
        if (!v->rnd)
            s->hdsp.put_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize, 8);
        else
            s->hdsp.put_no_rnd_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize, 8);
    }
}
/** Do motion compensation for 4-MV macroblock - both chroma blocks
 *
 * Derives the 8x8 chroma MV from the four luma MVs (median/average via
 * get_chroma_mv(), or get_luma_mv() for two-reference field pictures,
 * which also decides the referenced field), then performs chroma MC with
 * edge emulation, range reduction and intensity compensation.
 *
 * @param v    VC-1 decoder context
 * @param dir  prediction direction (0 = forward, 1 = backward)
 */
void ff_vc1_mc_4mv_chroma(VC1Context *v, int dir)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcU, *srcV;
    int uvmx, uvmy, uvsrc_x, uvsrc_y;
    int16_t tx, ty;
    int chroma_ref_type; /* field (0/1) the chroma prediction comes from */
    int v_edge_pos = s->v_edge_pos >> v->field_mode;
    uint8_t (*lutuv)[256];
    int use_ic;

    if (!v->field_mode && !v->s.last_picture.f->data[0])
        return;
    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY)
        return;

    /* calculate chroma MV vector from four luma MVs */
    if (!v->field_mode || !v->numref) {
        int valid_count = get_chroma_mv(v, dir, &tx, &ty);
        if (!valid_count) {
            /* all-intra (or <2 valid) MB: store zero MV and skip MC */
            s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0;
            s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1] = 0;
            v->luma_mv[s->mb_x][0] = v->luma_mv[s->mb_x][1] = 0;
            return; //no need to do MC for intra blocks
        }
        chroma_ref_type = v->ref_field_type[dir];
    } else {
        /* two-reference field picture: the field used by the majority of
         * luma blocks determines the chroma reference field */
        int opp_count = get_luma_mv(v, dir, &tx, &ty);
        chroma_ref_type = v->cur_field_type ^ (opp_count > 2);
    }
    if (v->field_mode && chroma_ref_type == 1 && v->cur_field_type == 1 && !v->s.last_picture.f->data[0])
        return;
    s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = tx;
    s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1] = ty;

    /* halve to chroma resolution with VC-1's 3/4-pel round-up */
    uvmx = (tx + ((tx & 3) == 3)) >> 1;
    uvmy = (ty + ((ty & 3) == 3)) >> 1;

    v->luma_mv[s->mb_x][0] = uvmx;
    v->luma_mv[s->mb_x][1] = uvmy;

    if (v->fastuvmc) {
        /* round odd components toward zero (to half-pel) */
        uvmx = uvmx + ((uvmx < 0) ? (uvmx & 1) : -(uvmx & 1));
        uvmy = uvmy + ((uvmy < 0) ? (uvmy & 1) : -(uvmy & 1));
    }
    // Field conversion bias
    if (v->cur_field_type != chroma_ref_type)
        uvmy += 2 - 4 * chroma_ref_type;

    uvsrc_x = s->mb_x * 8 + (uvmx >> 2);
    uvsrc_y = s->mb_y * 8 + (uvmy >> 2);

    if (v->profile != PROFILE_ADVANCED) {
        uvsrc_x = av_clip(uvsrc_x, -8, s->mb_width  * 8);
        uvsrc_y = av_clip(uvsrc_y, -8, s->mb_height * 8);
    } else {
        uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width  >> 1);
        uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);
    }

    /* select reference chroma planes + matching IC LUTs */
    if (!dir) {
        if (v->field_mode && (v->cur_field_type != chroma_ref_type) && v->second_field) {
            srcU = s->current_picture.f->data[1];
            srcV = s->current_picture.f->data[2];
            lutuv  = v->curr_lutuv;
            use_ic = *v->curr_use_ic;
        } else {
            srcU = s->last_picture.f->data[1];
            srcV = s->last_picture.f->data[2];
            lutuv  = v->last_lutuv;
            use_ic = v->last_use_ic;
        }
    } else {
        srcU = s->next_picture.f->data[1];
        srcV = s->next_picture.f->data[2];
        lutuv  = v->next_lutuv;
        use_ic = v->next_use_ic;
    }

    if (!srcU) {
        av_log(v->s.avctx, AV_LOG_ERROR, "Referenced frame missing.\n");
        return;
    }

    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
    if (v->field_mode) {
        if (chroma_ref_type) {
            srcU += s->current_picture_ptr->f->linesize[1]; /* bottom field */
            srcV += s->current_picture_ptr->f->linesize[2];
        }
    }

    /* copy through the edge-emulation buffer when crossing the border or
     * when the pixels need rescaling before interpolation */
    if (v->rangeredfrm || use_ic
        || s->h_edge_pos < 18 || v_edge_pos < 18
        || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 9
        || (unsigned)uvsrc_y > (v_edge_pos    >> 1) - 9) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcU,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16, srcV,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        srcU = s->sc.edge_emu_buffer;
        srcV = s->sc.edge_emu_buffer + 16;

        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_chroma(srcU, srcV, 9, s->uvlinesize);
        }
        /* if we deal with intensity compensation we need to scale source blocks */
        if (use_ic) {
            vc1_lut_scale_chroma(srcU, srcV,
                                 lutuv[v->field_mode ? chroma_ref_type : ((0 + uvsrc_y) & 1)],
                                 lutuv[v->field_mode ? chroma_ref_type : ((1 + uvsrc_y) & 1)],
                                 9, s->uvlinesize);
        }
    }

    /* Chroma MC always uses qpel bilinear */
    uvmx = (uvmx & 3) << 1;
    uvmy = (uvmy & 3) << 1;
    if (!v->rnd) {
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    } else {
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    }
}
/** Do motion compensation for 4-MV interlaced frame chroma macroblock (both U and V)
 *
 * Each of the four 4x4 chroma sub-blocks gets its own MV derived from the
 * corresponding luma MV.  With field MVs (fieldmv) the vertical stride is
 * doubled and rounding uses s_rndtblfield.  dir selects the reference for
 * the top pair of sub-blocks, dir2 for the bottom pair.
 *
 * @param v     VC-1 decoder context
 * @param dir   prediction direction for sub-blocks 0 and 1
 * @param dir2  prediction direction for sub-blocks 2 and 3
 * @param avg   nonzero to average into s->dest (B-frame interpolation)
 */
void ff_vc1_mc_4mv_chroma4(VC1Context *v, int dir, int dir2, int avg)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcU, *srcV;
    int uvsrc_x, uvsrc_y;
    int uvmx_field[4], uvmy_field[4];
    int i, off, tx, ty;
    int fieldmv = v->blk_mv_type[s->block_index[0]];
    /* vertical chroma MV rounding table for field MVs, indexed by the
     * low 4 bits of the luma MV */
    static const uint8_t s_rndtblfield[16] = { 0, 0, 1, 2, 4, 4, 5, 6, 2, 2, 3, 8, 6, 6, 7, 12 };
    int v_dist = fieldmv ? 1 : 4; // vertical offset for lower sub-blocks
    int v_edge_pos = s->v_edge_pos >> 1;
    int use_ic;
    uint8_t (*lutuv)[256];

    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY)
        return;

    /* derive one chroma MV per sub-block from the luma MVs */
    for (i = 0; i < 4; i++) {
        int d = i < 2 ? dir: dir2;

        tx = s->mv[d][i][0];
        uvmx_field[i] = (tx + ((tx & 3) == 3)) >> 1;
        ty = s->mv[d][i][1];
        if (fieldmv)
            uvmy_field[i] = (ty >> 4) * 8 + s_rndtblfield[ty & 0xF];
        else
            uvmy_field[i] = (ty + ((ty & 3) == 3)) >> 1;
    }

    for (i = 0; i < 4; i++) {
        /* destination offset of this 4x4 sub-block */
        off = (i & 1) * 4 + ((i & 2) ? v_dist * s->uvlinesize : 0);
        uvsrc_x = s->mb_x * 8 + (i & 1) * 4 + (uvmx_field[i] >> 2);
        uvsrc_y = s->mb_y * 8 + ((i & 2) ? v_dist : 0) + (uvmy_field[i] >> 2);
        // FIXME: implement proper pull-back (see vc1cropmv.c, vc1CROPMV_ChromaPullBack())
        uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width  >> 1);
        uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);

        /* per-sub-block reference selection by its direction */
        if (i < 2 ? dir : dir2) {
            srcU = s->next_picture.f->data[1];
            srcV = s->next_picture.f->data[2];
            lutuv  = v->next_lutuv;
            use_ic = v->next_use_ic;
        } else {
            srcU = s->last_picture.f->data[1];
            srcV = s->last_picture.f->data[2];
            lutuv  = v->last_lutuv;
            use_ic = v->last_use_ic;
        }
        if (!srcU)
            return;
        srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
        srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
        uvmx_field[i] = (uvmx_field[i] & 3) << 1; /* to qpel fraction */
        uvmy_field[i] = (uvmy_field[i] & 3) << 1;

        if (fieldmv) {
            if (!(uvsrc_y & 1))
                v_edge_pos = (s->v_edge_pos >> 1) - 1;
            else
                uvsrc_y -= (uvsrc_y < 2);
        }
        /* edge emulation when the sub-block crosses the border, or when
         * intensity compensation must rescale the pixels first */
        if (use_ic
            || s->h_edge_pos < 10 || v_edge_pos < (5 << fieldmv)
            || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 5
            || (unsigned)uvsrc_y > v_edge_pos - (5 << fieldmv)) {
            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcU,
                                     s->uvlinesize, s->uvlinesize,
                                     5, (5 << fieldmv), uvsrc_x, uvsrc_y,
                                     s->h_edge_pos >> 1, v_edge_pos);
            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16, srcV,
                                     s->uvlinesize, s->uvlinesize,
                                     5, (5 << fieldmv), uvsrc_x, uvsrc_y,
                                     s->h_edge_pos >> 1, v_edge_pos);
            srcU = s->sc.edge_emu_buffer;
            srcV = s->sc.edge_emu_buffer + 16;

            /* if we deal with intensity compensation we need to scale source blocks */
            if (use_ic) {
                vc1_lut_scale_chroma(srcU, srcV,
                                     lutuv[(uvsrc_y + (0 << fieldmv)) & 1],
                                     lutuv[(uvsrc_y + (1 << fieldmv)) & 1],
                                     5, s->uvlinesize << fieldmv);
            }
        }
        if (avg) {
            if (!v->rnd) {
                h264chroma->avg_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                h264chroma->avg_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            } else {
                v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            }
        } else {
            if (!v->rnd) {
                h264chroma->put_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                h264chroma->put_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            } else {
                v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            }
        }
    }
}
/** Motion compensation for direct or interpolated blocks in B-frames
 *
 * Same pipeline as ff_vc1_mc_1mv() but always references the next
 * picture (backward direction, s->mv[1][0]) and uses the averaging DSP
 * routines so the result blends with the forward prediction already in
 * s->dest[].
 *
 * @param v  VC-1 decoder context
 */
void ff_vc1_interp_mc(VC1Context *v)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcY, *srcU, *srcV;
    int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
    int v_edge_pos = s->v_edge_pos >> v->field_mode;
    int use_ic = v->next_use_ic;

    if (!v->field_mode && !v->s.next_picture.f->data[0])
        return;

    mx = s->mv[1][0][0];
    my = s->mv[1][0][1];
    /* chroma MV: halved luma MV with 3/4-pel round-up */
    uvmx = (mx + ((mx & 3) == 3)) >> 1;
    uvmy = (my + ((my & 3) == 3)) >> 1;
    if (v->field_mode && v->cur_field_type != v->ref_field_type[1]) {
        my   = my   - 2 + 4 * v->cur_field_type;
        uvmy = uvmy - 2 + 4 * v->cur_field_type;
    }
    if (v->fastuvmc) {
        /* NOTE(review): the rounding sign here is the opposite of
         * ff_vc1_mc_1mv()/ff_vc1_mc_4mv_chroma() (rounds odd components
         * away from zero instead of toward it) — verify against the
         * fastuvmc definition in SMPTE 421M before changing. */
        uvmx = uvmx + ((uvmx < 0) ? -(uvmx & 1) : (uvmx & 1));
        uvmy = uvmy + ((uvmy < 0) ? -(uvmy & 1) : (uvmy & 1));
    }
    srcY = s->next_picture.f->data[0];
    srcU = s->next_picture.f->data[1];
    srcV = s->next_picture.f->data[2];

    src_x   = s->mb_x * 16 + (mx   >> 2);
    src_y   = s->mb_y * 16 + (my   >> 2);
    uvsrc_x = s->mb_x *  8 + (uvmx >> 2);
    uvsrc_y = s->mb_y *  8 + (uvmy >> 2);

    /* MV pull-back as in ff_vc1_mc_1mv() */
    if (v->profile != PROFILE_ADVANCED) {
        src_x   = av_clip(  src_x, -16, s->mb_width  * 16);
        src_y   = av_clip(  src_y, -16, s->mb_height * 16);
        uvsrc_x = av_clip(uvsrc_x,  -8, s->mb_width  *  8);
        uvsrc_y = av_clip(uvsrc_y,  -8, s->mb_height *  8);
    } else {
        src_x   = av_clip(  src_x, -17, s->avctx->coded_width);
        src_y   = av_clip(  src_y, -18, s->avctx->coded_height + 1);
        uvsrc_x = av_clip(uvsrc_x,  -8, s->avctx->coded_width  >> 1);
        uvsrc_y = av_clip(uvsrc_y,  -8, s->avctx->coded_height >> 1);
    }

    srcY += src_y   * s->linesize   + src_x;
    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;

    if (v->field_mode && v->ref_field_type[1]) {
        srcY += s->current_picture_ptr->f->linesize[0]; /* bottom field */
        srcU += s->current_picture_ptr->f->linesize[1];
        srcV += s->current_picture_ptr->f->linesize[2];
    }

    /* for grayscale we should not try to read from unknown area */
    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY) {
        srcU = s->sc.edge_emu_buffer + 18 * s->linesize;
        srcV = s->sc.edge_emu_buffer + 18 * s->linesize;
    }

    /* edge emulation / pre-scaling, mirroring ff_vc1_mc_1mv() */
    if (v->rangeredfrm || s->h_edge_pos < 22 || v_edge_pos < 22 || use_ic
        || (unsigned)(src_x - 1) > s->h_edge_pos - (mx & 3) - 16 - 3
        || (unsigned)(src_y - 1) > v_edge_pos    - (my & 3) - 16 - 3) {
        uint8_t *ubuf = s->sc.edge_emu_buffer + 19 * s->linesize;
        uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
        const int k = 17 + s->mspel * 2;

        srcY -= s->mspel * (1 + s->linesize);
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcY,
                                 s->linesize, s->linesize,
                                 k, k,
                                 src_x - s->mspel, src_y - s->mspel,
                                 s->h_edge_pos, v_edge_pos);
        srcY = s->sc.edge_emu_buffer;
        s->vdsp.emulated_edge_mc(ubuf, srcU,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        s->vdsp.emulated_edge_mc(vbuf, srcV,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        srcU = ubuf;
        srcV = vbuf;
        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_luma(srcY, k, s->linesize);
            vc1_scale_chroma(srcU, srcV, 9, s->uvlinesize);
        }

        if (use_ic) {
            uint8_t (*luty )[256] = v->next_luty;
            uint8_t (*lutuv)[256] = v->next_lutuv;
            vc1_lut_scale_luma(srcY,
                               luty[v->field_mode ? v->ref_field_type[1] : ((0 + src_y - s->mspel) & 1)],
                               luty[v->field_mode ? v->ref_field_type[1] : ((1 + src_y - s->mspel) & 1)],
                               k, s->linesize);
            vc1_lut_scale_chroma(srcU, srcV,
                                 lutuv[v->field_mode ? v->ref_field_type[1] : ((0 + uvsrc_y) & 1)],
                                 lutuv[v->field_mode ? v->ref_field_type[1] : ((1 + uvsrc_y) & 1)],
                                 9, s->uvlinesize);
        }
        srcY += s->mspel * (1 + s->linesize);
    }

    if (s->mspel) {
        dxy = ((my & 3) << 2) | (mx & 3);
        v->vc1dsp.avg_vc1_mspel_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, v->rnd);
    } else { // hpel mc
        dxy = (my & 2) | ((mx & 2) >> 1);
        if (!v->rnd)
            s->hdsp.avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
        else
            s->hdsp.avg_no_rnd_pixels_tab[dxy](s->dest[0], srcY, s->linesize, 16);
    }

    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY)
        return;
    /* Chroma MC always uses qpel blilinear */
    uvmx = (uvmx & 3) << 1;
    uvmy = (uvmy & 3) << 1;
    if (!v->rnd) {
        h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    } else {
        v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    }
}