You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

879 lines
34KB

  1. /*
  2. * VC-1 and WMV3 decoder
  3. * Copyright (c) 2011 Mashiat Sarker Shakkhar
  4. * Copyright (c) 2006-2007 Konstantin Shishkov
  5. * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
  6. *
  7. * This file is part of FFmpeg.
  8. *
  9. * FFmpeg is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * FFmpeg is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with FFmpeg; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. /**
  24. * @file
  25. * VC-1 and WMV3 block decoding routines
  26. */
  27. #include "avcodec.h"
  28. #include "h264chroma.h"
  29. #include "mathops.h"
  30. #include "mpegvideo.h"
  31. #include "vc1.h"
  32. static av_always_inline void vc1_scale_luma(uint8_t *srcY,
  33. int k, int linesize)
  34. {
  35. int i, j;
  36. for (j = 0; j < k; j++) {
  37. for (i = 0; i < k; i++)
  38. srcY[i] = ((srcY[i] - 128) >> 1) + 128;
  39. srcY += linesize;
  40. }
  41. }
  42. static av_always_inline void vc1_scale_chroma(uint8_t *srcU, uint8_t *srcV,
  43. int k, int uvlinesize)
  44. {
  45. int i, j;
  46. for (j = 0; j < k; j++) {
  47. for (i = 0; i < k; i++) {
  48. srcU[i] = ((srcU[i] - 128) >> 1) + 128;
  49. srcV[i] = ((srcV[i] - 128) >> 1) + 128;
  50. }
  51. srcU += uvlinesize;
  52. srcV += uvlinesize;
  53. }
  54. }
  55. static av_always_inline void vc1_lut_scale_luma(uint8_t *srcY,
  56. uint8_t *lut1, uint8_t *lut2,
  57. int k, int linesize)
  58. {
  59. int i, j;
  60. for (j = 0; j < k; j += 2) {
  61. for (i = 0; i < k; i++)
  62. srcY[i] = lut1[srcY[i]];
  63. srcY += linesize;
  64. if (j + 1 == k)
  65. break;
  66. for (i = 0; i < k; i++)
  67. srcY[i] = lut2[srcY[i]];
  68. srcY += linesize;
  69. }
  70. }
  71. static av_always_inline void vc1_lut_scale_chroma(uint8_t *srcU, uint8_t *srcV,
  72. uint8_t *lut1, uint8_t *lut2,
  73. int k, int uvlinesize)
  74. {
  75. int i, j;
  76. for (j = 0; j < k; j += 2) {
  77. for (i = 0; i < k; i++) {
  78. srcU[i] = lut1[srcU[i]];
  79. srcV[i] = lut1[srcV[i]];
  80. }
  81. srcU += uvlinesize;
  82. srcV += uvlinesize;
  83. if (j + 1 == k)
  84. break;
  85. for (i = 0; i < k; i++) {
  86. srcU[i] = lut2[srcU[i]];
  87. srcV[i] = lut2[srcV[i]];
  88. }
  89. srcU += uvlinesize;
  90. srcV += uvlinesize;
  91. }
  92. }
  93. static const uint8_t popcount4[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
/**
 * Compute a single predicted luma MV from the four block MVs of a
 * macroblock, depending on how many of them reference the opposite field.
 *
 * @param v   decoder context
 * @param dir prediction direction (0 = forward, 1 = backward)
 * @param tx  output: predicted horizontal component
 * @param ty  output: predicted vertical component
 * @return number of block MVs flagged as opposite-field (0..4)
 */
static av_always_inline int get_luma_mv(VC1Context *v, int dir, int16_t *tx, int16_t *ty)
{
    MpegEncContext *s = &v->s;
    /* 4-bit mask: bit i set when block i's MV is flagged (opposite field) */
    int idx = v->mv_f[dir][s->block_index[0] + v->blocks_off] |
    (v->mv_f[dir][s->block_index[1] + v->blocks_off] << 1) |
    (v->mv_f[dir][s->block_index[2] + v->blocks_off] << 2) |
    (v->mv_f[dir][s->block_index[3] + v->blocks_off] << 3);
    /* For the two-set-bits masks: the two block indices to average, packed
     * as (hi nibble, lo nibble); other entries are unused. */
    static const uint8_t index2[16] = { 0, 0, 0, 0x23, 0, 0x13, 0x03, 0, 0, 0x12, 0x02, 0, 0x01, 0, 0, 0 };
    int opp_count = popcount4[idx];
    switch (opp_count) {
    case 0:
    case 4:
    /* all four MVs agree on the field: component-wise median of all four */
    *tx = median4(s->mv[dir][0][0], s->mv[dir][1][0], s->mv[dir][2][0], s->mv[dir][3][0]);
    *ty = median4(s->mv[dir][0][1], s->mv[dir][1][1], s->mv[dir][2][1], s->mv[dir][3][1]);
    break;
    case 1:
    /* one outlier: median of the three same-field MVs; the comparisons
     * select the three block indices whose mask bit is clear */
    *tx = mid_pred(s->mv[dir][idx < 2][0], s->mv[dir][1 + (idx < 4)][0], s->mv[dir][2 + (idx < 8)][0]);
    *ty = mid_pred(s->mv[dir][idx < 2][1], s->mv[dir][1 + (idx < 4)][1], s->mv[dir][2 + (idx < 8)][1]);
    break;
    case 3:
    /* one same-field MV left out: median of the three flagged MVs */
    *tx = mid_pred(s->mv[dir][idx > 0xd][0], s->mv[dir][1 + (idx > 0xb)][0], s->mv[dir][2 + (idx > 0x7)][0]);
    *ty = mid_pred(s->mv[dir][idx > 0xd][1], s->mv[dir][1 + (idx > 0xb)][1], s->mv[dir][2 + (idx > 0x7)][1]);
    break;
    case 2:
    /* even split: average the two MVs selected by the index2 table */
    *tx = (s->mv[dir][index2[idx] >> 4][0] + s->mv[dir][index2[idx] & 0xf][0]) / 2;
    *ty = (s->mv[dir][index2[idx] >> 4][1] + s->mv[dir][index2[idx] & 0xf][1]) / 2;
    break;
    }
    return opp_count;
}
/**
 * Compute the predicted chroma MV from the luma block MVs of a
 * macroblock, using only the non-intra blocks.
 *
 * @param v   decoder context
 * @param dir prediction direction (0 = forward, 1 = backward)
 * @param tx  output: predicted horizontal component (untouched if none valid)
 * @param ty  output: predicted vertical component (untouched if none valid)
 * @return number of usable (non-intra) block MVs, or 0 when fewer than 2
 */
static av_always_inline int get_chroma_mv(VC1Context *v, int dir, int16_t *tx, int16_t *ty)
{
    MpegEncContext *s = &v->s;
    /* 4-bit mask: bit i set when block i is not intra-coded
     * (presumably mb_type nonzero marks intra blocks — see the
     * "no need to do MC for intra blocks" early-out in the caller) */
    int idx = !v->mb_type[0][s->block_index[0]] |
    (!v->mb_type[0][s->block_index[1]] << 1) |
    (!v->mb_type[0][s->block_index[2]] << 2) |
    (!v->mb_type[0][s->block_index[3]] << 3);
    /* For the two-set-bits masks: the two valid block indices to average,
     * packed as (hi nibble, lo nibble); other entries are unused. */
    static const uint8_t index2[16] = { 0, 0, 0, 0x01, 0, 0x02, 0x12, 0, 0, 0x03, 0x13, 0, 0x23, 0, 0, 0 };
    int valid_count = popcount4[idx];
    switch (valid_count) {
    case 4:
    /* all four valid: component-wise median of all four */
    *tx = median4(s->mv[dir][0][0], s->mv[dir][1][0], s->mv[dir][2][0], s->mv[dir][3][0]);
    *ty = median4(s->mv[dir][0][1], s->mv[dir][1][1], s->mv[dir][2][1], s->mv[dir][3][1]);
    break;
    case 3:
    /* three valid: median of those three; the comparisons select the
     * block indices whose mask bit is set */
    *tx = mid_pred(s->mv[dir][idx > 0xd][0], s->mv[dir][1 + (idx > 0xb)][0], s->mv[dir][2 + (idx > 0x7)][0]);
    *ty = mid_pred(s->mv[dir][idx > 0xd][1], s->mv[dir][1 + (idx > 0xb)][1], s->mv[dir][2 + (idx > 0x7)][1]);
    break;
    case 2:
    /* two valid: average the pair selected by the index2 table */
    *tx = (s->mv[dir][index2[idx] >> 4][0] + s->mv[dir][index2[idx] & 0xf][0]) / 2;
    *ty = (s->mv[dir][index2[idx] >> 4][1] + s->mv[dir][index2[idx] & 0xf][1]) / 2;
    break;
    default:
    /* 0 or 1 valid blocks: no chroma MC is performed */
    return 0;
    }
    return valid_count;
}
/** Do motion compensation over 1 macroblock
 * Mostly adapted hpel_motion and qpel_motion from mpegvideo.c
 *
 * @param v   decoder context
 * @param dir prediction direction (0 = forward/last, 1 = backward/next)
 */
void ff_vc1_mc_1mv(VC1Context *v, int dir)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcY, *srcU, *srcV;
    int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
    int v_edge_pos = s->v_edge_pos >> v->field_mode;
    int i;
    uint8_t (*luty)[256], (*lutuv)[256];
    int use_ic;

    /* bail out when the needed reference picture is not available */
    if ((!v->field_mode ||
         (v->ref_field_type[dir] == 1 && v->cur_field_type == 1)) &&
        !v->s.last_picture.f->data[0])
        return;

    mx = s->mv[dir][0][0];
    my = s->mv[dir][0][1];

    // store motion vectors for further use in B frames
    if (s->pict_type == AV_PICTURE_TYPE_P) {
        for (i = 0; i < 4; i++) {
            s->current_picture.motion_val[1][s->block_index[i] + v->blocks_off][0] = mx;
            s->current_picture.motion_val[1][s->block_index[i] + v->blocks_off][1] = my;
        }
    }

    /* derive the chroma MV from the luma MV (quarter-pel, rounded) */
    uvmx = (mx + ((mx & 3) == 3)) >> 1;
    uvmy = (my + ((my & 3) == 3)) >> 1;
    v->luma_mv[s->mb_x][0] = uvmx;
    v->luma_mv[s->mb_x][1] = uvmy;
    if (v->field_mode &&
        v->cur_field_type != v->ref_field_type[dir]) {
        /* vertical bias when predicting from the opposite field */
        my   = my   - 2 + 4 * v->cur_field_type;
        uvmy = uvmy - 2 + 4 * v->cur_field_type;
    }
    // fastuvmc shall be ignored for interlaced frame picture
    if (v->fastuvmc && (v->fcm != ILACE_FRAME)) {
        /* round the chroma MV towards zero to an even (half-pel) position */
        uvmx = uvmx + ((uvmx < 0) ? (uvmx & 1) : -(uvmx & 1));
        uvmy = uvmy + ((uvmy < 0) ? (uvmy & 1) : -(uvmy & 1));
    }
    /* pick reference planes plus the intensity-compensation LUTs that
     * belong to that reference */
    if (!dir) {
        if (v->field_mode && (v->cur_field_type != v->ref_field_type[dir]) && v->second_field) {
            /* second field predicting from the first field of the same frame */
            srcY = s->current_picture.f->data[0];
            srcU = s->current_picture.f->data[1];
            srcV = s->current_picture.f->data[2];
            luty = v->curr_luty;
            lutuv = v->curr_lutuv;
            use_ic = *v->curr_use_ic;
        } else {
            srcY = s->last_picture.f->data[0];
            srcU = s->last_picture.f->data[1];
            srcV = s->last_picture.f->data[2];
            luty = v->last_luty;
            lutuv = v->last_lutuv;
            use_ic = v->last_use_ic;
        }
    } else {
        srcY = s->next_picture.f->data[0];
        srcU = s->next_picture.f->data[1];
        srcV = s->next_picture.f->data[2];
        luty = v->next_luty;
        lutuv = v->next_lutuv;
        use_ic = v->next_use_ic;
    }
    if (!srcY || !srcU) {
        av_log(v->s.avctx, AV_LOG_ERROR, "Referenced frame missing.\n");
        return;
    }

    /* integer source position; MVs are in quarter-pel units */
    src_x = s->mb_x * 16 + (mx >> 2);
    src_y = s->mb_y * 16 + (my >> 2);
    uvsrc_x = s->mb_x * 8 + (uvmx >> 2);
    uvsrc_y = s->mb_y * 8 + (uvmy >> 2);

    /* clamp MV pull-back; the advanced profile uses wider margins */
    if (v->profile != PROFILE_ADVANCED) {
        src_x = av_clip( src_x, -16, s->mb_width * 16);
        src_y = av_clip( src_y, -16, s->mb_height * 16);
        uvsrc_x = av_clip(uvsrc_x, -8, s->mb_width * 8);
        uvsrc_y = av_clip(uvsrc_y, -8, s->mb_height * 8);
    } else {
        src_x = av_clip( src_x, -17, s->avctx->coded_width);
        src_y = av_clip( src_y, -18, s->avctx->coded_height + 1);
        uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width >> 1);
        uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);
    }

    srcY += src_y * s->linesize + src_x;
    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;

    if (v->field_mode && v->ref_field_type[dir]) {
        /* bottom reference field: skip one line into the interleaved frame */
        srcY += s->current_picture_ptr->f->linesize[0];
        srcU += s->current_picture_ptr->f->linesize[1];
        srcV += s->current_picture_ptr->f->linesize[2];
    }

    /* for grayscale we should not try to read from unknown area */
    if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY) {
        srcU = s->edge_emu_buffer + 18 * s->linesize;
        srcV = s->edge_emu_buffer + 18 * s->linesize;
    }

    /* use the edge emulation buffer whenever the block may read outside
     * the picture, or when the samples must be rescaled (range reduction
     * or intensity compensation) before interpolation */
    if (v->rangeredfrm || use_ic
        || s->h_edge_pos < 22 || v_edge_pos < 22
        || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx&3) - 16 - s->mspel * 3
        || (unsigned)(src_y - 1) > v_edge_pos - (my&3) - 16 - 3) {
        uint8_t *ubuf = s->edge_emu_buffer + 19 * s->linesize;
        uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
        /* luma block size incl. the extra border needed by the mspel filter */
        const int k = 17 + s->mspel * 2;

        srcY -= s->mspel * (1 + s->linesize);
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY,
                                 s->linesize, s->linesize,
                                 k, k,
                                 src_x - s->mspel, src_y - s->mspel,
                                 s->h_edge_pos, v_edge_pos);
        srcY = s->edge_emu_buffer;
        s->vdsp.emulated_edge_mc(ubuf, srcU,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        s->vdsp.emulated_edge_mc(vbuf, srcV,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        srcU = ubuf;
        srcV = vbuf;
        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_luma(srcY, k, s->linesize);
            vc1_scale_chroma(srcU, srcV, 9, s->uvlinesize);
        }
        /* if we deal with intensity compensation we need to scale source blocks */
        if (use_ic) {
            /* in field mode both rows use the reference field's LUT;
             * otherwise LUTs alternate with the source row parity */
            vc1_lut_scale_luma(srcY,
                               luty[v->field_mode ? v->ref_field_type[dir] : ((0 + src_y - s->mspel) & 1)],
                               luty[v->field_mode ? v->ref_field_type[dir] : ((1 + src_y - s->mspel) & 1)],
                               k, s->linesize);
            vc1_lut_scale_chroma(srcU, srcV,
                                 lutuv[v->field_mode ? v->ref_field_type[dir] : ((0 + uvsrc_y) & 1)],
                                 lutuv[v->field_mode ? v->ref_field_type[dir] : ((1 + uvsrc_y) & 1)],
                                 9, s->uvlinesize);
        }
        /* undo the border offset applied before edge emulation */
        srcY += s->mspel * (1 + s->linesize);
    }

    if (s->mspel) {
        /* quarter-pel interpolation via the VC-1 mspel filters */
        dxy = ((my & 3) << 2) | (mx & 3);
        v->vc1dsp.put_vc1_mspel_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, v->rnd);
    } else { // hpel mc - always used for luma
        dxy = (my & 2) | ((mx & 2) >> 1);
        if (!v->rnd)
            s->hdsp.put_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
        else
            s->hdsp.put_no_rnd_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
    }

    if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY) return;
    /* Chroma MC always uses qpel bilinear */
    uvmx = (uvmx & 3) << 1;
    uvmy = (uvmy & 3) << 1;
    if (!v->rnd) {
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    } else {
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    }
}
/** Do motion compensation for 4-MV macroblock - luminance block
 *
 * @param v   decoder context
 * @param n   luma block index within the macroblock (0..3)
 * @param dir prediction direction (0 = forward, 1 = backward)
 * @param avg nonzero to average into the destination instead of replacing it
 */
void ff_vc1_mc_4mv_luma(VC1Context *v, int n, int dir, int avg)
{
    MpegEncContext *s = &v->s;
    uint8_t *srcY;
    int dxy, mx, my, src_x, src_y;
    int off;
    /* field-MV blocks occur only in interlaced frame pictures */
    int fieldmv = (v->fcm == ILACE_FRAME) ? v->blk_mv_type[s->block_index[n]] : 0;
    int v_edge_pos = s->v_edge_pos >> v->field_mode;
    uint8_t (*luty)[256];
    int use_ic;

    /* bail out when the needed reference picture is not available */
    if ((!v->field_mode ||
         (v->ref_field_type[dir] == 1 && v->cur_field_type == 1)) &&
        !v->s.last_picture.f->data[0])
        return;

    mx = s->mv[dir][n][0];
    my = s->mv[dir][n][1];

    /* pick the reference plane plus its intensity-compensation LUT */
    if (!dir) {
        if (v->field_mode && (v->cur_field_type != v->ref_field_type[dir]) && v->second_field) {
            /* second field predicting from the first field of the same frame */
            srcY = s->current_picture.f->data[0];
            luty = v->curr_luty;
            use_ic = *v->curr_use_ic;
        } else {
            srcY = s->last_picture.f->data[0];
            luty = v->last_luty;
            use_ic = v->last_use_ic;
        }
    } else {
        srcY = s->next_picture.f->data[0];
        luty = v->next_luty;
        use_ic = v->next_use_ic;
    }
    if (!srcY) {
        av_log(v->s.avctx, AV_LOG_ERROR, "Referenced frame missing.\n");
        return;
    }

    if (v->field_mode) {
        if (v->cur_field_type != v->ref_field_type[dir])
            /* vertical bias when predicting from the opposite field */
            my = my - 2 + 4 * v->cur_field_type;
    }

    /* after the last block of a field P MB, store the derived luma MV and
     * field flags for use by B frames (dir 0: P frames only predict forward) */
    if (s->pict_type == AV_PICTURE_TYPE_P && n == 3 && v->field_mode) {
        int opp_count = get_luma_mv(v, 0,
                                    &s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0],
                                    &s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1]);
        int k, f = opp_count > 2;
        for (k = 0; k < 4; k++)
            v->mv_f[1][s->block_index[k] + v->blocks_off] = f;
    }

    if (v->fcm == ILACE_FRAME) { // not sure if needed for other types of picture
        int qx, qy;
        int width  = s->avctx->coded_width;
        int height = s->avctx->coded_height >> 1;
        if (s->pict_type == AV_PICTURE_TYPE_P) {
            s->current_picture.motion_val[1][s->block_index[n] + v->blocks_off][0] = mx;
            s->current_picture.motion_val[1][s->block_index[n] + v->blocks_off][1] = my;
        }
        /* MV pull-back so the reference block stays inside the
         * (half-height, field-based) reference area */
        qx = (s->mb_x * 16) + (mx >> 2);
        qy = (s->mb_y *  8) + (my >> 3);
        if (qx < -17)
            mx -= 4 * (qx + 17);
        else if (qx > width)
            mx -= 4 * (qx - width);
        if (qy < -18)
            my -= 8 * (qy + 18);
        else if (qy > height + 1)
            my -= 8 * (qy - height - 1);
    }

    /* destination offset of this 8x8 block; for field MVs the two bottom
     * blocks sit one line (not 8 lines) below the top ones */
    if ((v->fcm == ILACE_FRAME) && fieldmv)
        off = ((n > 1) ? s->linesize : 0) + (n & 1) * 8;
    else
        off = s->linesize * 4 * (n & 2) + (n & 1) * 8;

    src_x = s->mb_x * 16 + (n & 1) * 8 + (mx >> 2);
    if (!fieldmv)
        src_y = s->mb_y * 16 + (n & 2) * 4 + (my >> 2);
    else
        src_y = s->mb_y * 16 + ((n > 1) ? 1 : 0) + (my >> 2);

    /* clamp the source position; advanced profile uses wider margins */
    if (v->profile != PROFILE_ADVANCED) {
        src_x = av_clip(src_x, -16, s->mb_width  * 16);
        src_y = av_clip(src_y, -16, s->mb_height * 16);
    } else {
        src_x = av_clip(src_x, -17, s->avctx->coded_width);
        if (v->fcm == ILACE_FRAME) {
            if (src_y & 1)
                src_y = av_clip(src_y, -17, s->avctx->coded_height + 1);
            else
                src_y = av_clip(src_y, -18, s->avctx->coded_height);
        } else {
            src_y = av_clip(src_y, -18, s->avctx->coded_height + 1);
        }
    }

    srcY += src_y * s->linesize + src_x;
    if (v->field_mode && v->ref_field_type[dir])
        /* bottom reference field: skip one line into the interleaved frame */
        srcY += s->current_picture_ptr->f->linesize[0];

    if (fieldmv) {
        if (!(src_y & 1))
            v_edge_pos--;
        else
            src_y -= (src_y < 4);
    }

    /* use the edge emulation buffer whenever the block may read outside
     * the picture, or when samples must be rescaled first */
    if (v->rangeredfrm || use_ic
        || s->h_edge_pos < 13 || v_edge_pos < 23
        || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx & 3) - 8 - s->mspel * 2
        || (unsigned)(src_y - (s->mspel << fieldmv)) > v_edge_pos - (my & 3) - ((8 + s->mspel * 2) << fieldmv)) {
        /* 8x8 block size incl. the extra border needed by the mspel filter */
        const int k = 9 + s->mspel * 2;

        srcY -= s->mspel * (1 + (s->linesize << fieldmv));
        /* check emulate edge stride and offset */
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY,
                                 s->linesize, s->linesize,
                                 k, k << fieldmv,
                                 src_x - s->mspel, src_y - (s->mspel << fieldmv),
                                 s->h_edge_pos, v_edge_pos);
        srcY = s->edge_emu_buffer;
        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_luma(srcY, k, s->linesize << fieldmv);
        }
        /* if we deal with intensity compensation we need to scale source blocks */
        if (use_ic) {
            vc1_lut_scale_luma(srcY,
                               luty[v->field_mode ? v->ref_field_type[dir] : (((0<<fieldmv)+src_y - (s->mspel << fieldmv)) & 1)],
                               luty[v->field_mode ? v->ref_field_type[dir] : (((1<<fieldmv)+src_y - (s->mspel << fieldmv)) & 1)],
                               k, s->linesize << fieldmv);
        }
        /* undo the border offset applied before edge emulation */
        srcY += s->mspel * (1 + (s->linesize << fieldmv));
    }

    if (s->mspel) {
        /* quarter-pel interpolation via the VC-1 mspel filters */
        dxy = ((my & 3) << 2) | (mx & 3);
        if (avg)
            v->vc1dsp.avg_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
        else
            v->vc1dsp.put_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
    } else { // hpel mc - always used for luma
        dxy = (my & 2) | ((mx & 2) >> 1);
        if (!v->rnd)
            s->hdsp.put_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize, 8);
        else
            s->hdsp.put_no_rnd_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize, 8);
    }
}
/** Do motion compensation for 4-MV macroblock - both chroma blocks
 *
 * @param v   decoder context
 * @param dir prediction direction (0 = forward, 1 = backward)
 */
void ff_vc1_mc_4mv_chroma(VC1Context *v, int dir)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcU, *srcV;
    int uvmx, uvmy, uvsrc_x, uvsrc_y;
    int16_t tx, ty;
    int chroma_ref_type;
    int v_edge_pos = s->v_edge_pos >> v->field_mode;
    uint8_t (*lutuv)[256];
    int use_ic;

    if (!v->field_mode && !v->s.last_picture.f->data[0])
        return;
    if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY)
        return;

    /* calculate chroma MV vector from four luma MVs */
    if (!v->field_mode || !v->numref) {
        int valid_count = get_chroma_mv(v, dir, &tx, &ty);
        if (!valid_count) {
            s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0;
            s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1] = 0;
            v->luma_mv[s->mb_x][0] = v->luma_mv[s->mb_x][1] = 0;
            return; //no need to do MC for intra blocks
        }
        chroma_ref_type = v->ref_field_type[dir];
    } else {
        /* two-reference field picture: the majority field of the luma MVs
         * decides which field chroma is predicted from */
        int opp_count = get_luma_mv(v, dir, &tx, &ty);
        chroma_ref_type = v->cur_field_type ^ (opp_count > 2);
    }
    if (v->field_mode && chroma_ref_type == 1 && v->cur_field_type == 1 && !v->s.last_picture.f->data[0])
        return;

    s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = tx;
    s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1] = ty;

    /* derive the chroma MV from the predicted luma MV (quarter-pel, rounded) */
    uvmx = (tx + ((tx & 3) == 3)) >> 1;
    uvmy = (ty + ((ty & 3) == 3)) >> 1;
    v->luma_mv[s->mb_x][0] = uvmx;
    v->luma_mv[s->mb_x][1] = uvmy;

    if (v->fastuvmc) {
        /* round the chroma MV towards zero to an even (half-pel) position */
        uvmx = uvmx + ((uvmx < 0) ? (uvmx & 1) : -(uvmx & 1));
        uvmy = uvmy + ((uvmy < 0) ? (uvmy & 1) : -(uvmy & 1));
    }
    // Field conversion bias
    if (v->cur_field_type != chroma_ref_type)
        uvmy += 2 - 4 * chroma_ref_type;

    uvsrc_x = s->mb_x * 8 + (uvmx >> 2);
    uvsrc_y = s->mb_y * 8 + (uvmy >> 2);

    /* clamp the source position; advanced profile uses coded dimensions */
    if (v->profile != PROFILE_ADVANCED) {
        uvsrc_x = av_clip(uvsrc_x, -8, s->mb_width  * 8);
        uvsrc_y = av_clip(uvsrc_y, -8, s->mb_height * 8);
    } else {
        uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width  >> 1);
        uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);
    }

    /* pick reference planes plus the intensity-compensation LUT */
    if (!dir) {
        if (v->field_mode && (v->cur_field_type != chroma_ref_type) && v->second_field) {
            /* second field predicting from the first field of the same frame */
            srcU = s->current_picture.f->data[1];
            srcV = s->current_picture.f->data[2];
            lutuv = v->curr_lutuv;
            use_ic = *v->curr_use_ic;
        } else {
            srcU = s->last_picture.f->data[1];
            srcV = s->last_picture.f->data[2];
            lutuv = v->last_lutuv;
            use_ic = v->last_use_ic;
        }
    } else {
        srcU = s->next_picture.f->data[1];
        srcV = s->next_picture.f->data[2];
        lutuv = v->next_lutuv;
        use_ic = v->next_use_ic;
    }
    if (!srcU) {
        av_log(v->s.avctx, AV_LOG_ERROR, "Referenced frame missing.\n");
        return;
    }

    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
    if (v->field_mode) {
        if (chroma_ref_type) {
            /* bottom reference field: skip one line into the interleaved frame */
            srcU += s->current_picture_ptr->f->linesize[1];
            srcV += s->current_picture_ptr->f->linesize[2];
        }
    }

    /* use the edge emulation buffer whenever the block may read outside
     * the picture, or when samples must be rescaled first */
    if (v->rangeredfrm || use_ic
        || s->h_edge_pos < 18 || v_edge_pos < 18
        || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 9
        || (unsigned)uvsrc_y > (v_edge_pos    >> 1) - 9) {
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcU,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer + 16, srcV,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        srcU = s->edge_emu_buffer;
        srcV = s->edge_emu_buffer + 16;

        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_chroma(srcU, srcV, 9, s->uvlinesize);
        }
        /* if we deal with intensity compensation we need to scale source blocks */
        if (use_ic) {
            /* in field mode both rows use the reference field's LUT;
             * otherwise LUTs alternate with the source row parity */
            vc1_lut_scale_chroma(srcU, srcV,
                                 lutuv[v->field_mode ? chroma_ref_type : ((0 + uvsrc_y) & 1)],
                                 lutuv[v->field_mode ? chroma_ref_type : ((1 + uvsrc_y) & 1)],
                                 9, s->uvlinesize);
        }
    }

    /* Chroma MC always uses qpel bilinear */
    uvmx = (uvmx & 3) << 1;
    uvmy = (uvmy & 3) << 1;
    if (!v->rnd) {
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    } else {
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    }
}
/** Do motion compensation for 4-MV interlaced frame chroma macroblock (both U and V)
 *
 * @param v    decoder context
 * @param dir  prediction direction for the top two sub-blocks
 * @param dir2 prediction direction for the bottom two sub-blocks
 * @param avg  nonzero to average into the destination instead of replacing it
 */
void ff_vc1_mc_4mv_chroma4(VC1Context *v, int dir, int dir2, int avg)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcU, *srcV;
    int uvsrc_x, uvsrc_y;
    int uvmx_field[4], uvmy_field[4];
    int i, off, tx, ty;
    int fieldmv = v->blk_mv_type[s->block_index[0]];
    /* rounding table for the vertical chroma MV derivation with field MVs */
    static const uint8_t s_rndtblfield[16] = { 0, 0, 1, 2, 4, 4, 5, 6, 2, 2, 3, 8, 6, 6, 7, 12 };
    int v_dist = fieldmv ? 1 : 4; // vertical offset for lower sub-blocks
    int v_edge_pos = s->v_edge_pos >> 1;
    int use_ic;
    uint8_t (*lutuv)[256];

    if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY)
        return;

    /* derive the four chroma sub-block MVs from the luma MVs */
    for (i = 0; i < 4; i++) {
        int d = i < 2 ? dir: dir2;
        tx = s->mv[d][i][0];
        uvmx_field[i] = (tx + ((tx & 3) == 3)) >> 1;
        ty = s->mv[d][i][1];
        if (fieldmv)
            uvmy_field[i] = (ty >> 4) * 8 + s_rndtblfield[ty & 0xF];
        else
            uvmy_field[i] = (ty + ((ty & 3) == 3)) >> 1;
    }

    /* MC for each 4x4 chroma sub-block */
    for (i = 0; i < 4; i++) {
        /* destination offset; with field MVs the lower sub-blocks sit one
         * chroma line (not 4) below the upper ones */
        off = (i & 1) * 4 + ((i & 2) ? v_dist * s->uvlinesize : 0);
        uvsrc_x = s->mb_x * 8 + (i & 1) * 4           + (uvmx_field[i] >> 2);
        uvsrc_y = s->mb_y * 8 + ((i & 2) ? v_dist : 0) + (uvmy_field[i] >> 2);
        // FIXME: implement proper pull-back (see vc1cropmv.c, vc1CROPMV_ChromaPullBack())
        uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width  >> 1);
        uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);

        /* pick reference planes per sub-block direction */
        if (i < 2 ? dir : dir2) {
            srcU = s->next_picture.f->data[1];
            srcV = s->next_picture.f->data[2];
            lutuv = v->next_lutuv;
            use_ic = v->next_use_ic;
        } else {
            srcU = s->last_picture.f->data[1];
            srcV = s->last_picture.f->data[2];
            lutuv = v->last_lutuv;
            use_ic = v->last_use_ic;
        }
        if (!srcU)
            return;
        srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
        srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
        uvmx_field[i] = (uvmx_field[i] & 3) << 1;
        uvmy_field[i] = (uvmy_field[i] & 3) << 1;

        if (fieldmv) {
            if (!(uvsrc_y & 1))
                v_edge_pos = (s->v_edge_pos >> 1) - 1;
            else
                uvsrc_y -= (uvsrc_y < 2);
        }
        /* use the edge emulation buffer whenever the sub-block may read
         * outside the picture, or when samples must be rescaled first */
        if (use_ic
            || s->h_edge_pos < 10 || v_edge_pos < (5 << fieldmv)
            || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 5
            || (unsigned)uvsrc_y > v_edge_pos - (5 << fieldmv)) {
            s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcU,
                                     s->uvlinesize, s->uvlinesize,
                                     5, (5 << fieldmv), uvsrc_x, uvsrc_y,
                                     s->h_edge_pos >> 1, v_edge_pos);
            s->vdsp.emulated_edge_mc(s->edge_emu_buffer + 16, srcV,
                                     s->uvlinesize, s->uvlinesize,
                                     5, (5 << fieldmv), uvsrc_x, uvsrc_y,
                                     s->h_edge_pos >> 1, v_edge_pos);
            srcU = s->edge_emu_buffer;
            srcV = s->edge_emu_buffer + 16;

            /* if we deal with intensity compensation we need to scale source blocks */
            if (use_ic) {
                vc1_lut_scale_chroma(srcU, srcV,
                                     lutuv[(uvsrc_y + (0 << fieldmv)) & 1],
                                     lutuv[(uvsrc_y + (1 << fieldmv)) & 1],
                                     5, s->uvlinesize << fieldmv);
            }
        }
        if (avg) {
            if (!v->rnd) {
                h264chroma->avg_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                h264chroma->avg_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            } else {
                v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            }
        } else {
            if (!v->rnd) {
                h264chroma->put_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                h264chroma->put_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            } else {
                v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            }
        }
    }
}
/** Motion compensation for direct or interpolated blocks in B-frames
 *
 * Averages the backward (next-picture) prediction into the destination,
 * which already holds the forward prediction.
 *
 * @param v decoder context
 */
void ff_vc1_interp_mc(VC1Context *v)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcY, *srcU, *srcV;
    int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
    int v_edge_pos = s->v_edge_pos >> v->field_mode;
    int use_ic = v->next_use_ic;

    if (!v->field_mode && !v->s.next_picture.f->data[0])
        return;

    /* backward MV (direction 1, block 0) */
    mx = s->mv[1][0][0];
    my = s->mv[1][0][1];
    /* derive the chroma MV from the luma MV (quarter-pel, rounded) */
    uvmx = (mx + ((mx & 3) == 3)) >> 1;
    uvmy = (my + ((my & 3) == 3)) >> 1;
    if (v->field_mode && v->cur_field_type != v->ref_field_type[1]) {
        /* vertical bias when predicting from the opposite field */
        my   = my   - 2 + 4 * v->cur_field_type;
        uvmy = uvmy - 2 + 4 * v->cur_field_type;
    }
    if (v->fastuvmc) {
        /* NOTE(review): rounds the chroma MV away from zero here, while
         * ff_vc1_mc_1mv rounds towards zero — confirm this asymmetry is
         * intentional for the interpolated-MB path */
        uvmx = uvmx + ((uvmx < 0) ? -(uvmx & 1) : (uvmx & 1));
        uvmy = uvmy + ((uvmy < 0) ? -(uvmy & 1) : (uvmy & 1));
    }

    srcY = s->next_picture.f->data[0];
    srcU = s->next_picture.f->data[1];
    srcV = s->next_picture.f->data[2];

    src_x = s->mb_x * 16 + (mx >> 2);
    src_y = s->mb_y * 16 + (my >> 2);
    uvsrc_x = s->mb_x * 8 + (uvmx >> 2);
    uvsrc_y = s->mb_y * 8 + (uvmy >> 2);

    /* clamp the source position; advanced profile uses wider margins */
    if (v->profile != PROFILE_ADVANCED) {
        src_x = av_clip( src_x, -16, s->mb_width * 16);
        src_y = av_clip( src_y, -16, s->mb_height * 16);
        uvsrc_x = av_clip(uvsrc_x, -8, s->mb_width * 8);
        uvsrc_y = av_clip(uvsrc_y, -8, s->mb_height * 8);
    } else {
        src_x = av_clip( src_x, -17, s->avctx->coded_width);
        src_y = av_clip( src_y, -18, s->avctx->coded_height + 1);
        uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width >> 1);
        uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);
    }

    srcY += src_y * s->linesize + src_x;
    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;

    if (v->field_mode && v->ref_field_type[1]) {
        /* bottom reference field: skip one line into the interleaved frame */
        srcY += s->current_picture_ptr->f->linesize[0];
        srcU += s->current_picture_ptr->f->linesize[1];
        srcV += s->current_picture_ptr->f->linesize[2];
    }

    /* for grayscale we should not try to read from unknown area */
    if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY) {
        srcU = s->edge_emu_buffer + 18 * s->linesize;
        srcV = s->edge_emu_buffer + 18 * s->linesize;
    }

    /* use the edge emulation buffer whenever the block may read outside
     * the picture, or when samples must be rescaled first */
    if (v->rangeredfrm || s->h_edge_pos < 22 || v_edge_pos < 22 || use_ic
        || (unsigned)(src_x - 1) > s->h_edge_pos - (mx & 3) - 16 - 3
        || (unsigned)(src_y - 1) > v_edge_pos    - (my & 3) - 16 - 3) {
        uint8_t *ubuf = s->edge_emu_buffer + 19 * s->linesize;
        uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
        /* luma block size incl. the extra border needed by the mspel filter */
        const int k = 17 + s->mspel * 2;

        srcY -= s->mspel * (1 + s->linesize);
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY,
                                 s->linesize, s->linesize,
                                 k, k,
                                 src_x - s->mspel, src_y - s->mspel,
                                 s->h_edge_pos, v_edge_pos);
        srcY = s->edge_emu_buffer;
        s->vdsp.emulated_edge_mc(ubuf, srcU,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        s->vdsp.emulated_edge_mc(vbuf, srcV,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        srcU = ubuf;
        srcV = vbuf;
        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_luma(srcY, k, s->linesize);
            vc1_scale_chroma(srcU, srcV, 9, s->uvlinesize);
        }

        /* intensity compensation against the next (backward) reference */
        if (use_ic) {
            uint8_t (*luty )[256] = v->next_luty;
            uint8_t (*lutuv)[256] = v->next_lutuv;
            vc1_lut_scale_luma(srcY,
                               luty[v->field_mode ? v->ref_field_type[1] : ((0+src_y - s->mspel) & 1)],
                               luty[v->field_mode ? v->ref_field_type[1] : ((1+src_y - s->mspel) & 1)],
                               k, s->linesize);
            vc1_lut_scale_chroma(srcU, srcV,
                                 lutuv[v->field_mode ? v->ref_field_type[1] : ((0+uvsrc_y) & 1)],
                                 lutuv[v->field_mode ? v->ref_field_type[1] : ((1+uvsrc_y) & 1)],
                                 9, s->uvlinesize);
        }
        /* undo the border offset applied before edge emulation */
        srcY += s->mspel * (1 + s->linesize);
    }

    if (s->mspel) {
        /* quarter-pel interpolation, averaged into the forward prediction */
        dxy = ((my & 3) << 2) | (mx & 3);
        v->vc1dsp.avg_vc1_mspel_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, v->rnd);
    } else { // hpel mc
        dxy = (my & 2) | ((mx & 2) >> 1);
        if (!v->rnd)
            s->hdsp.avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
        else
            s->hdsp.avg_no_rnd_pixels_tab[dxy](s->dest[0], srcY, s->linesize, 16);
    }

    if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY) return;
    /* Chroma MC always uses qpel bilinear */
    uvmx = (uvmx & 3) << 1;
    uvmy = (uvmy & 3) << 1;
    if (!v->rnd) {
        h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    } else {
        v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    }
}