You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

889 lines
35KB

  1. /*
  2. * VC-1 and WMV3 decoder
  3. * Copyright (c) 2011 Mashiat Sarker Shakkhar
  4. * Copyright (c) 2006-2007 Konstantin Shishkov
  5. * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
  6. *
  7. * This file is part of FFmpeg.
  8. *
  9. * FFmpeg is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * FFmpeg is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with FFmpeg; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. /**
  24. * @file
  25. * VC-1 and WMV3 block decoding routines
  26. */
  27. #include "avcodec.h"
  28. #include "h264chroma.h"
  29. #include "mathops.h"
  30. #include "mpegvideo.h"
  31. #include "vc1.h"
  32. static av_always_inline void vc1_scale_luma(uint8_t *srcY,
  33. int k, int linesize)
  34. {
  35. int i, j;
  36. for (j = 0; j < k; j++) {
  37. for (i = 0; i < k; i++)
  38. srcY[i] = ((srcY[i] - 128) >> 1) + 128;
  39. srcY += linesize;
  40. }
  41. }
  42. static av_always_inline void vc1_scale_chroma(uint8_t *srcU, uint8_t *srcV,
  43. int k, int uvlinesize)
  44. {
  45. int i, j;
  46. for (j = 0; j < k; j++) {
  47. for (i = 0; i < k; i++) {
  48. srcU[i] = ((srcU[i] - 128) >> 1) + 128;
  49. srcV[i] = ((srcV[i] - 128) >> 1) + 128;
  50. }
  51. srcU += uvlinesize;
  52. srcV += uvlinesize;
  53. }
  54. }
  55. static av_always_inline void vc1_lut_scale_luma(uint8_t *srcY,
  56. uint8_t *lut1, uint8_t *lut2,
  57. int k, int linesize)
  58. {
  59. int i, j;
  60. for (j = 0; j < k; j += 2) {
  61. for (i = 0; i < k; i++)
  62. srcY[i] = lut1[srcY[i]];
  63. srcY += linesize;
  64. if (j + 1 == k)
  65. break;
  66. for (i = 0; i < k; i++)
  67. srcY[i] = lut2[srcY[i]];
  68. srcY += linesize;
  69. }
  70. }
  71. static av_always_inline void vc1_lut_scale_chroma(uint8_t *srcU, uint8_t *srcV,
  72. uint8_t *lut1, uint8_t *lut2,
  73. int k, int uvlinesize)
  74. {
  75. int i, j;
  76. for (j = 0; j < k; j += 2) {
  77. for (i = 0; i < k; i++) {
  78. srcU[i] = lut1[srcU[i]];
  79. srcV[i] = lut1[srcV[i]];
  80. }
  81. srcU += uvlinesize;
  82. srcV += uvlinesize;
  83. if (j + 1 == k)
  84. break;
  85. for (i = 0; i < k; i++) {
  86. srcU[i] = lut2[srcU[i]];
  87. srcV[i] = lut2[srcV[i]];
  88. }
  89. srcU += uvlinesize;
  90. srcV += uvlinesize;
  91. }
  92. }
  93. static const uint8_t popcount4[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
/**
 * Derive a single luma MV from the four block MVs of a 4-MV field-picture
 * macroblock, taking into account which blocks reference the opposite field.
 *
 * @param v    VC-1 decoder context
 * @param dir  prediction direction (0 = forward, 1 = backward)
 * @param tx   receives the derived horizontal MV component
 * @param ty   receives the derived vertical MV component
 * @return number of blocks (0..4) whose MV points to the opposite field
 */
static av_always_inline int get_luma_mv(VC1Context *v, int dir, int16_t *tx, int16_t *ty)
{
    MpegEncContext *s = &v->s;
    /* nibble of per-block "opposite field" flags, block 0 in bit 0 */
    int idx = v->mv_f[dir][s->block_index[0] + v->blocks_off] |
              (v->mv_f[dir][s->block_index[1] + v->blocks_off] << 1) |
              (v->mv_f[dir][s->block_index[2] + v->blocks_off] << 2) |
              (v->mv_f[dir][s->block_index[3] + v->blocks_off] << 3);
    /* 2-vs-2 tie case: the two block indices to average, packed as
     * (high nibble, low nibble), indexed by the flag pattern; here the
     * averaged pair is the two blocks whose flag is 0 */
    static const uint8_t index2[16] = { 0, 0, 0, 0x23, 0, 0x13, 0x03, 0, 0, 0x12, 0x02, 0, 0x01, 0, 0, 0 };
    int opp_count = popcount4[idx];
    switch (opp_count) {
    case 0:
    case 4:
        /* all four blocks agree on the field: median of the four MVs */
        *tx = median4(s->mv[dir][0][0], s->mv[dir][1][0], s->mv[dir][2][0], s->mv[dir][3][0]);
        *ty = median4(s->mv[dir][0][1], s->mv[dir][1][1], s->mv[dir][2][1], s->mv[dir][3][1]);
        break;
    case 1:
        /* one outlier: median of the three unflagged (majority) blocks */
        *tx = mid_pred(s->mv[dir][idx < 2][0], s->mv[dir][1 + (idx < 4)][0], s->mv[dir][2 + (idx < 8)][0]);
        *ty = mid_pred(s->mv[dir][idx < 2][1], s->mv[dir][1 + (idx < 4)][1], s->mv[dir][2 + (idx < 8)][1]);
        break;
    case 3:
        /* three flagged, one not: median of the three flagged blocks */
        *tx = mid_pred(s->mv[dir][idx > 0xd][0], s->mv[dir][1 + (idx > 0xb)][0], s->mv[dir][2 + (idx > 0x7)][0]);
        *ty = mid_pred(s->mv[dir][idx > 0xd][1], s->mv[dir][1 + (idx > 0xb)][1], s->mv[dir][2 + (idx > 0x7)][1]);
        break;
    case 2:
        /* 2-vs-2 tie: average the MV pair selected by index2 */
        *tx = (s->mv[dir][index2[idx] >> 4][0] + s->mv[dir][index2[idx] & 0xf][0]) / 2;
        *ty = (s->mv[dir][index2[idx] >> 4][1] + s->mv[dir][index2[idx] & 0xf][1]) / 2;
        break;
    }
    return opp_count;
}
/**
 * Derive the chroma MV of a 4-MV macroblock from the MVs of the inter-coded
 * (non-intra) luma blocks.
 *
 * @param v    VC-1 decoder context
 * @param dir  prediction direction (0 = forward, 1 = backward)
 * @param tx   receives the derived horizontal MV component
 * @param ty   receives the derived vertical MV component
 * @return number of inter-coded blocks (0 or 1 means no chroma MV; 0 returned)
 */
static av_always_inline int get_chroma_mv(VC1Context *v, int dir, int16_t *tx, int16_t *ty)
{
    MpegEncContext *s = &v->s;
    /* nibble of per-block "is inter" flags (mb_type 0 == inter), block 0 in bit 0 */
    int idx = !v->mb_type[0][s->block_index[0]] |
              (!v->mb_type[0][s->block_index[1]] << 1) |
              (!v->mb_type[0][s->block_index[2]] << 2) |
              (!v->mb_type[0][s->block_index[3]] << 3);
    /* exactly-two-valid case: the two inter block indices to average,
     * packed as (high nibble, low nibble), indexed by the flag pattern */
    static const uint8_t index2[16] = { 0, 0, 0, 0x01, 0, 0x02, 0x12, 0, 0, 0x03, 0x13, 0, 0x23, 0, 0, 0 };
    int valid_count = popcount4[idx];
    switch (valid_count) {
    case 4:
        /* all inter: median of the four MVs */
        *tx = median4(s->mv[dir][0][0], s->mv[dir][1][0], s->mv[dir][2][0], s->mv[dir][3][0]);
        *ty = median4(s->mv[dir][0][1], s->mv[dir][1][1], s->mv[dir][2][1], s->mv[dir][3][1]);
        break;
    case 3:
        /* three inter blocks: median of those three */
        *tx = mid_pred(s->mv[dir][idx > 0xd][0], s->mv[dir][1 + (idx > 0xb)][0], s->mv[dir][2 + (idx > 0x7)][0]);
        *ty = mid_pred(s->mv[dir][idx > 0xd][1], s->mv[dir][1 + (idx > 0xb)][1], s->mv[dir][2 + (idx > 0x7)][1]);
        break;
    case 2:
        /* two inter blocks: average their MVs */
        *tx = (s->mv[dir][index2[idx] >> 4][0] + s->mv[dir][index2[idx] & 0xf][0]) / 2;
        *ty = (s->mv[dir][index2[idx] >> 4][1] + s->mv[dir][index2[idx] & 0xf][1]) / 2;
        break;
    default:
        /* 0 or 1 inter blocks: no chroma MC is done */
        return 0;
    }
    return valid_count;
}
/** Do motion compensation over 1 macroblock
 * Mostly adapted hpel_motion and qpel_motion from mpegvideo.c
 *
 * Motion-compensates the full 16x16 luma block and both 8x8 chroma blocks
 * of the current macroblock from a single MV, writing into s->dest[0..2].
 * Handles range reduction and intensity compensation via the edge-emulation
 * buffer path.
 *
 * @param v   VC-1 decoder context
 * @param dir prediction direction: 0 = forward (last picture),
 *            1 = backward (next picture)
 */
void ff_vc1_mc_1mv(VC1Context *v, int dir)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcY, *srcU, *srcV;
    int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
    int v_edge_pos = s->v_edge_pos >> v->field_mode;
    int i;
    uint8_t (*luty)[256], (*lutuv)[256];
    int use_ic;

    /* bail out when the needed reference picture is not available */
    if ((!v->field_mode ||
         (v->ref_field_type[dir] == 1 && v->cur_field_type == 1)) &&
        !v->s.last_picture.f->data[0])
        return;

    mx = s->mv[dir][0][0];
    my = s->mv[dir][0][1];

    // store motion vectors for further use in B-frames
    if (s->pict_type == AV_PICTURE_TYPE_P) {
        for (i = 0; i < 4; i++) {
            s->current_picture.motion_val[1][s->block_index[i] + v->blocks_off][0] = mx;
            s->current_picture.motion_val[1][s->block_index[i] + v->blocks_off][1] = my;
        }
    }

    /* derive the chroma MV: halve the luma MV, biasing 3/4-pel positions up */
    uvmx = (mx + ((mx & 3) == 3)) >> 1;
    uvmy = (my + ((my & 3) == 3)) >> 1;
    v->luma_mv[s->mb_x][0] = uvmx;
    v->luma_mv[s->mb_x][1] = uvmy;

    if (v->field_mode &&
        v->cur_field_type != v->ref_field_type[dir]) {
        /* adjust vertical components for the offset between opposite fields */
        my = my - 2 + 4 * v->cur_field_type;
        uvmy = uvmy - 2 + 4 * v->cur_field_type;
    }
    // fastuvmc shall be ignored for interlaced frame picture
    if (v->fastuvmc && (v->fcm != ILACE_FRAME)) {
        /* round odd (quarter-pel) chroma components toward zero to half-pel */
        uvmx = uvmx + ((uvmx < 0) ? (uvmx & 1) : -(uvmx & 1));
        uvmy = uvmy + ((uvmy < 0) ? (uvmy & 1) : -(uvmy & 1));
    }
    /* pick the reference planes and the matching intensity-compensation
     * state: the second field predicting the first field of the same frame
     * reads from the current picture */
    if (!dir) {
        if (v->field_mode && (v->cur_field_type != v->ref_field_type[dir]) && v->second_field) {
            srcY = s->current_picture.f->data[0];
            srcU = s->current_picture.f->data[1];
            srcV = s->current_picture.f->data[2];
            luty = v->curr_luty;
            lutuv = v->curr_lutuv;
            use_ic = *v->curr_use_ic;
        } else {
            srcY = s->last_picture.f->data[0];
            srcU = s->last_picture.f->data[1];
            srcV = s->last_picture.f->data[2];
            luty = v->last_luty;
            lutuv = v->last_lutuv;
            use_ic = v->last_use_ic;
        }
    } else {
        srcY = s->next_picture.f->data[0];
        srcU = s->next_picture.f->data[1];
        srcV = s->next_picture.f->data[2];
        luty = v->next_luty;
        lutuv = v->next_lutuv;
        use_ic = v->next_use_ic;
    }
    if (!srcY || !srcU) {
        av_log(v->s.avctx, AV_LOG_ERROR, "Referenced frame missing.\n");
        return;
    }

    /* integer-pel source position (MVs are in quarter-pel units) */
    src_x = s->mb_x * 16 + (mx >> 2);
    src_y = s->mb_y * 16 + (my >> 2);
    uvsrc_x = s->mb_x * 8 + (uvmx >> 2);
    uvsrc_y = s->mb_y * 8 + (uvmy >> 2);

    /* MV pull-back: clamp the source position near the picture boundary */
    if (v->profile != PROFILE_ADVANCED) {
        src_x = av_clip( src_x, -16, s->mb_width * 16);
        src_y = av_clip( src_y, -16, s->mb_height * 16);
        uvsrc_x = av_clip(uvsrc_x, -8, s->mb_width * 8);
        uvsrc_y = av_clip(uvsrc_y, -8, s->mb_height * 8);
    } else {
        src_x = av_clip( src_x, -17, s->avctx->coded_width);
        src_y = av_clip( src_y, -18, s->avctx->coded_height + 1);
        uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width >> 1);
        uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);
    }

    srcY += src_y * s->linesize + src_x;
    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;

    if (v->field_mode && v->ref_field_type[dir]) {
        /* reference is the bottom field: skip one frame line down */
        srcY += s->current_picture_ptr->f->linesize[0];
        srcU += s->current_picture_ptr->f->linesize[1];
        srcV += s->current_picture_ptr->f->linesize[2];
    }

    /* for grayscale we should not try to read from unknown area */
    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY) {
        srcU = s->sc.edge_emu_buffer + 18 * s->linesize;
        srcV = s->sc.edge_emu_buffer + 18 * s->linesize;
    }

    /* go through the edge-emulation buffer when the read would cross the
     * picture edge, or when the samples must be rescaled (range reduction
     * or intensity compensation) before interpolation */
    if (v->rangeredfrm || use_ic
        || s->h_edge_pos < 22 || v_edge_pos < 22
        || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx&3) - 16 - s->mspel * 3
        || (unsigned)(src_y - 1) > v_edge_pos - (my&3) - 16 - 3) {
        uint8_t *ubuf = s->sc.edge_emu_buffer + 19 * s->linesize;
        uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
        /* luma block size incl. the extra border needed by the mspel filter */
        const int k = 17 + s->mspel * 2;

        srcY -= s->mspel * (1 + s->linesize);
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcY,
                                 s->linesize, s->linesize,
                                 k, k,
                                 src_x - s->mspel, src_y - s->mspel,
                                 s->h_edge_pos, v_edge_pos);
        srcY = s->sc.edge_emu_buffer;
        s->vdsp.emulated_edge_mc(ubuf, srcU,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        s->vdsp.emulated_edge_mc(vbuf, srcV,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        srcU = ubuf;
        srcV = vbuf;
        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_luma(srcY, k, s->linesize);
            vc1_scale_chroma(srcU, srcV, 9, s->uvlinesize);
        }
        /* if we deal with intensity compensation we need to scale source blocks */
        if (use_ic) {
            /* field pictures use the reference field's LUT for every row;
             * frame pictures alternate LUTs by source-row parity */
            vc1_lut_scale_luma(srcY,
                               luty[v->field_mode ? v->ref_field_type[dir] : ((0 + src_y - s->mspel) & 1)],
                               luty[v->field_mode ? v->ref_field_type[dir] : ((1 + src_y - s->mspel) & 1)],
                               k, s->linesize);
            vc1_lut_scale_chroma(srcU, srcV,
                                 lutuv[v->field_mode ? v->ref_field_type[dir] : ((0 + uvsrc_y) & 1)],
                                 lutuv[v->field_mode ? v->ref_field_type[dir] : ((1 + uvsrc_y) & 1)],
                                 9, s->uvlinesize);
        }
        srcY += s->mspel * (1 + s->linesize);
    }

    if (s->mspel) {
        /* quarter-pel bicubic interpolation */
        dxy = ((my & 3) << 2) | (mx & 3);
        v->vc1dsp.put_vc1_mspel_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, v->rnd);
    } else { // hpel mc - always used for luma
        dxy = (my & 2) | ((mx & 2) >> 1);
        if (!v->rnd)
            s->hdsp.put_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
        else
            s->hdsp.put_no_rnd_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
    }

    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
        return;
    /* Chroma MC always uses qpel bilinear */
    uvmx = (uvmx & 3) << 1;
    uvmy = (uvmy & 3) << 1;
    if (!v->rnd) {
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    } else {
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    }
    if (v->field_mode) {
        /* record whether the chroma blocks referenced the opposite field */
        v->mv_f[dir][s->block_index[4] + v->mb_off] = v->cur_field_type != v->ref_field_type[dir];
        v->mv_f[dir][s->block_index[5] + v->mb_off] = v->cur_field_type != v->ref_field_type[dir];
    }
}
/** Do motion compensation for 4-MV macroblock - luminance block
 *
 * Motion-compensates one 8x8 luma sub-block (n = 0..3) of a 4-MV macroblock
 * using its own MV, writing into the matching quadrant of s->dest[0].
 *
 * @param v   VC-1 decoder context
 * @param n   luma block index within the macroblock (0..3)
 * @param dir prediction direction (0 = forward, 1 = backward)
 * @param avg use averaging (avg_*) instead of plain put for the store
 */
void ff_vc1_mc_4mv_luma(VC1Context *v, int n, int dir, int avg)
{
    MpegEncContext *s = &v->s;
    uint8_t *srcY;
    int dxy, mx, my, src_x, src_y;
    int off;
    /* field MV in interlaced frame pictures: blocks are stored field-interleaved */
    int fieldmv = (v->fcm == ILACE_FRAME) ? v->blk_mv_type[s->block_index[n]] : 0;
    int v_edge_pos = s->v_edge_pos >> v->field_mode;
    uint8_t (*luty)[256];
    int use_ic;

    /* bail out when the needed reference picture is not available */
    if ((!v->field_mode ||
         (v->ref_field_type[dir] == 1 && v->cur_field_type == 1)) &&
        !v->s.last_picture.f->data[0])
        return;

    mx = s->mv[dir][n][0];
    my = s->mv[dir][n][1];

    /* pick the reference plane and the matching intensity-compensation state */
    if (!dir) {
        if (v->field_mode && (v->cur_field_type != v->ref_field_type[dir]) && v->second_field) {
            srcY = s->current_picture.f->data[0];
            luty = v->curr_luty;
            use_ic = *v->curr_use_ic;
        } else {
            srcY = s->last_picture.f->data[0];
            luty = v->last_luty;
            use_ic = v->last_use_ic;
        }
    } else {
        srcY = s->next_picture.f->data[0];
        luty = v->next_luty;
        use_ic = v->next_use_ic;
    }
    if (!srcY) {
        av_log(v->s.avctx, AV_LOG_ERROR, "Referenced frame missing.\n");
        return;
    }

    if (v->field_mode) {
        if (v->cur_field_type != v->ref_field_type[dir])
            /* adjust vertical component for the offset between opposite fields */
            my = my - 2 + 4 * v->cur_field_type;
    }

    /* after the last block of a field-mode P macroblock, derive and store
     * the macroblock MV used later for B-frame direct prediction */
    if (s->pict_type == AV_PICTURE_TYPE_P && n == 3 && v->field_mode) {
        int opp_count = get_luma_mv(v, 0,
                                    &s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0],
                                    &s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1]);
        int k, f = opp_count > 2;
        for (k = 0; k < 4; k++)
            v->mv_f[1][s->block_index[k] + v->blocks_off] = f;
    }

    if (v->fcm == ILACE_FRAME) { // not sure if needed for other types of picture
        int qx, qy;
        int width = s->avctx->coded_width;
        int height = s->avctx->coded_height >> 1;
        if (s->pict_type == AV_PICTURE_TYPE_P) {
            s->current_picture.motion_val[1][s->block_index[n] + v->blocks_off][0] = mx;
            s->current_picture.motion_val[1][s->block_index[n] + v->blocks_off][1] = my;
        }
        /* MV pull-back for interlaced frames: horizontal in quarter-pel,
         * vertical in field lines (eighth-pel of frame height) */
        qx = (s->mb_x * 16) + (mx >> 2);
        qy = (s->mb_y * 8) + (my >> 3);

        if (qx < -17)
            mx -= 4 * (qx + 17);
        else if (qx > width)
            mx -= 4 * (qx - width);
        if (qy < -18)
            my -= 8 * (qy + 18);
        else if (qy > height + 1)
            my -= 8 * (qy - height - 1);
    }

    /* destination offset of this 8x8 sub-block inside the macroblock;
     * with field MVs the two vertical blocks map to the two fields */
    if ((v->fcm == ILACE_FRAME) && fieldmv)
        off = ((n > 1) ? s->linesize : 0) + (n & 1) * 8;
    else
        off = s->linesize * 4 * (n & 2) + (n & 1) * 8;

    src_x = s->mb_x * 16 + (n & 1) * 8 + (mx >> 2);
    if (!fieldmv)
        src_y = s->mb_y * 16 + (n & 2) * 4 + (my >> 2);
    else
        src_y = s->mb_y * 16 + ((n > 1) ? 1 : 0) + (my >> 2);

    if (v->profile != PROFILE_ADVANCED) {
        src_x = av_clip(src_x, -16, s->mb_width * 16);
        src_y = av_clip(src_y, -16, s->mb_height * 16);
    } else {
        src_x = av_clip(src_x, -17, s->avctx->coded_width);
        if (v->fcm == ILACE_FRAME) {
            /* keep the field parity of the source line while clamping */
            if (src_y & 1)
                src_y = av_clip(src_y, -17, s->avctx->coded_height + 1);
            else
                src_y = av_clip(src_y, -18, s->avctx->coded_height);
        } else {
            src_y = av_clip(src_y, -18, s->avctx->coded_height + 1);
        }
    }

    srcY += src_y * s->linesize + src_x;
    if (v->field_mode && v->ref_field_type[dir])
        /* reference is the bottom field: skip one frame line down */
        srcY += s->current_picture_ptr->f->linesize[0];

    if (fieldmv) {
        if (!(src_y & 1))
            v_edge_pos--;
        else
            src_y -= (src_y < 4);
    }
    /* go through the edge-emulation buffer when the read would cross the
     * picture edge, or when the samples must be rescaled first */
    if (v->rangeredfrm || use_ic
        || s->h_edge_pos < 13 || v_edge_pos < 23
        || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx & 3) - 8 - s->mspel * 2
        || (unsigned)(src_y - (s->mspel << fieldmv)) > v_edge_pos - (my & 3) - ((8 + s->mspel * 2) << fieldmv)) {
        /* sub-block size incl. the extra border needed by the mspel filter */
        const int k = 9 + s->mspel * 2;

        srcY -= s->mspel * (1 + (s->linesize << fieldmv));
        /* check emulate edge stride and offset */
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcY,
                                 s->linesize, s->linesize,
                                 k, k << fieldmv,
                                 src_x - s->mspel, src_y - (s->mspel << fieldmv),
                                 s->h_edge_pos, v_edge_pos);
        srcY = s->sc.edge_emu_buffer;
        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_luma(srcY, k, s->linesize << fieldmv);
        }
        /* if we deal with intensity compensation we need to scale source blocks */
        if (use_ic) {
            vc1_lut_scale_luma(srcY,
                               luty[v->field_mode ? v->ref_field_type[dir] : (((0<<fieldmv)+src_y - (s->mspel << fieldmv)) & 1)],
                               luty[v->field_mode ? v->ref_field_type[dir] : (((1<<fieldmv)+src_y - (s->mspel << fieldmv)) & 1)],
                               k, s->linesize << fieldmv);
        }
        srcY += s->mspel * (1 + (s->linesize << fieldmv));
    }

    if (s->mspel) {
        /* quarter-pel bicubic interpolation */
        dxy = ((my & 3) << 2) | (mx & 3);
        if (avg)
            v->vc1dsp.avg_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
        else
            v->vc1dsp.put_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
    } else { // hpel mc - always used for luma
        dxy = (my & 2) | ((mx & 2) >> 1);
        if (!v->rnd)
            s->hdsp.put_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize, 8);
        else
            s->hdsp.put_no_rnd_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize, 8);
    }
}
/** Do motion compensation for 4-MV macroblock - both chroma blocks
 *
 * Derives the chroma MV from the four luma MVs, then motion-compensates
 * both 8x8 chroma blocks into s->dest[1] and s->dest[2].
 *
 * @param v   VC-1 decoder context
 * @param dir prediction direction (0 = forward, 1 = backward)
 */
void ff_vc1_mc_4mv_chroma(VC1Context *v, int dir)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcU, *srcV;
    int uvmx, uvmy, uvsrc_x, uvsrc_y;
    int16_t tx, ty;
    int chroma_ref_type;
    int v_edge_pos = s->v_edge_pos >> v->field_mode;
    uint8_t (*lutuv)[256];
    int use_ic;

    if (!v->field_mode && !v->s.last_picture.f->data[0])
        return;
    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
        return;

    /* calculate chroma MV vector from four luma MVs */
    if (!v->field_mode || !v->numref) {
        int valid_count = get_chroma_mv(v, dir, &tx, &ty);
        if (!valid_count) {
            s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0;
            s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1] = 0;
            v->luma_mv[s->mb_x][0] = v->luma_mv[s->mb_x][1] = 0;
            return; //no need to do MC for intra blocks
        }
        chroma_ref_type = v->ref_field_type[dir];
    } else {
        /* two-reference field picture: the majority field of the luma MVs
         * decides which field the chroma prediction comes from */
        int opp_count = get_luma_mv(v, dir, &tx, &ty);
        chroma_ref_type = v->cur_field_type ^ (opp_count > 2);
    }
    if (v->field_mode && chroma_ref_type == 1 && v->cur_field_type == 1 && !v->s.last_picture.f->data[0])
        return;
    s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = tx;
    s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1] = ty;

    /* halve the derived luma MV, biasing 3/4-pel positions up */
    uvmx = (tx + ((tx & 3) == 3)) >> 1;
    uvmy = (ty + ((ty & 3) == 3)) >> 1;
    v->luma_mv[s->mb_x][0] = uvmx;
    v->luma_mv[s->mb_x][1] = uvmy;

    if (v->fastuvmc) {
        /* round odd (quarter-pel) components toward zero to half-pel */
        uvmx = uvmx + ((uvmx < 0) ? (uvmx & 1) : -(uvmx & 1));
        uvmy = uvmy + ((uvmy < 0) ? (uvmy & 1) : -(uvmy & 1));
    }
    // Field conversion bias
    if (v->cur_field_type != chroma_ref_type)
        uvmy += 2 - 4 * chroma_ref_type;

    uvsrc_x = s->mb_x * 8 + (uvmx >> 2);
    uvsrc_y = s->mb_y * 8 + (uvmy >> 2);

    /* MV pull-back near the picture boundary */
    if (v->profile != PROFILE_ADVANCED) {
        uvsrc_x = av_clip(uvsrc_x, -8, s->mb_width * 8);
        uvsrc_y = av_clip(uvsrc_y, -8, s->mb_height * 8);
    } else {
        uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width >> 1);
        uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);
    }

    /* pick the reference planes and intensity-compensation state */
    if (!dir) {
        if (v->field_mode && (v->cur_field_type != chroma_ref_type) && v->second_field) {
            srcU = s->current_picture.f->data[1];
            srcV = s->current_picture.f->data[2];
            lutuv = v->curr_lutuv;
            use_ic = *v->curr_use_ic;
        } else {
            srcU = s->last_picture.f->data[1];
            srcV = s->last_picture.f->data[2];
            lutuv = v->last_lutuv;
            use_ic = v->last_use_ic;
        }
    } else {
        srcU = s->next_picture.f->data[1];
        srcV = s->next_picture.f->data[2];
        lutuv = v->next_lutuv;
        use_ic = v->next_use_ic;
    }
    if (!srcU) {
        av_log(v->s.avctx, AV_LOG_ERROR, "Referenced frame missing.\n");
        return;
    }

    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
    if (v->field_mode) {
        if (chroma_ref_type) {
            /* reference is the bottom field: skip one frame line down */
            srcU += s->current_picture_ptr->f->linesize[1];
            srcV += s->current_picture_ptr->f->linesize[2];
        }
    }

    /* go through the edge-emulation buffer when the read would cross the
     * picture edge, or when the samples must be rescaled first */
    if (v->rangeredfrm || use_ic
        || s->h_edge_pos < 18 || v_edge_pos < 18
        || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 9
        || (unsigned)uvsrc_y > (v_edge_pos >> 1) - 9) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcU,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16, srcV,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        srcU = s->sc.edge_emu_buffer;
        srcV = s->sc.edge_emu_buffer + 16;

        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_chroma(srcU, srcV, 9, s->uvlinesize);
        }
        /* if we deal with intensity compensation we need to scale source blocks */
        if (use_ic) {
            vc1_lut_scale_chroma(srcU, srcV,
                                 lutuv[v->field_mode ? chroma_ref_type : ((0 + uvsrc_y) & 1)],
                                 lutuv[v->field_mode ? chroma_ref_type : ((1 + uvsrc_y) & 1)],
                                 9, s->uvlinesize);
        }
    }

    /* Chroma MC always uses qpel bilinear */
    uvmx = (uvmx & 3) << 1;
    uvmy = (uvmy & 3) << 1;
    if (!v->rnd) {
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    } else {
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    }
    if (v->field_mode) {
        /* record whether the chroma blocks referenced the opposite field */
        v->mv_f[dir][s->block_index[4] + v->mb_off] = v->cur_field_type != chroma_ref_type;
        v->mv_f[dir][s->block_index[5] + v->mb_off] = v->cur_field_type != chroma_ref_type;
    }
}
/** Do motion compensation for 4-MV interlaced frame chroma macroblock (both U and V)
 *
 * Motion-compensates the four 4x4 chroma sub-blocks of an interlaced-frame
 * 4-MV macroblock; blocks 0-1 use direction dir, blocks 2-3 use dir2.
 *
 * @param v    VC-1 decoder context
 * @param dir  prediction direction for the top two sub-blocks
 * @param dir2 prediction direction for the bottom two sub-blocks
 * @param avg  use averaging (avg_*) instead of plain put for the store
 */
void ff_vc1_mc_4mv_chroma4(VC1Context *v, int dir, int dir2, int avg)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcU, *srcV;
    int uvsrc_x, uvsrc_y;
    int uvmx_field[4], uvmy_field[4];
    int i, off, tx, ty;
    int fieldmv = v->blk_mv_type[s->block_index[0]];
    /* rounding of the vertical field-MV component by its low 4 bits */
    static const uint8_t s_rndtblfield[16] = { 0, 0, 1, 2, 4, 4, 5, 6, 2, 2, 3, 8, 6, 6, 7, 12 };
    int v_dist = fieldmv ? 1 : 4; // vertical offset for lower sub-blocks
    int v_edge_pos = s->v_edge_pos >> 1;
    int use_ic;
    uint8_t (*lutuv)[256];

    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
        return;

    /* derive the four chroma MVs by halving the luma MVs */
    for (i = 0; i < 4; i++) {
        int d = i < 2 ? dir: dir2;

        tx = s->mv[d][i][0];
        uvmx_field[i] = (tx + ((tx & 3) == 3)) >> 1;
        ty = s->mv[d][i][1];
        if (fieldmv)
            uvmy_field[i] = (ty >> 4) * 8 + s_rndtblfield[ty & 0xF];
        else
            uvmy_field[i] = (ty + ((ty & 3) == 3)) >> 1;
    }

    for (i = 0; i < 4; i++) {
        /* destination offset of this 4x4 sub-block; field MVs interleave
         * the bottom pair into the next field line (v_dist) */
        off = (i & 1) * 4 + ((i & 2) ? v_dist * s->uvlinesize : 0);
        uvsrc_x = s->mb_x * 8 + (i & 1) * 4 + (uvmx_field[i] >> 2);
        uvsrc_y = s->mb_y * 8 + ((i & 2) ? v_dist : 0) + (uvmy_field[i] >> 2);
        // FIXME: implement proper pull-back (see vc1cropmv.c, vc1CROPMV_ChromaPullBack())
        uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width >> 1);
        uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);

        /* pick reference planes per sub-block direction */
        if (i < 2 ? dir : dir2) {
            srcU = s->next_picture.f->data[1];
            srcV = s->next_picture.f->data[2];
            lutuv = v->next_lutuv;
            use_ic = v->next_use_ic;
        } else {
            srcU = s->last_picture.f->data[1];
            srcV = s->last_picture.f->data[2];
            lutuv = v->last_lutuv;
            use_ic = v->last_use_ic;
        }
        if (!srcU)
            return;
        srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
        srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
        uvmx_field[i] = (uvmx_field[i] & 3) << 1;
        uvmy_field[i] = (uvmy_field[i] & 3) << 1;

        if (fieldmv) {
            if (!(uvsrc_y & 1))
                v_edge_pos = (s->v_edge_pos >> 1) - 1;
            else
                uvsrc_y -= (uvsrc_y < 2);
        }
        /* go through the edge-emulation buffer when the read would cross
         * the picture edge or intensity compensation is active */
        if (use_ic
            || s->h_edge_pos < 10 || v_edge_pos < (5 << fieldmv)
            || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 5
            || (unsigned)uvsrc_y > v_edge_pos - (5 << fieldmv)) {
            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcU,
                                     s->uvlinesize, s->uvlinesize,
                                     5, (5 << fieldmv), uvsrc_x, uvsrc_y,
                                     s->h_edge_pos >> 1, v_edge_pos);
            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16, srcV,
                                     s->uvlinesize, s->uvlinesize,
                                     5, (5 << fieldmv), uvsrc_x, uvsrc_y,
                                     s->h_edge_pos >> 1, v_edge_pos);
            srcU = s->sc.edge_emu_buffer;
            srcV = s->sc.edge_emu_buffer + 16;

            /* if we deal with intensity compensation we need to scale source blocks */
            if (use_ic) {
                vc1_lut_scale_chroma(srcU, srcV,
                                     lutuv[(uvsrc_y + (0 << fieldmv)) & 1],
                                     lutuv[(uvsrc_y + (1 << fieldmv)) & 1],
                                     5, s->uvlinesize << fieldmv);
            }
        }
        /* 4x4 qpel bilinear chroma MC, put or avg, rounding per v->rnd */
        if (avg) {
            if (!v->rnd) {
                h264chroma->avg_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                h264chroma->avg_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            } else {
                v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            }
        } else {
            if (!v->rnd) {
                h264chroma->put_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                h264chroma->put_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            } else {
                v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
                v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
            }
        }
    }
}
/** Motion compensation for direct or interpolated blocks in B-frames
 *
 * Averages the backward (next-picture) prediction into s->dest[0..2],
 * which must already contain the forward prediction — hence the avg_*
 * DSP functions throughout.
 *
 * @param v VC-1 decoder context
 */
void ff_vc1_interp_mc(VC1Context *v)
{
    MpegEncContext *s = &v->s;
    H264ChromaContext *h264chroma = &v->h264chroma;
    uint8_t *srcY, *srcU, *srcV;
    int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
    int v_edge_pos = s->v_edge_pos >> v->field_mode;
    int use_ic = v->next_use_ic;

    if (!v->field_mode && !v->s.next_picture.f->data[0])
        return;

    /* backward MV of block 0 (the interpolated direction) */
    mx = s->mv[1][0][0];
    my = s->mv[1][0][1];
    /* derive the chroma MV: halve the luma MV, biasing 3/4-pel positions up */
    uvmx = (mx + ((mx & 3) == 3)) >> 1;
    uvmy = (my + ((my & 3) == 3)) >> 1;
    if (v->field_mode && v->cur_field_type != v->ref_field_type[1]) {
        /* adjust vertical components for the offset between opposite fields */
        my = my - 2 + 4 * v->cur_field_type;
        uvmy = uvmy - 2 + 4 * v->cur_field_type;
    }
    if (v->fastuvmc) {
        /* NOTE(review): this rounds odd components away from zero, whereas
         * ff_vc1_mc_1mv and ff_vc1_mc_4mv_chroma round toward zero — confirm
         * against SMPTE 421M whether this asymmetry is intentional. */
        uvmx = uvmx + ((uvmx < 0) ? -(uvmx & 1) : (uvmx & 1));
        uvmy = uvmy + ((uvmy < 0) ? -(uvmy & 1) : (uvmy & 1));
    }
    srcY = s->next_picture.f->data[0];
    srcU = s->next_picture.f->data[1];
    srcV = s->next_picture.f->data[2];

    src_x = s->mb_x * 16 + (mx >> 2);
    src_y = s->mb_y * 16 + (my >> 2);
    uvsrc_x = s->mb_x * 8 + (uvmx >> 2);
    uvsrc_y = s->mb_y * 8 + (uvmy >> 2);

    /* MV pull-back near the picture boundary */
    if (v->profile != PROFILE_ADVANCED) {
        src_x = av_clip( src_x, -16, s->mb_width * 16);
        src_y = av_clip( src_y, -16, s->mb_height * 16);
        uvsrc_x = av_clip(uvsrc_x, -8, s->mb_width * 8);
        uvsrc_y = av_clip(uvsrc_y, -8, s->mb_height * 8);
    } else {
        src_x = av_clip( src_x, -17, s->avctx->coded_width);
        src_y = av_clip( src_y, -18, s->avctx->coded_height + 1);
        uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width >> 1);
        uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);
    }

    srcY += src_y * s->linesize + src_x;
    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;

    if (v->field_mode && v->ref_field_type[1]) {
        /* reference is the bottom field: skip one frame line down */
        srcY += s->current_picture_ptr->f->linesize[0];
        srcU += s->current_picture_ptr->f->linesize[1];
        srcV += s->current_picture_ptr->f->linesize[2];
    }

    /* for grayscale we should not try to read from unknown area */
    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY) {
        srcU = s->sc.edge_emu_buffer + 18 * s->linesize;
        srcV = s->sc.edge_emu_buffer + 18 * s->linesize;
    }

    /* go through the edge-emulation buffer when the read would cross the
     * picture edge, or when the samples must be rescaled first */
    if (v->rangeredfrm || s->h_edge_pos < 22 || v_edge_pos < 22 || use_ic
        || (unsigned)(src_x - 1) > s->h_edge_pos - (mx & 3) - 16 - 3
        || (unsigned)(src_y - 1) > v_edge_pos - (my & 3) - 16 - 3) {
        uint8_t *ubuf = s->sc.edge_emu_buffer + 19 * s->linesize;
        uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
        /* luma block size incl. the extra border needed by the mspel filter */
        const int k = 17 + s->mspel * 2;

        srcY -= s->mspel * (1 + s->linesize);
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcY,
                                 s->linesize, s->linesize,
                                 k, k,
                                 src_x - s->mspel, src_y - s->mspel,
                                 s->h_edge_pos, v_edge_pos);
        srcY = s->sc.edge_emu_buffer;
        s->vdsp.emulated_edge_mc(ubuf, srcU,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        s->vdsp.emulated_edge_mc(vbuf, srcV,
                                 s->uvlinesize, s->uvlinesize,
                                 8 + 1, 8 + 1,
                                 uvsrc_x, uvsrc_y,
                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
        srcU = ubuf;
        srcV = vbuf;
        /* if we deal with range reduction we need to scale source blocks */
        if (v->rangeredfrm) {
            vc1_scale_luma(srcY, k, s->linesize);
            vc1_scale_chroma(srcU, srcV, 9, s->uvlinesize);
        }

        if (use_ic) {
            uint8_t (*luty )[256] = v->next_luty;
            uint8_t (*lutuv)[256] = v->next_lutuv;
            /* field pictures use the reference field's LUT for every row;
             * frame pictures alternate LUTs by source-row parity */
            vc1_lut_scale_luma(srcY,
                               luty[v->field_mode ? v->ref_field_type[1] : ((0+src_y - s->mspel) & 1)],
                               luty[v->field_mode ? v->ref_field_type[1] : ((1+src_y - s->mspel) & 1)],
                               k, s->linesize);
            vc1_lut_scale_chroma(srcU, srcV,
                                 lutuv[v->field_mode ? v->ref_field_type[1] : ((0+uvsrc_y) & 1)],
                                 lutuv[v->field_mode ? v->ref_field_type[1] : ((1+uvsrc_y) & 1)],
                                 9, s->uvlinesize);
        }
        srcY += s->mspel * (1 + s->linesize);
    }

    if (s->mspel) {
        /* quarter-pel bicubic interpolation, averaged into the destination */
        dxy = ((my & 3) << 2) | (mx & 3);
        v->vc1dsp.avg_vc1_mspel_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, v->rnd);
    } else { // hpel mc
        dxy = (my & 2) | ((mx & 2) >> 1);
        if (!v->rnd)
            s->hdsp.avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
        else
            s->hdsp.avg_no_rnd_pixels_tab[dxy](s->dest[0], srcY, s->linesize, 16);
    }

    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
        return;
    /* Chroma MC always uses qpel bilinear */
    uvmx = (uvmx & 3) << 1;
    uvmy = (uvmy & 3) << 1;
    if (!v->rnd) {
        h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    } else {
        v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
        v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
    }
}
  804. }