You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

983 lines
37KB

  1. /*
  2. * Copyright (c) 2000,2001 Fabrice Bellard
  3. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  4. *
  5. * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  6. *
  7. * This file is part of Libav.
  8. *
  9. * Libav is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * Libav is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with Libav; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. #include <string.h>
  24. #include "libavutil/internal.h"
  25. #include "avcodec.h"
  26. #include "dsputil.h"
  27. #include "h261.h"
  28. #include "mpegvideo.h"
  29. #include "mjpegenc.h"
  30. #include "msmpeg4.h"
  31. #include <limits.h>
  32. static void gmc1_motion(MpegEncContext *s,
  33. uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
  34. uint8_t **ref_picture)
  35. {
  36. uint8_t *ptr;
  37. int src_x, src_y, motion_x, motion_y;
  38. ptrdiff_t offset, linesize, uvlinesize;
  39. int emu = 0;
  40. motion_x = s->sprite_offset[0][0];
  41. motion_y = s->sprite_offset[0][1];
  42. src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy + 1));
  43. src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy + 1));
  44. motion_x <<= (3 - s->sprite_warping_accuracy);
  45. motion_y <<= (3 - s->sprite_warping_accuracy);
  46. src_x = av_clip(src_x, -16, s->width);
  47. if (src_x == s->width)
  48. motion_x = 0;
  49. src_y = av_clip(src_y, -16, s->height);
  50. if (src_y == s->height)
  51. motion_y = 0;
  52. linesize = s->linesize;
  53. uvlinesize = s->uvlinesize;
  54. ptr = ref_picture[0] + src_y * linesize + src_x;
  55. if ((unsigned)src_x >= FFMAX(s->h_edge_pos - 17, 0) ||
  56. (unsigned)src_y >= FFMAX(s->v_edge_pos - 17, 0)) {
  57. s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr,
  58. linesize, linesize,
  59. 17, 17,
  60. src_x, src_y,
  61. s->h_edge_pos, s->v_edge_pos);
  62. ptr = s->edge_emu_buffer;
  63. }
  64. if ((motion_x | motion_y) & 7) {
  65. s->dsp.gmc1(dest_y, ptr, linesize, 16,
  66. motion_x & 15, motion_y & 15, 128 - s->no_rounding);
  67. s->dsp.gmc1(dest_y + 8, ptr + 8, linesize, 16,
  68. motion_x & 15, motion_y & 15, 128 - s->no_rounding);
  69. } else {
  70. int dxy;
  71. dxy = ((motion_x >> 3) & 1) | ((motion_y >> 2) & 2);
  72. if (s->no_rounding) {
  73. s->hdsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
  74. } else {
  75. s->hdsp.put_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
  76. }
  77. }
  78. if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY)
  79. return;
  80. motion_x = s->sprite_offset[1][0];
  81. motion_y = s->sprite_offset[1][1];
  82. src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy + 1));
  83. src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy + 1));
  84. motion_x <<= (3 - s->sprite_warping_accuracy);
  85. motion_y <<= (3 - s->sprite_warping_accuracy);
  86. src_x = av_clip(src_x, -8, s->width >> 1);
  87. if (src_x == s->width >> 1)
  88. motion_x = 0;
  89. src_y = av_clip(src_y, -8, s->height >> 1);
  90. if (src_y == s->height >> 1)
  91. motion_y = 0;
  92. offset = (src_y * uvlinesize) + src_x;
  93. ptr = ref_picture[1] + offset;
  94. if ((unsigned)src_x >= FFMAX((s->h_edge_pos >> 1) - 9, 0) ||
  95. (unsigned)src_y >= FFMAX((s->v_edge_pos >> 1) - 9, 0)) {
  96. s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr,
  97. uvlinesize, uvlinesize,
  98. 9, 9,
  99. src_x, src_y,
  100. s->h_edge_pos >> 1, s->v_edge_pos >> 1);
  101. ptr = s->edge_emu_buffer;
  102. emu = 1;
  103. }
  104. s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8,
  105. motion_x & 15, motion_y & 15, 128 - s->no_rounding);
  106. ptr = ref_picture[2] + offset;
  107. if (emu) {
  108. s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr,
  109. uvlinesize, uvlinesize,
  110. 9, 9,
  111. src_x, src_y,
  112. s->h_edge_pos >> 1, s->v_edge_pos >> 1);
  113. ptr = s->edge_emu_buffer;
  114. }
  115. s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8,
  116. motion_x & 15, motion_y & 15, 128 - s->no_rounding);
  117. }
/**
 * Global motion compensation with a full affine warp (two or three
 * warping points): each plane is warped with s->sprite_delta.
 * Luma is processed as two 8-wide halves; chroma is skipped entirely
 * in grayscale-only decoding mode.
 *
 * @param s           codec context (reads sprite_offset/sprite_delta,
 *                    mb position, edge positions, linesizes)
 * @param dest_y      luma destination (16x16)
 * @param dest_cb     Cb destination (8x8)
 * @param dest_cr     Cr destination (8x8)
 * @param ref_picture array[3] of pointers to the reference planes
 */
static void gmc_motion(MpegEncContext *s,
                       uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                       uint8_t **ref_picture)
{
    uint8_t *ptr;
    int linesize, uvlinesize;
    const int a = s->sprite_warping_accuracy;
    int ox, oy;

    linesize   = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0];

    /* Warp origin for this macroblock's luma: global offset plus the
     * affine delta evaluated at the block's top-left pixel. */
    ox = s->sprite_offset[0][0] + s->sprite_delta[0][0] * s->mb_x * 16 +
         s->sprite_delta[0][1] * s->mb_y * 16;
    oy = s->sprite_offset[0][1] + s->sprite_delta[1][0] * s->mb_x * 16 +
         s->sprite_delta[1][1] * s->mb_y * 16;

    /* Left 8x16 half. The rounding term (1 << (2a+1)) - no_rounding
     * implements round-to-nearest (or chop when no_rounding is set). */
    s->dsp.gmc(dest_y, ptr, linesize, 16,
               ox, oy,
               s->sprite_delta[0][0], s->sprite_delta[0][1],
               s->sprite_delta[1][0], s->sprite_delta[1][1],
               a + 1, (1 << (2 * a + 1)) - s->no_rounding,
               s->h_edge_pos, s->v_edge_pos);
    /* Right 8x16 half: advance the warp origin by 8 columns. */
    s->dsp.gmc(dest_y + 8, ptr, linesize, 16,
               ox + s->sprite_delta[0][0] * 8,
               oy + s->sprite_delta[1][0] * 8,
               s->sprite_delta[0][0], s->sprite_delta[0][1],
               s->sprite_delta[1][0], s->sprite_delta[1][1],
               a + 1, (1 << (2 * a + 1)) - s->no_rounding,
               s->h_edge_pos, s->v_edge_pos);

    if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY)
        return;

    /* Chroma: same warp at half resolution with the chroma offset. */
    ox = s->sprite_offset[1][0] + s->sprite_delta[0][0] * s->mb_x * 8 +
         s->sprite_delta[0][1] * s->mb_y * 8;
    oy = s->sprite_offset[1][1] + s->sprite_delta[1][0] * s->mb_x * 8 +
         s->sprite_delta[1][1] * s->mb_y * 8;

    ptr = ref_picture[1];
    s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
               ox, oy,
               s->sprite_delta[0][0], s->sprite_delta[0][1],
               s->sprite_delta[1][0], s->sprite_delta[1][1],
               a + 1, (1 << (2 * a + 1)) - s->no_rounding,
               s->h_edge_pos >> 1, s->v_edge_pos >> 1);

    ptr = ref_picture[2];
    s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
               ox, oy,
               s->sprite_delta[0][0], s->sprite_delta[0][1],
               s->sprite_delta[1][0], s->sprite_delta[1][1],
               a + 1, (1 << (2 * a + 1)) - s->no_rounding,
               s->h_edge_pos >> 1, s->v_edge_pos >> 1);
}
  167. static inline int hpel_motion(MpegEncContext *s,
  168. uint8_t *dest, uint8_t *src,
  169. int src_x, int src_y,
  170. op_pixels_func *pix_op,
  171. int motion_x, int motion_y)
  172. {
  173. int dxy = 0;
  174. int emu = 0;
  175. src_x += motion_x >> 1;
  176. src_y += motion_y >> 1;
  177. /* WARNING: do no forget half pels */
  178. src_x = av_clip(src_x, -16, s->width); // FIXME unneeded for emu?
  179. if (src_x != s->width)
  180. dxy |= motion_x & 1;
  181. src_y = av_clip(src_y, -16, s->height);
  182. if (src_y != s->height)
  183. dxy |= (motion_y & 1) << 1;
  184. src += src_y * s->linesize + src_x;
  185. if (s->unrestricted_mv) {
  186. if ((unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x & 1) - 8, 0) ||
  187. (unsigned)src_y > FFMAX(s->v_edge_pos - (motion_y & 1) - 8, 0)) {
  188. s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
  189. s->linesize, s->linesize,
  190. 9, 9,
  191. src_x, src_y, s->h_edge_pos,
  192. s->v_edge_pos);
  193. src = s->edge_emu_buffer;
  194. emu = 1;
  195. }
  196. }
  197. pix_op[dxy](dest, src, s->linesize, 8);
  198. return emu;
  199. }
/**
 * Apply one MPEG-style motion vector (half-pel) to all three planes.
 * Handles frame and field prediction, per-codec chroma subsampling and
 * rounding rules, and out-of-picture vectors via edge emulation.
 *
 * @param field_based  1 for field prediction (halved block heights/strides)
 * @param bottom_field write into the bottom field of the destination
 * @param field_select read from the bottom field of the reference
 * @param pix_op       half-pel put/avg function tables
 * @param h            luma block height in lines
 * @param is_mpeg12    compile-time constant selecting the MPEG-1/2 path
 *                     (function is av_always_inline, so the branches on
 *                     it are resolved at compile time)
 * @param mb_y         macroblock row (already halved for field pictures)
 */
static av_always_inline
void mpeg_motion_internal(MpegEncContext *s,
                          uint8_t *dest_y,
                          uint8_t *dest_cb,
                          uint8_t *dest_cr,
                          int field_based,
                          int bottom_field,
                          int field_select,
                          uint8_t **ref_picture,
                          op_pixels_func (*pix_op)[4],
                          int motion_x,
                          int motion_y,
                          int h,
                          int is_mpeg12,
                          int mb_y)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y,
        uvsrc_x, uvsrc_y, v_edge_pos;
    ptrdiff_t uvlinesize, linesize;

#if 0
    if (s->quarter_sample) {
        motion_x >>= 1;
        motion_y >>= 1;
    }
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    /* Field prediction addresses every second line of the frame buffer. */
    linesize   = s->current_picture.f.linesize[0] << field_based;
    uvlinesize = s->current_picture.f.linesize[1] << field_based;

    /* dxy selects the half-pel interpolation variant (bit 0: x, bit 1: y). */
    dxy   = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x * 16 + (motion_x >> 1);
    src_y = (mb_y << (4 - field_based)) + (motion_y >> 1);

    if (!is_mpeg12 && s->out_format == FMT_H263) {
        if ((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based) {
            /* Buggy-encoder workaround: derive chroma MV with H.263-style
             * "round away from zero" on the x component. */
            mx      = (motion_x >> 1) | (motion_x & 1);
            my      = motion_y >> 1;
            uvdxy   = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x * 8 + (mx >> 1);
            uvsrc_y = (mb_y << (3 - field_based)) + (my >> 1);
        } else {
            /* Standard H.263 chroma: reuse luma dxy with extra sub-pel
             * bits from the luma vector. */
            uvdxy   = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x >> 1;
            uvsrc_y = src_y >> 1;
        }
    // Even chroma mv's are full pel in H261
    } else if (!is_mpeg12 && s->out_format == FMT_H261) {
        mx      = motion_x / 4;
        my      = motion_y / 4;
        uvdxy   = 0;
        uvsrc_x = s->mb_x * 8 + mx;
        uvsrc_y = mb_y * 8 + my;
    } else {
        if (s->chroma_y_shift) {
            /* 4:2:0 — chroma MV is half the luma MV, truncated toward 0. */
            mx      = motion_x / 2;
            my      = motion_y / 2;
            uvdxy   = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x * 8 + (mx >> 1);
            uvsrc_y = (mb_y << (3 - field_based)) + (my >> 1);
        } else {
            if (s->chroma_x_shift) {
                // Chroma422
                mx      = motion_x / 2;
                uvdxy   = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x * 8 + (mx >> 1);
                uvsrc_y = src_y;
            } else {
                // Chroma444
                uvdxy   = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if ((unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x & 1) - 16, 0) ||
        (unsigned)src_y > FFMAX(v_edge_pos - (motion_y & 1) - h, 0)) {
        if (is_mpeg12 ||
            s->codec_id == AV_CODEC_ID_MPEG2VIDEO ||
            s->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
            /* MPEG-1/2 disallow vectors outside the picture: drop the MB
             * rather than reading out of bounds. */
            av_log(s->avctx, AV_LOG_DEBUG,
                   "MPEG motion vector out of boundary (%d %d)\n", src_x,
                   src_y);
            return;
        }
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr_y,
                                 s->linesize, s->linesize,
                                 17, 17 + field_based,
                                 src_x, src_y << field_based,
                                 s->h_edge_pos, s->v_edge_pos);
        ptr_y = s->edge_emu_buffer;
        if (!CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
            /* Chroma scratch space lives right after the 17-line luma
             * area inside edge_emu_buffer; Cr starts 16 bytes after Cb. */
            uint8_t *uvbuf = s->edge_emu_buffer + 18 * s->linesize;
            s->vdsp.emulated_edge_mc(uvbuf, ptr_cb,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y << field_based,
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            s->vdsp.emulated_edge_mc(uvbuf + 16, ptr_cr,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y << field_based,
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            ptr_cb = uvbuf;
            ptr_cr = uvbuf + 16;
        }
    }

    /* FIXME use this for field pix too instead of the obnoxious hack which
     * changes picture.data */
    if (bottom_field) {
        dest_y  += s->linesize;
        dest_cb += s->uvlinesize;
        dest_cr += s->uvlinesize;
    }

    if (field_select) {
        ptr_y  += s->linesize;
        ptr_cb += s->uvlinesize;
        ptr_cr += s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if (!CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
        pix_op[s->chroma_x_shift][uvdxy]
            (dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy]
            (dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
    if (!is_mpeg12 && (CONFIG_H261_ENCODER || CONFIG_H261_DECODER) &&
        s->out_format == FMT_H261) {
        ff_h261_loop_filter(s);
    }
}
  332. /* apply one mpeg motion vector to the three components */
  333. static void mpeg_motion(MpegEncContext *s,
  334. uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
  335. int field_select, uint8_t **ref_picture,
  336. op_pixels_func (*pix_op)[4],
  337. int motion_x, int motion_y, int h, int mb_y)
  338. {
  339. #if !CONFIG_SMALL
  340. if (s->out_format == FMT_MPEG1)
  341. mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 0, 0,
  342. field_select, ref_picture, pix_op,
  343. motion_x, motion_y, h, 1, mb_y);
  344. else
  345. #endif
  346. mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 0, 0,
  347. field_select, ref_picture, pix_op,
  348. motion_x, motion_y, h, 0, mb_y);
  349. }
  350. static void mpeg_motion_field(MpegEncContext *s, uint8_t *dest_y,
  351. uint8_t *dest_cb, uint8_t *dest_cr,
  352. int bottom_field, int field_select,
  353. uint8_t **ref_picture,
  354. op_pixels_func (*pix_op)[4],
  355. int motion_x, int motion_y, int h, int mb_y)
  356. {
  357. #if !CONFIG_SMALL
  358. if(s->out_format == FMT_MPEG1)
  359. mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 1,
  360. bottom_field, field_select, ref_picture, pix_op,
  361. motion_x, motion_y, h, 1, mb_y);
  362. else
  363. #endif
  364. mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 1,
  365. bottom_field, field_select, ref_picture, pix_op,
  366. motion_x, motion_y, h, 0, mb_y);
  367. }
// FIXME move to dsputil, avg variant, 16x16 version
/**
 * Blend the five candidate predictions of an 8x8 block (mid, top, left,
 * right, bottom neighbor MVs) with fixed OBMC weights.
 * Each output pixel is a weighted sum whose weights total 8, so the
 * result is (sum + 4) >> 3. The weight table below is position-dependent:
 * pixels near an edge give more weight to that neighbor's prediction.
 *
 * @param dst    destination 8x8 block
 * @param src    array of 5 source blocks: [0]=mid, [1]=top, [2]=left,
 *               [3]=right, [4]=bottom
 * @param stride line size of both dst and the source blocks
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride)
{
    int x;
    uint8_t *const top    = src[1];
    uint8_t *const left   = src[2];
    uint8_t *const mid    = src[0];
    uint8_t *const right  = src[3];
    uint8_t *const bottom = src[4];

/* Weighted blend of one pixel; t/l/m/r/b are the per-source weights
 * (they always sum to 8). */
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3

/* Apply the same weights to a 2x2 group of pixels. */
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    /* Row 0: top neighbor weight 2, left/right ramping at the edges. */
    x = 0;
    OBMC_FILTER (x    , 2, 2, 4, 0, 0);
    OBMC_FILTER (x + 1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x + 2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x + 4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x + 6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x + 7, 2, 0, 4, 2, 0);
    /* Row 1 (middle columns already covered by the 2x2 groups above). */
    x += stride;
    OBMC_FILTER (x    , 1, 2, 5, 0, 0);
    OBMC_FILTER (x + 1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x + 6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x + 7, 1, 0, 5, 2, 0);
    /* Rows 2-3: top influence fading, mid dominating. */
    x += stride;
    OBMC_FILTER4(x    , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x + 2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x + 4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x + 6, 1, 0, 5, 2, 0);
    /* Rows 4-5: symmetric lower half — bottom neighbor takes over. */
    x += 2 * stride;
    OBMC_FILTER4(x    , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x + 2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x + 4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x + 6, 0, 0, 5, 2, 1);
    /* Row 6 edges plus 2x2 groups spilling into row 7. */
    x += 2*stride;
    OBMC_FILTER (x    , 0, 2, 5, 0, 1);
    OBMC_FILTER (x + 1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x + 2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x + 4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x + 6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x + 7, 0, 0, 5, 2, 1);
    /* Row 7: bottom neighbor weight 2 at the corners. */
    x += stride;
    OBMC_FILTER (x    , 0, 2, 4, 0, 2);
    OBMC_FILTER (x + 1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x + 6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x + 7, 0, 0, 4, 2, 2);
}
/* obmc for 1 8x8 luma block */
/**
 * Overlapped block motion compensation for one 8x8 luma block:
 * produce the five neighbor-MV predictions (reusing the mid prediction
 * when a neighbor's MV equals it) and blend them with put_obmc().
 *
 * @param dest   destination 8x8 block
 * @param src    top-left of the reference luma plane
 * @param src_x  block x position in the reference, in pixels
 * @param src_y  block y position in the reference, in pixels
 * @param pix_op half-pel put function table
 * @param mv     five half-pel MVs in the order mid, top, left, right, bottom
 */
static inline void obmc_motion(MpegEncContext *s,
                               uint8_t *dest, uint8_t *src,
                               int src_x, int src_y,
                               op_pixels_func *pix_op,
                               int16_t mv[5][2] /* mid top left right bottom */)
#define MID 0
{
    int i;
    uint8_t *ptr[5];

    assert(s->quarter_sample == 0);

    for (i = 0; i < 5; i++) {
        if (i && mv[i][0] == mv[MID][0] && mv[i][1] == mv[MID][1]) {
            /* Same MV as the mid block: share its prediction instead of
             * recomputing it. (i > 0 guarantees ptr[MID] is set.) */
            ptr[i] = ptr[MID];
        } else {
            /* Each prediction gets its own 8x8 slot in the scratchpad,
             * laid out in a 2x2-ish grid by (i & 1, i >> 1). */
            ptr[i] = s->obmc_scratchpad + 8 * (i & 1) +
                     s->linesize * 8 * (i >> 1);
            hpel_motion(s, ptr[i], src, src_x, src_y, pix_op,
                        mv[i][0], mv[i][1]);
        }
    }

    put_obmc(dest, ptr, s->linesize);
}
/**
 * Quarter-pel motion compensation for one macroblock (MPEG-4 qpel).
 * Luma uses the 1/4-pel interpolators; chroma is derived by halving the
 * luma vector (with encoder-bug workarounds) and uses half-pel ops.
 *
 * @param field_based  1 for field prediction (halved heights/doubled strides)
 * @param bottom_field write into the bottom field of the destination
 * @param field_select read from the bottom field of the reference
 * @param pix_op       half-pel function tables (used for chroma)
 * @param qpix_op      quarter-pel function tables (used for luma)
 * @param motion_x     horizontal MV in quarter-pel units
 * @param motion_y     vertical MV in quarter-pel units
 * @param h            luma block height in lines
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y,
                               uint8_t *dest_cb,
                               uint8_t *dest_cr,
                               int field_based, int bottom_field,
                               int field_select, uint8_t **ref_picture,
                               op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos;
    ptrdiff_t linesize, uvlinesize;

    /* dxy indexes the 16 quarter-pel interpolation variants. */
    dxy   = ((motion_y & 3) << 2) | (motion_x & 3);

    src_x = s->mb_x * 16 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* Derive the half-pel chroma vector from the quarter-pel luma vector;
     * the three non-field branches reproduce different encoder roundings. */
    if (field_based) {
        mx = motion_x / 2;
        my = motion_y >> 1;
    } else if (s->workaround_bugs & FF_BUG_QPEL_CHROMA2) {
        static const int rtab[8] = { 0, 0, 1, 1, 0, 0, 0, 1 };
        mx = (motion_x >> 1) + rtab[motion_x & 7];
        my = (motion_y >> 1) + rtab[motion_y & 7];
    } else if (s->workaround_bugs & FF_BUG_QPEL_CHROMA) {
        mx = (motion_x >> 1) | (motion_x & 1);
        my = (motion_y >> 1) | (motion_y & 1);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
    }
    /* H.263-style chroma rounding, then split into half-pel bit + int part. */
    mx = (mx >> 1) | (mx & 1);
    my = (my >> 1) | (my & 1);

    uvdxy = (mx & 1) | ((my & 1) << 1);
    mx  >>= 1;
    my  >>= 1;

    uvsrc_x = s->mb_x * 8 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if ((unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x & 3) - 16, 0) ||
        (unsigned)src_y > FFMAX(v_edge_pos - (motion_y & 3) - h, 0)) {
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr_y,
                                 s->linesize, s->linesize,
                                 17, 17 + field_based,
                                 src_x, src_y << field_based,
                                 s->h_edge_pos, s->v_edge_pos);
        ptr_y = s->edge_emu_buffer;
        if (!CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
            /* Chroma scratch area sits after the 17-line luma area;
             * Cr starts 16 bytes after Cb within it. */
            uint8_t *uvbuf = s->edge_emu_buffer + 18 * s->linesize;
            s->vdsp.emulated_edge_mc(uvbuf, ptr_cb,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y << field_based,
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            s->vdsp.emulated_edge_mc(uvbuf + 16, ptr_cr,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y << field_based,
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            ptr_cb = uvbuf;
            ptr_cr = uvbuf + 16;
        }
    }

    if (!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else {
        if (bottom_field) {
            dest_y  += s->linesize;
            dest_cb += s->uvlinesize;
            dest_cr += s->uvlinesize;
        }

        if (field_select) {
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        // damn interlaced mode
        // FIXME boundary mirroring is not exactly correct here
        qpix_op[1][dxy](dest_y, ptr_y, linesize);
        qpix_op[1][dxy](dest_y + 8, ptr_y + 8, linesize);
    }
    if (!CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
/**
 * H.263 chroma motion compensation for 4MV macroblocks.
 * The four luma MVs are summed by the caller; this builds one chroma MV
 * from that sum using the special H.263 rounding and applies it to both
 * chroma planes.
 *
 * @param dest_cb Cb destination (8x8)
 * @param dest_cr Cr destination (8x8)
 * @param pix_op  half-pel put/avg function table
 * @param mx      sum of the four luma x vectors (half-pel units)
 * @param my      sum of the four luma y vectors (half-pel units)
 */
static void chroma_4mv_motion(MpegEncContext *s,
                              uint8_t *dest_cb, uint8_t *dest_cr,
                              uint8_t **ref_picture,
                              op_pixels_func *pix_op,
                              int mx, int my)
{
    uint8_t *ptr;
    int src_x, src_y, dxy, emu = 0;
    ptrdiff_t offset;

    /* In case of 8X8, we construct a single chroma motion vector
     * with a special rounding */
    mx = ff_h263_round_chroma(mx);
    my = ff_h263_round_chroma(my);

    dxy  = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    src_x = av_clip(src_x, -8, (s->width >> 1));
    if (src_x == (s->width >> 1))
        dxy &= ~1; /* clamped to the right edge: drop horizontal half-pel */
    src_y = av_clip(src_y, -8, (s->height >> 1));
    if (src_y == (s->height >> 1))
        dxy &= ~2; /* clamped to the bottom edge: drop vertical half-pel */

    offset = src_y * s->uvlinesize + src_x;
    ptr    = ref_picture[1] + offset;
    if ((unsigned)src_x > FFMAX((s->h_edge_pos >> 1) - (dxy & 1) - 8, 0) ||
        (unsigned)src_y > FFMAX((s->v_edge_pos >> 1) - (dxy >> 1) - 8, 0)) {
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr,
                                 s->uvlinesize, s->uvlinesize,
                                 9, 9, src_x, src_y,
                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
        ptr = s->edge_emu_buffer;
        emu = 1;
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    ptr = ref_picture[2] + offset;
    if (emu) {
        /* Cb needed edge emulation at this position, so Cr does too. */
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr,
                                 s->uvlinesize, s->uvlinesize,
                                 9, 9, src_x, src_y,
                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
        ptr = s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
  581. static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir)
  582. {
  583. /* fetch pixels for estimated mv 4 macroblocks ahead
  584. * optimized for 64byte cache lines */
  585. const int shift = s->quarter_sample ? 2 : 1;
  586. const int mx = (s->mv[dir][0][0] >> shift) + 16 * s->mb_x + 8;
  587. const int my = (s->mv[dir][0][1] >> shift) + 16 * s->mb_y;
  588. int off = mx + (my + (s->mb_x & 3) * 4) * s->linesize + 64;
  589. s->vdsp.prefetch(pix[0] + off, s->linesize, 4);
  590. off = (mx >> 1) + ((my >> 1) + (s->mb_x & 7)) * s->uvlinesize + 64;
  591. s->vdsp.prefetch(pix[1] + off, pix[2] - pix[1], 2);
  592. }
/**
 * Overlapped block motion compensation (H.263 Annex F) for one
 * macroblock: gather the MVs of the four own 8x8 blocks plus their
 * top/left/right neighbors into a 4x4 cache (with border replication
 * for intra/missing neighbors), OBMC-blend each luma block, and do
 * normal 4MV chroma compensation with the summed vector.
 */
static inline void apply_obmc(MpegEncContext *s,
                              uint8_t *dest_y,
                              uint8_t *dest_cb,
                              uint8_t *dest_cr,
                              uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4])
{
    LOCAL_ALIGNED_8(int16_t, mv_cache, [4], [4][2]);
    Picture *cur_frame   = &s->current_picture;
    int mb_x = s->mb_x;
    int mb_y = s->mb_y;
    const int xy         = mb_x + mb_y * s->mb_stride;
    const int mot_stride = s->b8_stride;
    const int mot_xy     = mb_x * 2 + mb_y * 2 * mot_stride;
    int mx, my, i;

    assert(!s->mb_skipped);

    /* Center 2x2 of the cache: this macroblock's own four block MVs. */
    AV_COPY32(mv_cache[1][1], cur_frame->motion_val[0][mot_xy]);
    AV_COPY32(mv_cache[1][2], cur_frame->motion_val[0][mot_xy + 1]);

    AV_COPY32(mv_cache[2][1],
              cur_frame->motion_val[0][mot_xy + mot_stride]);
    AV_COPY32(mv_cache[2][2],
              cur_frame->motion_val[0][mot_xy + mot_stride + 1]);
    /* Bottom row of the cache: below-neighbor MVs are not decoded yet,
     * so replicate this MB's bottom row. */
    AV_COPY32(mv_cache[3][1],
              cur_frame->motion_val[0][mot_xy + mot_stride]);
    AV_COPY32(mv_cache[3][2],
              cur_frame->motion_val[0][mot_xy + mot_stride + 1]);

    /* Top row: use the above MB's bottom MVs, or replicate our own when
     * at the picture top or the neighbor is intra-coded. */
    if (mb_y == 0 || IS_INTRA(cur_frame->mb_type[xy - s->mb_stride])) {
        AV_COPY32(mv_cache[0][1], mv_cache[1][1]);
        AV_COPY32(mv_cache[0][2], mv_cache[1][2]);
    } else {
        AV_COPY32(mv_cache[0][1],
                  cur_frame->motion_val[0][mot_xy - mot_stride]);
        AV_COPY32(mv_cache[0][2],
                  cur_frame->motion_val[0][mot_xy - mot_stride + 1]);
    }

    /* Left column: left MB's right MVs, or replication. */
    if (mb_x == 0 || IS_INTRA(cur_frame->mb_type[xy - 1])) {
        AV_COPY32(mv_cache[1][0], mv_cache[1][1]);
        AV_COPY32(mv_cache[2][0], mv_cache[2][1]);
    } else {
        AV_COPY32(mv_cache[1][0], cur_frame->motion_val[0][mot_xy - 1]);
        AV_COPY32(mv_cache[2][0],
                  cur_frame->motion_val[0][mot_xy - 1 + mot_stride]);
    }

    /* Right column: right MB's left MVs, or replication. */
    if (mb_x + 1 >= s->mb_width || IS_INTRA(cur_frame->mb_type[xy + 1])) {
        AV_COPY32(mv_cache[1][3], mv_cache[1][2]);
        AV_COPY32(mv_cache[2][3], mv_cache[2][2]);
    } else {
        AV_COPY32(mv_cache[1][3], cur_frame->motion_val[0][mot_xy + 2]);
        AV_COPY32(mv_cache[2][3],
                  cur_frame->motion_val[0][mot_xy + 2 + mot_stride]);
    }

    mx = 0;
    my = 0;
    for (i = 0; i < 4; i++) {
        const int x = (i & 1) + 1;
        const int y = (i >> 1) + 1;
        /* Five MVs for this 8x8 block: own, top, left, right, bottom. */
        int16_t mv[5][2] = {
            { mv_cache[y][x][0],     mv_cache[y][x][1]     },
            { mv_cache[y - 1][x][0], mv_cache[y - 1][x][1] },
            { mv_cache[y][x - 1][0], mv_cache[y][x - 1][1] },
            { mv_cache[y][x + 1][0], mv_cache[y][x + 1][1] },
            { mv_cache[y + 1][x][0], mv_cache[y + 1][x][1] }
        };
        // FIXME cleanup
        obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                    ref_picture[0],
                    mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8,
                    pix_op[1],
                    mv);

        /* Accumulate the own-block MVs for the chroma vector. */
        mx += mv[0][0];
        my += mv[0][1];
    }
    if (!CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY))
        chroma_4mv_motion(s, dest_cb, dest_cr,
                          ref_picture, pix_op[1],
                          mx, my);
}
/**
 * 4MV motion compensation: predict each of the four 8x8 luma blocks with
 * its own vector (quarter-pel or half-pel depending on s->quarter_sample),
 * then compensate chroma once with the summed vector.
 *
 * @param dir     prediction direction (0 = forward, 1 = backward)
 * @param qpix_op quarter-pel function tables (qpel path)
 * @param pix_op  half-pel function tables (hpel path and chroma)
 */
static inline void apply_8x8(MpegEncContext *s,
                             uint8_t *dest_y,
                             uint8_t *dest_cb,
                             uint8_t *dest_cr,
                             int dir,
                             uint8_t **ref_picture,
                             qpel_mc_func (*qpix_op)[16],
                             op_pixels_func (*pix_op)[4])
{
    int dxy, mx, my, src_x, src_y;
    int i;
    int mb_x = s->mb_x;
    int mb_y = s->mb_y;
    uint8_t *ptr, *dest;

    mx = 0;
    my = 0;
    if (s->quarter_sample) {
        for (i = 0; i < 4; i++) {
            int motion_x = s->mv[dir][i][0];
            int motion_y = s->mv[dir][i][1];

            dxy   = ((motion_y & 3) << 2) | (motion_x & 3);
            src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
            src_y = mb_y * 16 + (motion_y >> 2) + (i >> 1) * 8;

            /* WARNING: do no forget half pels */
            src_x = av_clip(src_x, -16, s->width);
            if (src_x == s->width)
                dxy &= ~3; /* clamped right: drop horizontal sub-pel bits */
            src_y = av_clip(src_y, -16, s->height);
            if (src_y == s->height)
                dxy &= ~12; /* clamped bottom: drop vertical sub-pel bits */

            ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
            if ((unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x & 3) - 8, 0) ||
                (unsigned)src_y > FFMAX(s->v_edge_pos - (motion_y & 3) - 8, 0)) {
                s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr,
                                         s->linesize, s->linesize,
                                         9, 9,
                                         src_x, src_y,
                                         s->h_edge_pos,
                                         s->v_edge_pos);
                ptr = s->edge_emu_buffer;
            }
            dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
            qpix_op[1][dxy](dest, ptr, s->linesize);

            /* Accumulate in half-pel units for the chroma vector. */
            mx += s->mv[dir][i][0] / 2;
            my += s->mv[dir][i][1] / 2;
        }
    } else {
        for (i = 0; i < 4; i++) {
            hpel_motion(s,
                        dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8,
                        mb_y * 16 + (i >> 1) * 8,
                        pix_op[1],
                        s->mv[dir][i][0],
                        s->mv[dir][i][1]);

            mx += s->mv[dir][i][0];
            my += s->mv[dir][i][1];
        }
    }

    if (!CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY))
        chroma_4mv_motion(s, dest_cb, dest_cr,
                          ref_picture, pix_op[1], mx, my);
}
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * @param is_mpeg12 compile-time constant selecting the MPEG-1/2 path
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static av_always_inline void MPV_motion_internal(MpegEncContext *s,
                                                 uint8_t *dest_y,
                                                 uint8_t *dest_cb,
                                                 uint8_t *dest_cr,
                                                 int dir,
                                                 uint8_t **ref_picture,
                                                 op_pixels_func (*pix_op)[4],
                                                 qpel_mc_func (*qpix_op)[16],
                                                 int is_mpeg12)
{
    int i;
    int mb_y = s->mb_y;

    prefetch_motion(s, ref_picture, dir);

    /* OBMC replaces the normal prediction entirely for non-B pictures. */
    if (!is_mpeg12 && s->obmc && s->pict_type != AV_PICTURE_TYPE_B) {
        apply_obmc(s, dest_y, dest_cb, dest_cr, ref_picture, pix_op);
        return;
    }

    switch (s->mv_type) {
    case MV_TYPE_16X16:
        if (s->mcsel) {
            /* MPEG-4 global motion compensation. */
            if (s->real_sprite_warping_points == 1) {
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            } else {
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                           ref_picture);
            }
        } else if (!is_mpeg12 && s->quarter_sample) {
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        } else if (!is_mpeg12 && (CONFIG_WMV2_DECODER || CONFIG_WMV2_ENCODER) &&
                   s->mspel && s->codec_id == AV_CODEC_ID_WMV2) {
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 16);
        } else {
            mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16, mb_y);
        }
        break;
    case MV_TYPE_8X8:
        if (!is_mpeg12)
            apply_8x8(s, dest_y, dest_cb, dest_cr,
                      dir, ref_picture, qpix_op, pix_op);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if (!is_mpeg12 && s->quarter_sample) {
                for (i = 0; i < 2; i++)
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
            } else {
                /* top field */
                mpeg_motion_field(s, dest_y, dest_cb, dest_cr,
                                  0, s->field_select[dir][0],
                                  ref_picture, pix_op,
                                  s->mv[dir][0][0], s->mv[dir][0][1], 8, mb_y);
                /* bottom field */
                mpeg_motion_field(s, dest_y, dest_cb, dest_cr,
                                  1, s->field_select[dir][1],
                                  ref_picture, pix_op,
                                  s->mv[dir][1][0], s->mv[dir][1][1], 8, mb_y);
            }
        } else {
            /* Field picture predicting from the opposite parity of the
             * current frame (except in B-pictures / first field). */
            if (s->picture_structure != s->field_select[dir][0] + 1 &&
                s->pict_type != AV_PICTURE_TYPE_B && !s->first_field) {
                ref_picture = s->current_picture_ptr->f.data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16, mb_y >> 1);
        }
        break;
    case MV_TYPE_16X8:
        /* Two 16x8 partitions, each with its own field select. */
        for (i = 0; i < 2; i++) {
            uint8_t **ref2picture;

            if (s->picture_structure == s->field_select[dir][i] + 1
                || s->pict_type == AV_PICTURE_TYPE_B || s->first_field) {
                ref2picture = ref_picture;
            } else {
                ref2picture = s->current_picture_ptr->f.data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16 * i,
                        8, mb_y >> 1);

            dest_y  += 16 * s->linesize;
            dest_cb += (16 >> s->chroma_y_shift) * s->uvlinesize;
            dest_cr += (16 >> s->chroma_y_shift) * s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* Dual-prime: put the first prediction, then average in the
         * opposite-parity one. */
        if (s->picture_structure == PICT_FRAME) {
            for (i = 0; i < 2; i++) {
                int j;
                for (j = 0; j < 2; j++)
                    mpeg_motion_field(s, dest_y, dest_cb, dest_cr,
                                      j, j ^ i, ref_picture, pix_op,
                                      s->mv[dir][2 * i + j][0],
                                      s->mv[dir][2 * i + j][1], 8, mb_y);
                pix_op = s->hdsp.avg_pixels_tab;
            }
        } else {
            for (i = 0; i < 2; i++) {
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            s->picture_structure != i + 1,
                            ref_picture, pix_op,
                            s->mv[dir][2 * i][0], s->mv[dir][2 * i][1],
                            16, mb_y >> 1);

                // after put we make avg of the same block
                pix_op = s->hdsp.avg_pixels_tab;

                /* opposite parity is always in the same frame if this is
                 * second field */
                if (!s->first_field) {
                    ref_picture = s->current_picture_ptr->f.data;
                }
            }
        }
        break;
    default: assert(0);
    }
}
  875. void ff_MPV_motion(MpegEncContext *s,
  876. uint8_t *dest_y, uint8_t *dest_cb,
  877. uint8_t *dest_cr, int dir,
  878. uint8_t **ref_picture,
  879. op_pixels_func (*pix_op)[4],
  880. qpel_mc_func (*qpix_op)[16])
  881. {
  882. #if !CONFIG_SMALL
  883. if (s->out_format == FMT_MPEG1)
  884. MPV_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
  885. ref_picture, pix_op, qpix_op, 1);
  886. else
  887. #endif
  888. MPV_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
  889. ref_picture, pix_op, qpix_op, 0);
  890. }