You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1509 lines
50KB

  1. /*
  2. * Motion estimation
  3. * Copyright (c) 2000,2001 Fabrice Bellard.
  4. * Copyright (c) 2002 Michael Niedermayer
  5. *
  6. *
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this library; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20. *
  21. * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
  22. */
  23. #include <stdlib.h>
  24. #include <stdio.h>
  25. #include "avcodec.h"
  26. #include "dsputil.h"
  27. #include "mpegvideo.h"
  28. //#undef NDEBUG
  29. //#include <assert.h>
/* Square of a.  The argument is expanded (and therefore evaluated) twice. */
#define SQ(a) ((a)*(a))

/*
 * Named slots of the local candidate-vector array P[10][2] that the callers
 * in this file fill in before invoking a motion search ([0] = x, [1] = y).
 */
/* value read from motion_val[] at the current position before it is rewritten */
#define P_LAST P[0]
/* value read from motion_val[] one entry to the left */
#define P_LEFT P[1]
/* value read from motion_val[] one row (mot_stride) above */
#define P_TOP P[2]
/* value read from motion_val[] one row above, shifted right by a per-block offset */
#define P_TOPRIGHT P[3]
/* component-wise mid_pred() of P_LEFT, P_TOP and P_TOPRIGHT */
#define P_MEDIAN P[4]
/* not written anywhere in the visible part of this file -- TODO confirm use */
#define P_LAST_LEFT P[5]
/* value read from motion_val[] to the right of the current position */
#define P_LAST_RIGHT P[6]
/* not written anywhere in the visible part of this file -- TODO confirm use */
#define P_LAST_TOP P[7]
/* value read from motion_val[] below the current position */
#define P_LAST_BOTTOM P[8]
/* vector handed to mv4_search() by its caller (the mx/my arguments) */
#define P_MV1 P[9]

/* Forward declaration; the definition follows further down in this file. */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int xmin, int ymin, int xmax, int ymax,
                                  int pred_x, int pred_y, Picture *picture,
                                  int n, int size, uint16_t * const mv_penalty);
  46. static inline int update_map_generation(MpegEncContext * s)
  47. {
  48. s->me.map_generation+= 1<<(ME_MAP_MV_BITS*2);
  49. if(s->me.map_generation==0){
  50. s->me.map_generation= 1<<(ME_MAP_MV_BITS*2);
  51. memset(s->me.map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
  52. }
  53. return s->me.map_generation;
  54. }
/* SIMPLE */
/*
 * First inclusion of motion_est_template.c: luma-only comparison.
 * RENAME() prefixes every name it is applied to with "simple_"
 * (ff_init_me() below refers to simple_hpel_motion_search,
 * simple_qpel_motion_search, simple_epzs_motion_search, ...).
 *
 * The CMP* macros are not self-contained: s, cmp, cmp_sub, src_y, ref_y,
 * stride, hpel_put and qpel_put must be in scope where they are expanded
 * (presumably inside the template -- not visible from here).
 */
#define RENAME(a) simple_ ## a

/* d = cmp() score of the source block against the reference at full-pel offset (x, y). */
#define CMP(d, x, y, size)\
d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);

/*
 * Half-pel variant: (dx, dy) selects entry dx + 2*dy of hpel_put[0][], which
 * writes a block of height 16>>size into s->me.scratchpad; d is then the
 * cmp_sub() score of that scratch block against the source.
 */
#define CMP_HPEL(d, dx, dy, x, y, size)\
{\
    const int dxy= (dx) + 2*(dy);\
    hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
}

/* Quarter-pel variant: same scheme, table index is dx + 4*dy into qpel_put[0][]. */
#define CMP_QPEL(d, dx, dy, x, y, size)\
{\
    const int dxy= (dx) + 4*(dy);\
    qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
}

#include "motion_est_template.c"
/* Drop the per-instantiation macros so the next section can redefine them.
   INIT is not defined in this file; presumably the template defines it. */
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
/* SIMPLE CHROMA */
/*
 * Second inclusion of motion_est_template.c: luma score plus, when a chroma
 * comparison function is set, the scores of both chroma planes.
 * RENAME() prefixes names with "simple_chroma_".
 *
 * In addition to the names needed by the SIMPLE section, the macros expect
 * chroma_cmp, chroma_cmp_sub, chroma_hpel_put, src_u, src_v, ref_u, ref_v and
 * uvstride to be in scope at the point of expansion.
 */
#define RENAME(a) simple_chroma_ ## a

/*
 * Full-pel luma score; if chroma_cmp is non-null the chroma position is
 * (x>>1, y>>1) with the low bits of x and y selecting the chroma_hpel_put
 * entry, and the U and V scores (8 rows each) are added to d.
 */
#define CMP(d, x, y, size)\
d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);\
if(chroma_cmp){\
    int dxy= ((x)&1) + 2*((y)&1);\
    int c= ((x)>>1) + ((y)>>1)*uvstride;\
\
    chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
    d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride);\
    chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
    d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride);\
}

/*
 * Half-pel luma score as in the SIMPLE section; for chroma the table index
 * cxy is the luma dxy OR-ed with the low bits of x and y.
 */
#define CMP_HPEL(d, dx, dy, x, y, size)\
{\
    const int dxy= (dx) + 2*(dy);\
    hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
    if(chroma_cmp_sub){\
        int cxy= (dxy) | ((x)&1) | (2*((y)&1));\
        int c= ((x)>>1) + ((y)>>1)*uvstride;\
        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
        d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
        d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
    }\
}

/*
 * Quarter-pel luma score; the chroma vector is derived from the quarter-pel
 * luma position 4*x+dx (resp. 4*y+dy): halved, then reduced once more with
 * the dropped bit OR-ed back in, and finally split into a table index
 * (low bit) and an integer offset (remaining bits).
 */
#define CMP_QPEL(d, dx, dy, x, y, size)\
{\
    const int dxy= (dx) + 4*(dy);\
    qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
    if(chroma_cmp_sub){\
        int cxy, c;\
        int cx= (4*(x) + (dx))/2;\
        int cy= (4*(y) + (dy))/2;\
        cx= (cx>>1)|(cx&1);\
        cy= (cy>>1)|(cy&1);\
        cxy= (cx&1) + 2*(cy&1);\
        c= ((cx)>>1) + ((cy)>>1)*uvstride;\
        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
        d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
        d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
    }\
}

#include "motion_est_template.c"
/* Drop the per-instantiation macros so the next section can redefine them. */
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
/* SIMPLE DIRECT HPEL */
/*
 * Third inclusion of motion_est_template.c: half-pel "direct" comparison.
 * RENAME() prefixes names with "simple_direct_hpel_".
 */
#define RENAME(a) simple_direct_hpel_ ## a
//FIXME precalc divisions stuff
/*
 * CMP_DIRECT builds a prediction in s->me.scratchpad from two references and
 * scores it with cmp_func:
 *   - hx/hy is the candidate offset in half-pel units (2*x+dx, 2*y+dy);
 *   - the forward vector is direct_basis_mv + offset, written with hpel_put
 *     from ref_y;
 *   - the backward vector is forward - co_located_mv when the offset
 *     component is non-zero, otherwise co_located_mv scaled by
 *     (time_pb - time_pp)/time_pp; it is blended in with hpel_avg from ref2_y.
 * With s->mv_type == MV_TYPE_8X8 this is done per 8x8 block (index 1 of the
 * put/avg tables, four vectors); otherwise once for the whole block (index 0).
 * Candidates outside [xmin..xmax] x [ymin..ymax] get the score 256*256*256*32.
 *
 * Needs s, xmin, xmax, ymin, ymax, time_pb, time_pp, ref_y, ref2_y, src_y,
 * stride, hpel_put and hpel_avg in scope at the point of expansion.
 */
#define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*ymax){\
    const int hx= 2*(x) + (dx);\
    const int hy= 2*(y) + (dy);\
    if(s->mv_type==MV_TYPE_8X8){\
        int i;\
        for(i=0; i<4; i++){\
            int fx = s->me.direct_basis_mv[i][0] + hx;\
            int fy = s->me.direct_basis_mv[i][1] + hy;\
            int bx = hx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
            int by = hy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
            int fxy= (fx&1) + 2*(fy&1);\
            int bxy= (bx&1) + 2*(by&1);\
\
            uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
            hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\
            hpel_avg[1][bxy](dst, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 8);\
        }\
    }else{\
        int fx = s->me.direct_basis_mv[0][0] + hx;\
        int fy = s->me.direct_basis_mv[0][1] + hy;\
        int bx = hx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
        int by = hy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
        int fxy= (fx&1) + 2*(fy&1);\
        int bxy= (bx&1) + 2*(by&1);\
\
        hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\
        hpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 16);\
    }\
    d = cmp_func(s, s->me.scratchpad, src_y, stride);\
}else\
    d= 256*256*256*32;

/* Sub-pel candidates are scored with cmp_sub ... */
#define CMP_HPEL(d, dx, dy, x, y, size)\
CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)

/* ... full-pel candidates (dx = dy = 0) with cmp. */
#define CMP(d, x, y, size)\
CMP_DIRECT(d, 0, 0, x, y, size, cmp)

#include "motion_est_template.c"
/* Drop the per-instantiation macros (CMP_QPEL is not defined in this section). */
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
#undef CMP_DIRECT
  175. /* SIMPLE DIRECT QPEL */
  176. #define RENAME(a) simple_direct_qpel_ ## a
  177. #define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
  178. if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*ymax){\
  179. const int qx= 4*(x) + (dx);\
  180. const int qy= 4*(y) + (dy);\
  181. if(s->mv_type==MV_TYPE_8X8){\
  182. int i;\
  183. for(i=0; i<4; i++){\
  184. int fx = s->me.direct_basis_mv[i][0] + qx;\
  185. int fy = s->me.direct_basis_mv[i][1] + qy;\
  186. int bx = qx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
  187. int by = qy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
  188. int fxy= (fx&3) + 4*(fy&3);\
  189. int bxy= (bx&3) + 4*(by&3);\
  190. \
  191. uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
  192. qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
  193. qpel_avg[1][bxy](dst, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
  194. }\
  195. }else{\
  196. int fx = s->me.direct_basis_mv[0][0] + qx;\
  197. int fy = s->me.direct_basis_mv[0][1] + qy;\
  198. int bx = qx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
  199. int by = qy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
  200. int fxy= (fx&3) + 4*(fy&3);\
  201. int bxy= (bx&3) + 4*(by&3);\
  202. \
  203. qpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
  204. qpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
  205. }\
  206. d = cmp_func(s, s->me.scratchpad, src_y, stride);\
  207. }else\
  208. d= 256*256*256*32;
  209. #define CMP_QPEL(d, dx, dy, x, y, size)\
  210. CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)
  211. #define CMP(d, x, y, size)\
  212. CMP_DIRECT(d, 0, 0, x, y, size, cmp)
  213. #include "motion_est_template.c"
  214. #undef RENAME
  215. #undef CMP
  216. #undef CMP_HPEL
  217. #undef CMP_QPEL
  218. #undef INIT
  219. #undef CMP__DIRECT
  220. static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride){
  221. return 0;
  222. }
  223. static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
  224. DSPContext* c= &s->dsp;
  225. int i;
  226. memset(cmp, 0, sizeof(void*)*11);
  227. switch(type&0xFF){
  228. case FF_CMP_SAD:
  229. cmp[0]= c->sad[0];
  230. cmp[1]= c->sad[1];
  231. break;
  232. case FF_CMP_SATD:
  233. cmp[0]= c->hadamard8_diff[0];
  234. cmp[1]= c->hadamard8_diff[1];
  235. break;
  236. case FF_CMP_SSE:
  237. cmp[0]= c->sse[0];
  238. cmp[1]= c->sse[1];
  239. break;
  240. case FF_CMP_DCT:
  241. cmp[0]= c->dct_sad[0];
  242. cmp[1]= c->dct_sad[1];
  243. break;
  244. case FF_CMP_PSNR:
  245. cmp[0]= c->quant_psnr[0];
  246. cmp[1]= c->quant_psnr[1];
  247. break;
  248. case FF_CMP_ZERO:
  249. for(i=0; i<7; i++){
  250. cmp[i]= zero_cmp;
  251. }
  252. break;
  253. default:
  254. fprintf(stderr,"internal error in cmp function selection\n");
  255. }
  256. };
  257. static inline int get_penalty_factor(MpegEncContext *s, int type){
  258. switch(type){
  259. default:
  260. case FF_CMP_SAD:
  261. return s->qscale;
  262. case FF_CMP_SSE:
  263. // return s->qscale*8;
  264. case FF_CMP_DCT:
  265. case FF_CMP_SATD:
  266. return s->qscale*8;
  267. }
  268. }
  269. void ff_init_me(MpegEncContext *s){
  270. set_cmp(s, s->dsp.me_cmp, s->avctx->me_cmp);
  271. set_cmp(s, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
  272. set_cmp(s, s->dsp.mb_cmp, s->avctx->mb_cmp);
  273. if(s->flags&CODEC_FLAG_QPEL){
  274. if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
  275. s->me.sub_motion_search= simple_chroma_qpel_motion_search;
  276. else
  277. s->me.sub_motion_search= simple_qpel_motion_search;
  278. }else{
  279. if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
  280. s->me.sub_motion_search= simple_chroma_hpel_motion_search;
  281. else if(s->avctx->me_sub_cmp == FF_CMP_SAD && s->avctx->me_cmp == FF_CMP_SAD)
  282. s->me.sub_motion_search= sad_hpel_motion_search;
  283. else
  284. s->me.sub_motion_search= simple_hpel_motion_search;
  285. }
  286. if(s->avctx->me_cmp&FF_CMP_CHROMA){
  287. s->me.motion_search[0]= simple_chroma_epzs_motion_search;
  288. s->me.motion_search[1]= simple_chroma_epzs_motion_search4;
  289. }else{
  290. s->me.motion_search[0]= simple_epzs_motion_search;
  291. s->me.motion_search[1]= simple_epzs_motion_search4;
  292. }
  293. }
  294. static int pix_dev(UINT8 * pix, int line_size, int mean)
  295. {
  296. int s, i, j;
  297. s = 0;
  298. for (i = 0; i < 16; i++) {
  299. for (j = 0; j < 16; j += 8) {
  300. s += ABS(pix[0]-mean);
  301. s += ABS(pix[1]-mean);
  302. s += ABS(pix[2]-mean);
  303. s += ABS(pix[3]-mean);
  304. s += ABS(pix[4]-mean);
  305. s += ABS(pix[5]-mean);
  306. s += ABS(pix[6]-mean);
  307. s += ABS(pix[7]-mean);
  308. pix += 8;
  309. }
  310. pix += line_size - 16;
  311. }
  312. return s;
  313. }
  314. static inline void no_motion_search(MpegEncContext * s,
  315. int *mx_ptr, int *my_ptr)
  316. {
  317. *mx_ptr = 16 * s->mb_x;
  318. *my_ptr = 16 * s->mb_y;
  319. }
  320. static int full_motion_search(MpegEncContext * s,
  321. int *mx_ptr, int *my_ptr, int range,
  322. int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
  323. {
  324. int x1, y1, x2, y2, xx, yy, x, y;
  325. int mx, my, dmin, d;
  326. UINT8 *pix;
  327. xx = 16 * s->mb_x;
  328. yy = 16 * s->mb_y;
  329. x1 = xx - range + 1; /* we loose one pixel to avoid boundary pb with half pixel pred */
  330. if (x1 < xmin)
  331. x1 = xmin;
  332. x2 = xx + range - 1;
  333. if (x2 > xmax)
  334. x2 = xmax;
  335. y1 = yy - range + 1;
  336. if (y1 < ymin)
  337. y1 = ymin;
  338. y2 = yy + range - 1;
  339. if (y2 > ymax)
  340. y2 = ymax;
  341. pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
  342. dmin = 0x7fffffff;
  343. mx = 0;
  344. my = 0;
  345. for (y = y1; y <= y2; y++) {
  346. for (x = x1; x <= x2; x++) {
  347. d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
  348. s->linesize);
  349. if (d < dmin ||
  350. (d == dmin &&
  351. (abs(x - xx) + abs(y - yy)) <
  352. (abs(mx - xx) + abs(my - yy)))) {
  353. dmin = d;
  354. mx = x;
  355. my = y;
  356. }
  357. }
  358. }
  359. *mx_ptr = mx;
  360. *my_ptr = my;
  361. #if 0
  362. if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
  363. *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
  364. fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
  365. }
  366. #endif
  367. return dmin;
  368. }
  369. static int log_motion_search(MpegEncContext * s,
  370. int *mx_ptr, int *my_ptr, int range,
  371. int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
  372. {
  373. int x1, y1, x2, y2, xx, yy, x, y;
  374. int mx, my, dmin, d;
  375. UINT8 *pix;
  376. xx = s->mb_x << 4;
  377. yy = s->mb_y << 4;
  378. /* Left limit */
  379. x1 = xx - range;
  380. if (x1 < xmin)
  381. x1 = xmin;
  382. /* Right limit */
  383. x2 = xx + range;
  384. if (x2 > xmax)
  385. x2 = xmax;
  386. /* Upper limit */
  387. y1 = yy - range;
  388. if (y1 < ymin)
  389. y1 = ymin;
  390. /* Lower limit */
  391. y2 = yy + range;
  392. if (y2 > ymax)
  393. y2 = ymax;
  394. pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
  395. dmin = 0x7fffffff;
  396. mx = 0;
  397. my = 0;
  398. do {
  399. for (y = y1; y <= y2; y += range) {
  400. for (x = x1; x <= x2; x += range) {
  401. d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
  402. if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
  403. dmin = d;
  404. mx = x;
  405. my = y;
  406. }
  407. }
  408. }
  409. range = range >> 1;
  410. x1 = mx - range;
  411. if (x1 < xmin)
  412. x1 = xmin;
  413. x2 = mx + range;
  414. if (x2 > xmax)
  415. x2 = xmax;
  416. y1 = my - range;
  417. if (y1 < ymin)
  418. y1 = ymin;
  419. y2 = my + range;
  420. if (y2 > ymax)
  421. y2 = ymax;
  422. } while (range >= 1);
  423. #ifdef DEBUG
  424. fprintf(stderr, "log - MX: %d\tMY: %d\n", mx, my);
  425. #endif
  426. *mx_ptr = mx;
  427. *my_ptr = my;
  428. return dmin;
  429. }
/**
 * Full-pel search that refines the x and y coordinates separately, using
 * s->dsp.pix_abs16x16 as the score.
 *
 * In every pass a row of candidates (spacing range, at the current y) and a
 * column of candidates (spacing range, at the x the pass started with) are
 * scored independently; mx takes the best x of the row, my the best y of the
 * column.  range is then halved, the window is re-centred on (mx, my), and
 * the pass is repeated until range drops below 1.
 *
 * @param mx_ptr,my_ptr receive the ABSOLUTE position (mx, my)
 * @param range         initial candidate spacing / half window size
 * @param ref_picture   luma plane searched in
 * @return the best score of the last column scan (dminy).
 *         NOTE(review): this is not necessarily the score at (mx, my), since
 *         the column is scored at the x of the start of the pass -- confirm
 *         that callers are fine with that.
 */
static int phods_motion_search(MpegEncContext * s,
                               int *mx_ptr, int *my_ptr, int range,
                               int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y, lastx, d;
    int mx, my, dminx, dminy;
    UINT8 *pix;

    /* pixel origin of the current macroblock */
    xx = s->mb_x << 4;
    yy = s->mb_y << 4;

    /* Left limit */
    x1 = xx - range;
    if (x1 < xmin)
        x1 = xmin;

    /* Right limit */
    x2 = xx + range;
    if (x2 > xmax)
        x2 = xmax;

    /* Upper limit */
    y1 = yy - range;
    if (y1 < ymin)
        y1 = ymin;

    /* Lower limit */
    y2 = yy + range;
    if (y2 > ymax)
        y2 = ymax;

    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    mx = 0;
    my = 0;

    x = xx;
    y = yy;
    do {
        /* both minima are restarted on every pass */
        dminx = 0x7fffffff;
        dminy = 0x7fffffff;

        /* horizontal scan at the current y; x is used as the loop variable,
           so remember the x this pass started with */
        lastx = x;
        for (x = x1; x <= x2; x += range) {
            d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
            if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminx = d;
                mx = x;
            }
        }

        /* vertical scan at the x the pass started with */
        x = lastx;
        for (y = y1; y <= y2; y += range) {
            d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
            if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminy = d;
                my = y;
            }
        }

        range = range >> 1;

        /* re-centre the (clipped) window on the best row/column result */
        x = mx;
        y = my;
        x1 = mx - range;
        if (x1 < xmin)
            x1 = xmin;

        x2 = mx + range;
        if (x2 > xmax)
            x2 = xmax;

        y1 = my - range;
        if (y1 < ymin)
            y1 = ymin;

        y2 = my + range;
        if (y2 > ymax)
            y2 = ymax;

    } while (range >= 1);

#ifdef DEBUG
    fprintf(stderr, "phods - MX: %d\tMY: %d\n", mx, my);
#endif

    /* no half-pixel search is done here; the full-pel result is returned as is */
    *mx_ptr = mx;
    *my_ptr = my;
    return dminy;
}
/* Not referenced in the visible part of this file -- TODO confirm use. */
#define Z_THRESHOLD 256

/*
 * Helper for sad_hpel_motion_search(): score one half-pel candidate.
 *   suffix selects the function (pix_abs_x2 / pix_abs_y2 / pix_abs_xy2);
 *   x, y   are the half-pel offsets of the candidate (-1, 0 or +1).
 * The score is the function's result at ptr + (x>>1) plus the mv_penalty
 * entries for pen_x + x and pen_y + y multiplied by penalty_factor; if it
 * beats dminh, COPY3_IF_LT records it together with (x, y) in (dx, dy).
 * The vertical position is NOT taken from y: the caller moves ptr up or down
 * by one line itself.
 * Expects pix, ptr, s, d, mv_penalty, pen_x, pen_y, penalty_factor, dminh,
 * dx and dy in scope; x and y are used unparenthesized, so pass plain
 * constants only.
 */
#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
    d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
    d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
/**
 * Half-pel refinement of a full-pel vector using the SAD half-pel functions.
 *
 * @param mx_ptr,my_ptr in: full-pel displacement; out: displacement in
 *                      half-pel units (input doubled, plus the chosen
 *                      -1/0/+1 correction), or (0,0) if s->me.skip is set
 * @param dmin          score of the full-pel vector; starting value of the
 *                      running minimum
 * @param xmin,ymin,xmax,ymax  limits of the displacement; refinement is only
 *                      attempted strictly inside them
 * @param pred_x,pred_y predictor used for the mv_penalty lookup
 * @param picture       reference picture (plane 0 is used)
 * @param n             sub-block number; bit 0 / bit 1 add 8 pixels to the
 *                      x / y origin
 * @param size          0 selects the 16x16 functions, anything else the 8x8 ones
 * @param mv_penalty    penalty table indexed by pen_x/pen_y plus the offset
 * @return the best score found (dmin if nothing better was found or no
 *         refinement was done)
 */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int xmin, int ymin, int xmax, int ymax,
                                  int pred_x, int pred_y, Picture *picture,
                                  int n, int size, uint16_t * const mv_penalty)
{
    uint8_t *ref_picture= picture->data[0];
    uint32_t *score_map= s->me.score_map;
    const int penalty_factor= s->me.sub_penalty_factor;
    int mx, my, xx, yy, dminh;
    UINT8 *pix, *ptr;
    op_pixels_abs_func pix_abs_x2;
    op_pixels_abs_func pix_abs_y2;
    op_pixels_abs_func pix_abs_xy2;

    /* pick the half-pel SAD functions matching the block size */
    if(size==0){
        pix_abs_x2 = s->dsp.pix_abs16x16_x2;
        pix_abs_y2 = s->dsp.pix_abs16x16_y2;
        pix_abs_xy2= s->dsp.pix_abs16x16_xy2;
    }else{
        pix_abs_x2 = s->dsp.pix_abs8x8_x2;
        pix_abs_y2 = s->dsp.pix_abs8x8_y2;
        pix_abs_xy2= s->dsp.pix_abs8x8_xy2;
    }

    /* skip flag set: report a zero vector and the unchanged score */
    if(s->me.skip){
// printf("S");
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }
// printf("N");

    /* pixel origin of the (sub-)block inside the picture */
    xx = 16 * s->mb_x + 8*(n&1);
    yy = 16 * s->mb_y + 8*(n>>1);
    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;

    mx = *mx_ptr;
    my = *my_ptr;
    /* reference block the full-pel vector points at */
    ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);

    dminh = dmin;

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        /* scores stored in score_map for the top/left/right/bottom
           full-pel neighbours of (mx, my) */
        const int index= (my<<ME_MAP_SHIFT) + mx;
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        /* from here on mx/my are in half-pel units */
        mx<<=1;
        my<<=1;

        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /*
         * Only 5 of the 8 half-pel neighbours are scored; which ones is
         * decided from the neighbour scores t, l, r, b.  ptr starts one line
         * up for the candidates with y = -1 and is moved back down
         * (ptr += linesize) before the candidates with y = 0 / +1 are scored.
         */
        ptr-= s->linesize;
        if(t<=b){
            CHECK_SAD_HALF_MV(y2 , 0, -1)
            if(l<=r){
                CHECK_SAD_HALF_MV(xy2, -1, -1)
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= s->linesize;
                }else{
                    ptr+= s->linesize;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1, 0)
            }else{
                CHECK_SAD_HALF_MV(xy2, +1, -1)
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= s->linesize;
                }else{
                    ptr+= s->linesize;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1, 0)
            }
        }else{
            if(l<=r){
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= s->linesize;
                }else{
                    ptr+= s->linesize;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1, 0)
                CHECK_SAD_HALF_MV(xy2, -1, +1)
            }else{
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= s->linesize;
                }else{
                    ptr+= s->linesize;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1, 0)
                CHECK_SAD_HALF_MV(xy2, +1, +1)
            }
            CHECK_SAD_HALF_MV(y2 , 0, +1)
        }
        /* apply the winning half-pel correction (0,0 if none was better) */
        mx+=dx;
        my+=dy;

    }else{
        /* on the border of the allowed area: only convert to half-pel units */
        mx<<=1;
        my<<=1;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}
  618. static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
  619. {
  620. const int xy= s->mb_x + 1 + (s->mb_y + 1)*(s->mb_width + 2);
  621. s->p_mv_table[xy][0] = mx;
  622. s->p_mv_table[xy][1] = my;
  623. /* has allready been set to the 4 MV if 4MV is done */
  624. if(mv4){
  625. int mot_xy= s->block_index[0];
  626. s->motion_val[mot_xy ][0]= mx;
  627. s->motion_val[mot_xy ][1]= my;
  628. s->motion_val[mot_xy+1][0]= mx;
  629. s->motion_val[mot_xy+1][1]= my;
  630. mot_xy += s->block_wrap[0];
  631. s->motion_val[mot_xy ][0]= mx;
  632. s->motion_val[mot_xy ][1]= my;
  633. s->motion_val[mot_xy+1][0]= mx;
  634. s->motion_val[mot_xy+1][1]= my;
  635. }
  636. }
  637. static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymin, int *xmax, int *ymax, int f_code)
  638. {
  639. *range = 8 * (1 << (f_code - 1));
  640. /* XXX: temporary kludge to avoid overflow for msmpeg4 */
  641. if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
  642. *range *= 2;
  643. if (s->unrestricted_mv) {
  644. *xmin = -16;
  645. *ymin = -16;
  646. if (s->h263_plus)
  647. *range *= 2;
  648. if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4){
  649. *xmax = s->mb_width*16;
  650. *ymax = s->mb_height*16;
  651. }else {
  652. /* XXX: dunno if this is correct but ffmpeg4 decoder wont like it otherwise
  653. (cuz the drawn edge isnt large enough))*/
  654. *xmax = s->width;
  655. *ymax = s->height;
  656. }
  657. } else {
  658. *xmin = 0;
  659. *ymin = 0;
  660. *xmax = s->mb_width*16 - 16;
  661. *ymax = s->mb_height*16 - 16;
  662. }
  663. }
/**
 * Search one vector for each of the four 8x8 blocks of the current macroblock.
 *
 * For every block the candidate array P is filled from s->motion_val
 * (entries at, left of, right of, below and above the block), a prediction
 * is derived, s->me.motion_search[1] and then s->me.sub_motion_search are
 * run against s->last_picture, and the result is written back to
 * s->motion_val.
 *
 * @param xmin,ymin,xmax,ymax limits passed on unchanged for every block
 * @param mx,my  vector stored as the P_MV1 candidate
 * @param shift  left shift applied to the limits before they are compared
 *               with the stored vectors
 * @return the sum of the four scores returned by the sub-pel search
 */
static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift)
{
    int block;
    int P[10][2];
    /* NOTE(review): ref_picture is not used anywhere in this function */
    uint8_t *ref_picture= s->last_picture.data[0];
    int dmin_sum=0;
    uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;

    for(block=0; block<4; block++){
        int mx4, my4;
        int pred_x4, pred_y4;
        int dmin4;
        /* per-block offset (relative to the entry above) used for P_TOPRIGHT */
        static const int off[4]= {2, 1, 1, -1};
        const int mot_stride = s->block_wrap[0];
        const int mot_xy = s->block_index[block];
// const int block_x= (block&1);
// const int block_y= (block>>1);
#if 1 // this saves us a bit of clipping work and should not affect compression in a negative way
        const int rel_xmin4= xmin;
        const int rel_xmax4= xmax;
        const int rel_ymin4= ymin;
        const int rel_ymax4= ymax;
#else
        const int rel_xmin4= xmin - block_x*8;
        const int rel_xmax4= xmax - block_x*8 + 8;
        const int rel_ymin4= ymin - block_y*8;
        const int rel_ymax4= ymax - block_y*8 + 8;
#endif
        /* candidates read from motion_val before this block's entry is rewritten */
        P_LAST[0] = s->motion_val[mot_xy ][0];
        P_LAST[1] = s->motion_val[mot_xy ][1];
        P_LEFT[0] = s->motion_val[mot_xy - 1][0];
        P_LEFT[1] = s->motion_val[mot_xy - 1][1];
        P_LAST_RIGHT[0] = s->motion_val[mot_xy + 1][0];
        P_LAST_RIGHT[1] = s->motion_val[mot_xy + 1][1];
        P_LAST_BOTTOM[0]= s->motion_val[mot_xy + 1*mot_stride][0];
        P_LAST_BOTTOM[1]= s->motion_val[mot_xy + 1*mot_stride][1];

        /* one-sided clipping of the candidates against the shifted limits */
        if(P_LEFT[0] > (rel_xmax4<<shift)) P_LEFT[0] = (rel_xmax4<<shift);
        if(P_LAST_RIGHT[0] < (rel_xmin4<<shift)) P_LAST_RIGHT[0] = (rel_xmin4<<shift);
        if(P_LAST_BOTTOM[1]< (rel_ymin4<<shift)) P_LAST_BOTTOM[1]= (rel_ymin4<<shift);

        /* special case for first line: the upper two blocks have no row
           above them, so the left vector is the prediction (P_TOP,
           P_TOPRIGHT and P_MEDIAN are left unset on this path) */
        if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
            pred_x4= P_LEFT[0];
            pred_y4= P_LEFT[1];
        } else {
            P_TOP[0] = s->motion_val[mot_xy - mot_stride ][0];
            P_TOP[1] = s->motion_val[mot_xy - mot_stride ][1];
            P_TOPRIGHT[0] = s->motion_val[mot_xy - mot_stride + off[block]][0];
            P_TOPRIGHT[1] = s->motion_val[mot_xy - mot_stride + off[block]][1];
            if(P_TOP[1] > (rel_ymax4<<shift)) P_TOP[1] = (rel_ymax4<<shift);
            if(P_TOPRIGHT[0] < (rel_xmin4<<shift)) P_TOPRIGHT[0]= (rel_xmin4<<shift);
            if(P_TOPRIGHT[0] > (rel_xmax4<<shift)) P_TOPRIGHT[0]= (rel_xmax4<<shift);
            if(P_TOPRIGHT[1] > (rel_ymax4<<shift)) P_TOPRIGHT[1]= (rel_ymax4<<shift);

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            if(s->out_format == FMT_H263){
                pred_x4 = P_MEDIAN[0];
                pred_y4 = P_MEDIAN[1];
            }else { /* mpeg1 at least */
                pred_x4= P_LEFT[0];
                pred_y4= P_LEFT[1];
            }
        }
        /* the caller's vector is offered as one more candidate */
        P_MV1[0]= mx;
        P_MV1[1]= my;

        /* full-pel search for this block, then sub-pel refinement (size argument 1) */
        dmin4 = s->me.motion_search[1](s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
                                       &s->last_picture, mv_penalty);

        dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
                                       pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty);

        /* store the result; later blocks read it as a neighbour */
        s->motion_val[ s->block_index[block] ][0]= mx4;
        s->motion_val[ s->block_index[block] ][1]= my4;
        dmin_sum+= dmin4;
    }
    return dmin_sum;
}
/**
 * Motion estimation and intra/inter decision for one macroblock of a P frame.
 *
 * Steps, in order:
 *  1. set the penalty factors and the search limits (absolute, and relative
 *     to the macroblock origin);
 *  2. run the full-pel search selected by s->me_method against
 *     s->last_picture, giving a displacement (mx, my) relative to the
 *     macroblock;
 *  3. compute sum, varc (from pix_sum / pix_norm1 of the source block) and
 *     vard (from sse[0] between the source block and the block (mx, my)
 *     points at) and store them in the per-macroblock tables and running
 *     sums of s->current_picture;
 *  4. choose the macroblock type(s), optionally refine to sub-pel precision
 *     and try four vectors, store the vectors with set_p_mv_tables() and
 *     update s->scene_change_score;
 *  5. write the type to s->mb_type.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_p_frame_motion(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    UINT8 *pix, *ppix;
    int sum, varc, vard, mx, my, range, dmin, xx, yy;
    int xmin, ymin, xmax, ymax;
    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
    int pred_x=0, pred_y=0;
    int P[10][2];
    /* stored vectors are the full-pel values shifted left by this amount */
    const int shift= 1+s->quarter_sample;
    int mb_type=0;
    uint8_t *ref_picture= s->last_picture.data[0];
    Picture * const pic= &s->current_picture;
    uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;

    assert(s->quarter_sample==0 || s->quarter_sample==1);

    s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
    s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);

    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code);
    /* the same limits expressed relative to the macroblock origin */
    rel_xmin= xmin - mb_x*16;
    rel_xmax= xmax - mb_x*16;
    rel_ymin= ymin - mb_y*16;
    rel_ymax= ymax - mb_y*16;
    s->me.skip=0;

    /* The first four searches return absolute positions, which are turned
       into displacements here; the template search works with relative
       limits and is used as is. */
    switch(s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        mx-= mb_x*16;
        my-= mb_y*16;
        dmin = 0;
        break;
    case ME_FULL:
        dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_LOG:
        dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_PHODS:
        dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_X1:
    case ME_EPZS:
        {
            const int mot_stride = s->block_wrap[0];
            const int mot_xy = s->block_index[0];

            /* candidates read from motion_val around the macroblock; the
               right/bottom neighbours are two entries / two rows away */
            P_LAST[0] = s->motion_val[mot_xy ][0];
            P_LAST[1] = s->motion_val[mot_xy ][1];
            P_LEFT[0] = s->motion_val[mot_xy - 1][0];
            P_LEFT[1] = s->motion_val[mot_xy - 1][1];
            P_LAST_RIGHT[0] = s->motion_val[mot_xy + 2][0];
            P_LAST_RIGHT[1] = s->motion_val[mot_xy + 2][1];
            P_LAST_BOTTOM[0]= s->motion_val[mot_xy + 2*mot_stride][0];
            P_LAST_BOTTOM[1]= s->motion_val[mot_xy + 2*mot_stride][1];

            /* one-sided clipping of the candidates against the shifted limits */
            if(P_LEFT[0] > (rel_xmax<<shift)) P_LEFT[0] = (rel_xmax<<shift);
            if(P_LAST_RIGHT[0] < (rel_xmin<<shift)) P_LAST_RIGHT[0] = (rel_xmin<<shift);
            if(P_LAST_BOTTOM[1]< (rel_ymin<<shift)) P_LAST_BOTTOM[1]= (rel_ymin<<shift);

            /* special case for first line: no row above, predict from the left
               (P_TOP, P_TOPRIGHT and P_MEDIAN are left unset on this path) */
            if ((mb_y == 0 || s->first_slice_line)) {
                pred_x= P_LEFT[0];
                pred_y= P_LEFT[1];
            } else {
                P_TOP[0] = s->motion_val[mot_xy - mot_stride ][0];
                P_TOP[1] = s->motion_val[mot_xy - mot_stride ][1];
                P_TOPRIGHT[0] = s->motion_val[mot_xy - mot_stride + 2][0];
                P_TOPRIGHT[1] = s->motion_val[mot_xy - mot_stride + 2][1];
                if(P_TOP[1] > (rel_ymax<<shift)) P_TOP[1] = (rel_ymax<<shift);
                if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift);
                if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift);

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

                if(s->out_format == FMT_H263){
                    pred_x = P_MEDIAN[0];
                    pred_y = P_MEDIAN[1];
                }else { /* mpeg1 at least */
                    pred_x= P_LEFT[0];
                    pred_y= P_LEFT[1];
                }
            }
        }
        dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
                                      &s->last_picture, mv_penalty);

        break;
    }

    /* intra / predictive decision */
    xx = mb_x * 16;
    yy = mb_y * 16;

    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    /* At this point (mx,my) are full-pel and the relative displacement */
    ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx);

    /* varc is computed from the source block alone, vard from the difference
       between the source block and the block the vector points at */
    sum = s->dsp.pix_sum(pix, s->linesize);

    varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
    vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize)+128)>>8;

//printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
    pic->mb_var [s->mb_width * mb_y + mb_x] = varc;
    pic->mc_mb_var[s->mb_width * mb_y + mb_x] = vard;
    pic->mb_mean [s->mb_width * mb_y + mb_x] = (sum+128)>>8;
    pic->mb_var_sum += varc;
    pic->mc_mb_var_sum += vard;
//printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);

#if 0
    printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
           varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
#endif
    if(s->flags&CODEC_FLAG_HQ){
        /* HQ mode: mb_type is a set of candidate types (bits are OR-ed
           together) rather than a single decision */
        if (vard <= 64 || vard < varc)
            s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        else
            s->scene_change_score+= s->qscale;

        if (vard*2 + 200 > varc)
            mb_type|= MB_TYPE_INTRA;
        if (varc*2 + 200 > vard){
            mb_type|= MB_TYPE_INTER;
            /* NOTE(review): the returned score is discarded here, unlike in
               the non-HQ branch below */
            s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
                                    pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
        }else{
            /* no refinement: just convert the full-pel vector to stored units */
            mx <<=shift;
            my <<=shift;
        }
        if((s->flags&CODEC_FLAG_4MV)
           && !s->me.skip && varc>50 && vard>10){
            mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
            mb_type|=MB_TYPE_INTER4V;

            /* motion_val already holds the four block vectors */
            set_p_mv_tables(s, mx, my, 0);
        }else
            set_p_mv_tables(s, mx, my, 1);
    }else{
        if (vard <= 64 || vard < varc) {
// if (sadP <= 32 || sadP < sadI + 500) {
            s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
            mb_type|= MB_TYPE_INTER;
            if (s->me_method != ME_ZERO) {
                dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
                                              pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
                if((s->flags&CODEC_FLAG_4MV)
                   && !s->me.skip && varc>50 && vard>10){
                    int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
                    /* four vectors are only chosen if they beat the single
                       vector by more than 128 */
                    if(dmin4 + 128 <dmin)
                        mb_type= MB_TYPE_INTER4V;
                }
                set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
            } else {
                /* NOTE(review): set_p_mv_tables() is not called on this path,
                   so the vector is computed but not stored -- confirm intent */
                mx <<=shift;
                my <<=shift;
            }
#if 0
            if (vard < 10) {
                skip++;
                fprintf(stderr,"\nEarly skip: %d vard: %2d varc: %5d dmin: %d",
                        skip, vard, varc, dmin);
            }
#endif
        }else{
            s->scene_change_score+= 20;
            mb_type|= MB_TYPE_INTRA;
            /* NOTE(review): mx/my are zeroed but not stored anywhere on this
               path -- confirm intent */
            mx = 0;
            my = 0;
        }
    }

    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
}
  903. int ff_estimate_motion_b(MpegEncContext * s,
  904. int mb_x, int mb_y, int16_t (*mv_table)[2], Picture *picture, int f_code)
  905. {
  906. int mx, my, range, dmin;
  907. int xmin, ymin, xmax, ymax;
  908. int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
  909. int pred_x=0, pred_y=0;
  910. int P[10][2];
  911. const int shift= 1+s->quarter_sample;
  912. const int mot_stride = s->mb_width + 2;
  913. const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
  914. uint8_t * const ref_picture= picture->data[0];
  915. uint16_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
  916. s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
  917. s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
  918. get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code);
  919. rel_xmin= xmin - mb_x*16;
  920. rel_xmax= xmax - mb_x*16;
  921. rel_ymin= ymin - mb_y*16;
  922. rel_ymax= ymax - mb_y*16;
  923. switch(s->me_method) {
  924. case ME_ZERO:
  925. default:
  926. no_motion_search(s, &mx, &my);
  927. dmin = 0;
  928. mx-= mb_x*16;
  929. my-= mb_y*16;
  930. break;
  931. case ME_FULL:
  932. dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
  933. mx-= mb_x*16;
  934. my-= mb_y*16;
  935. break;
  936. case ME_LOG:
  937. dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
  938. mx-= mb_x*16;
  939. my-= mb_y*16;
  940. break;
  941. case ME_PHODS:
  942. dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
  943. mx-= mb_x*16;
  944. my-= mb_y*16;
  945. break;
  946. case ME_X1:
  947. case ME_EPZS:
  948. {
  949. P_LAST[0] = mv_table[mot_xy ][0];
  950. P_LAST[1] = mv_table[mot_xy ][1];
  951. P_LEFT[0] = mv_table[mot_xy - 1][0];
  952. P_LEFT[1] = mv_table[mot_xy - 1][1];
  953. P_LAST_RIGHT[0] = mv_table[mot_xy + 1][0];
  954. P_LAST_RIGHT[1] = mv_table[mot_xy + 1][1];
  955. P_LAST_BOTTOM[0] = mv_table[mot_xy + mot_stride][0];
  956. P_LAST_BOTTOM[1] = mv_table[mot_xy + mot_stride][1];
  957. if(P_LEFT[0] > (rel_xmax<<shift)) P_LEFT[0] = (rel_xmax<<shift);
  958. if(P_LAST_RIGHT[0] < (rel_xmin<<shift)) P_LAST_RIGHT[0] = (rel_xmin<<shift);
  959. if(P_LAST_BOTTOM[1]< (rel_ymin<<shift)) P_LAST_BOTTOM[1]= (rel_ymin<<shift);
  960. /* special case for first line */
  961. if ((mb_y == 0 || s->first_slice_line)) {
  962. } else {
  963. P_TOP[0] = mv_table[mot_xy - mot_stride ][0];
  964. P_TOP[1] = mv_table[mot_xy - mot_stride ][1];
  965. P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1 ][0];
  966. P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1 ][1];
  967. if(P_TOP[1] > (rel_ymax<<shift)) P_TOP[1]= (rel_ymax<<shift);
  968. if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift);
  969. if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift);
  970. P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
  971. P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
  972. }
  973. pred_x= P_LEFT[0];
  974. pred_y= P_LEFT[1];
  975. }
  976. dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
  977. picture, mv_penalty);
  978. break;
  979. }
  980. dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
  981. pred_x, pred_y, picture, 0, 0, mv_penalty);
  982. //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
  983. // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
  984. mv_table[mot_xy][0]= mx;
  985. mv_table[mot_xy][1]= my;
  986. return dmin;
  987. }
  988. static inline int check_bidir_mv(MpegEncContext * s,
  989. int mb_x, int mb_y,
  990. int motion_fx, int motion_fy,
  991. int motion_bx, int motion_by,
  992. int pred_fx, int pred_fy,
  993. int pred_bx, int pred_by)
  994. {
  995. //FIXME optimize?
  996. //FIXME move into template?
  997. //FIXME better f_code prediction (max mv & distance)
  998. UINT16 *mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
  999. uint8_t *dest_y = s->me.scratchpad;
  1000. uint8_t *ptr;
  1001. int dxy;
  1002. int src_x, src_y;
  1003. int fbmin;
  1004. if(s->quarter_sample){
  1005. dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
  1006. src_x = mb_x * 16 + (motion_fx >> 2);
  1007. src_y = mb_y * 16 + (motion_fy >> 2);
  1008. assert(src_x >=-16 && src_x<=s->width);
  1009. assert(src_y >=-16 && src_y<=s->height);
  1010. ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
  1011. s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize);
  1012. dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
  1013. src_x = mb_x * 16 + (motion_bx >> 2);
  1014. src_y = mb_y * 16 + (motion_by >> 2);
  1015. assert(src_x >=-16 && src_x<=s->width);
  1016. assert(src_y >=-16 && src_y<=s->height);
  1017. ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
  1018. s->dsp.avg_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize);
  1019. }else{
  1020. dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
  1021. src_x = mb_x * 16 + (motion_fx >> 1);
  1022. src_y = mb_y * 16 + (motion_fy >> 1);
  1023. assert(src_x >=-16 && src_x<=s->width);
  1024. assert(src_y >=-16 && src_y<=s->height);
  1025. ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
  1026. s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
  1027. dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
  1028. src_x = mb_x * 16 + (motion_bx >> 1);
  1029. src_y = mb_y * 16 + (motion_by >> 1);
  1030. assert(src_x >=-16 && src_x<=s->width);
  1031. assert(src_y >=-16 && src_y<=s->height);
  1032. ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
  1033. s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
  1034. }
  1035. fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.sub_penalty_factor
  1036. +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.sub_penalty_factor;
  1037. + s->dsp.me_sub_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
  1038. return fbmin;
  1039. }
  1040. /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
  1041. static inline int bidir_refine(MpegEncContext * s,
  1042. int mb_x, int mb_y)
  1043. {
  1044. const int mot_stride = s->mb_width + 2;
  1045. const int xy = (mb_y + 1)*mot_stride + mb_x + 1;
  1046. int fbmin;
  1047. int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
  1048. int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
  1049. int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
  1050. int pred_by= s->b_bidir_back_mv_table[xy-1][1];
  1051. int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
  1052. int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
  1053. int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
  1054. int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
  1055. //FIXME do refinement and add flag
  1056. fbmin= check_bidir_mv(s, mb_x, mb_y,
  1057. motion_fx, motion_fy,
  1058. motion_bx, motion_by,
  1059. pred_fx, pred_fy,
  1060. pred_bx, pred_by);
  1061. return fbmin;
  1062. }
  1063. static inline int direct_search(MpegEncContext * s,
  1064. int mb_x, int mb_y)
  1065. {
  1066. int P[10][2];
  1067. const int mot_stride = s->mb_width + 2;
  1068. const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
  1069. const int shift= 1+s->quarter_sample;
  1070. int dmin, i;
  1071. const int time_pp= s->pp_time;
  1072. const int time_pb= s->pb_time;
  1073. int mx, my, xmin, xmax, ymin, ymax;
  1074. int16_t (*mv_table)[2]= s->b_direct_mv_table;
  1075. uint16_t * const mv_penalty= s->me.mv_penalty[1] + MAX_MV;
  1076. P_LAST[0] = mv_table[mot_xy ][0];
  1077. P_LAST[1] = mv_table[mot_xy ][1];
  1078. P_LEFT[0] = mv_table[mot_xy - 1][0];
  1079. P_LEFT[1] = mv_table[mot_xy - 1][1];
  1080. P_LAST_RIGHT[0] = mv_table[mot_xy + 1][0];
  1081. P_LAST_RIGHT[1] = mv_table[mot_xy + 1][1];
  1082. P_LAST_BOTTOM[0] = mv_table[mot_xy + mot_stride][0];
  1083. P_LAST_BOTTOM[1] = mv_table[mot_xy + mot_stride][1];
  1084. /*
  1085. if(P_LEFT[0] > (rel_xmax<<shift)) P_LEFT[0] = (rel_xmax<<shift);
  1086. if(P_LAST_RIGHT[0] < (rel_xmin<<shift)) P_LAST_RIGHT[0] = (rel_xmin<<shift);
  1087. if(P_LAST_BOTTOM[1]< (rel_ymin<<shift)) P_LAST_BOTTOM[1]= (rel_ymin<<shift);
  1088. */
  1089. /* special case for first line */
  1090. if ((mb_y == 0 || s->first_slice_line)) {
  1091. } else {
  1092. P_TOP[0] = mv_table[mot_xy - mot_stride ][0];
  1093. P_TOP[1] = mv_table[mot_xy - mot_stride ][1];
  1094. P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1 ][0];
  1095. P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1 ][1];
  1096. P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
  1097. P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
  1098. }
  1099. ymin= xmin=(-32)>>shift;
  1100. ymax= xmax= 31>>shift;
  1101. if(s->co_located_type_table[mb_x + mb_y*s->mb_width]==CO_LOCATED_TYPE_4MV){
  1102. s->mv_type= MV_TYPE_8X8;
  1103. }else{
  1104. s->mv_type= MV_TYPE_16X16;
  1105. }
  1106. for(i=0; i<4; i++){
  1107. int index= s->block_index[i];
  1108. int min, max;
  1109. s->me.co_located_mv[i][0]= s->motion_val[index][0];
  1110. s->me.co_located_mv[i][1]= s->motion_val[index][1];
  1111. s->me.direct_basis_mv[i][0]= s->me.co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
  1112. s->me.direct_basis_mv[i][1]= s->me.co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));
  1113. // s->me.direct_basis_mv[1][i][0]= s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(shift+3);
  1114. // s->me.direct_basis_mv[1][i][1]= s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(shift+3);
  1115. max= FFMAX(s->me.direct_basis_mv[i][0], s->me.direct_basis_mv[i][0] - s->me.co_located_mv[i][0])>>shift;
  1116. min= FFMIN(s->me.direct_basis_mv[i][0], s->me.direct_basis_mv[i][0] - s->me.co_located_mv[i][0])>>shift;
  1117. max+= (2*mb_x + (i& 1))*8 - 1; // +-1 is for the simpler rounding
  1118. min+= (2*mb_x + (i& 1))*8 + 1;
  1119. if(max >= s->width) xmax= s->width - max - 1;
  1120. if(min < -16 ) xmin= - 32 - min;
  1121. max= FFMAX(s->me.direct_basis_mv[i][1], s->me.direct_basis_mv[i][1] - s->me.co_located_mv[i][1])>>shift;
  1122. min= FFMIN(s->me.direct_basis_mv[i][1], s->me.direct_basis_mv[i][1] - s->me.co_located_mv[i][1])>>shift;
  1123. max+= (2*mb_y + (i>>1))*8 - 1; // +-1 is for the simpler rounding
  1124. min+= (2*mb_y + (i>>1))*8 + 1;
  1125. if(max >= s->height) ymax= s->height - max - 1;
  1126. if(min < -16 ) ymin= - 32 - min;
  1127. if(s->mv_type == MV_TYPE_16X16) break;
  1128. }
  1129. assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);
  1130. if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
  1131. s->b_direct_mv_table[mot_xy][0]= 0;
  1132. s->b_direct_mv_table[mot_xy][1]= 0;
  1133. return 256*256*256*64;
  1134. }
  1135. if(s->flags&CODEC_FLAG_QPEL){
  1136. dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
  1137. &s->last_picture, mv_penalty);
  1138. dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
  1139. 0, 0, &s->last_picture, 0, 0, mv_penalty);
  1140. }else{
  1141. dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
  1142. &s->last_picture, mv_penalty);
  1143. dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
  1144. 0, 0, &s->last_picture, 0, 0, mv_penalty);
  1145. }
  1146. s->b_direct_mv_table[mot_xy][0]= mx;
  1147. s->b_direct_mv_table[mot_xy][1]= my;
  1148. return dmin;
  1149. }
  1150. void ff_estimate_b_frame_motion(MpegEncContext * s,
  1151. int mb_x, int mb_y)
  1152. {
  1153. const int penalty_factor= s->me.penalty_factor;
  1154. int fmin, bmin, dmin, fbmin;
  1155. int type=0;
  1156. dmin= direct_search(s, mb_x, mb_y);
  1157. fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code);
  1158. bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) - penalty_factor;
  1159. //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
  1160. fbmin= bidir_refine(s, mb_x, mb_y);
  1161. {
  1162. int score= dmin;
  1163. type=MB_TYPE_DIRECT;
  1164. if(fmin<score){
  1165. score=fmin;
  1166. type= MB_TYPE_FORWARD;
  1167. }
  1168. if(bmin<score){
  1169. score=bmin;
  1170. type= MB_TYPE_BACKWARD;
  1171. }
  1172. if(fbmin<score){
  1173. score=fbmin;
  1174. type= MB_TYPE_BIDIR;
  1175. }
  1176. score= ((unsigned)(score*score + 128*256))>>16;
  1177. s->current_picture.mc_mb_var_sum += score;
  1178. s->current_picture.mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSD
  1179. }
  1180. if(s->flags&CODEC_FLAG_HQ){
  1181. type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter
  1182. if(dmin>256*256*16) type&= ~MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
  1183. }
  1184. s->mb_type[mb_y*s->mb_width + mb_x]= type;
  1185. }
  1186. /* find best f_code for ME which do unlimited searches */
  1187. int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
  1188. {
  1189. if(s->me_method>=ME_EPZS){
  1190. int score[8];
  1191. int i, y;
  1192. UINT8 * fcode_tab= s->fcode_tab;
  1193. int best_fcode=-1;
  1194. int best_score=-10000000;
  1195. for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);
  1196. for(y=0; y<s->mb_height; y++){
  1197. int x;
  1198. int xy= (y+1)* (s->mb_width+2) + 1;
  1199. i= y*s->mb_width;
  1200. for(x=0; x<s->mb_width; x++){
  1201. if(s->mb_type[i] & type){
  1202. int fcode= FFMAX(fcode_tab[mv_table[xy][0] + MAX_MV],
  1203. fcode_tab[mv_table[xy][1] + MAX_MV]);
  1204. int j;
  1205. for(j=0; j<fcode && j<8; j++){
  1206. if(s->pict_type==B_TYPE || s->current_picture.mc_mb_var[i] < s->current_picture.mb_var[i])
  1207. score[j]-= 170;
  1208. }
  1209. }
  1210. i++;
  1211. xy++;
  1212. }
  1213. }
  1214. for(i=1; i<8; i++){
  1215. if(score[i] > best_score){
  1216. best_score= score[i];
  1217. best_fcode= i;
  1218. }
  1219. // printf("%d %d\n", i, score[i]);
  1220. }
  1221. // printf("fcode: %d type: %d\n", i, s->pict_type);
  1222. return best_fcode;
  1223. /* for(i=0; i<=MAX_FCODE; i++){
  1224. printf("%d ", mv_num[i]);
  1225. }
  1226. printf("\n");*/
  1227. }else{
  1228. return 1;
  1229. }
  1230. }
  1231. void ff_fix_long_p_mvs(MpegEncContext * s)
  1232. {
  1233. const int f_code= s->f_code;
  1234. int y;
  1235. UINT8 * fcode_tab= s->fcode_tab;
  1236. //int clip=0;
  1237. //int noclip=0;
  1238. /* clip / convert to intra 16x16 type MVs */
  1239. for(y=0; y<s->mb_height; y++){
  1240. int x;
  1241. int xy= (y+1)* (s->mb_width+2)+1;
  1242. int i= y*s->mb_width;
  1243. for(x=0; x<s->mb_width; x++){
  1244. if(s->mb_type[i]&MB_TYPE_INTER){
  1245. if( fcode_tab[s->p_mv_table[xy][0] + MAX_MV] > f_code
  1246. || fcode_tab[s->p_mv_table[xy][0] + MAX_MV] == 0
  1247. || fcode_tab[s->p_mv_table[xy][1] + MAX_MV] > f_code
  1248. || fcode_tab[s->p_mv_table[xy][1] + MAX_MV] == 0 ){
  1249. s->mb_type[i] &= ~MB_TYPE_INTER;
  1250. s->mb_type[i] |= MB_TYPE_INTRA;
  1251. s->p_mv_table[xy][0] = 0;
  1252. s->p_mv_table[xy][1] = 0;
  1253. //clip++;
  1254. }
  1255. //else
  1256. // noclip++;
  1257. }
  1258. xy++;
  1259. i++;
  1260. }
  1261. }
  1262. //printf("%d no:%d %d//\n", clip, noclip, f_code);
  1263. if(s->flags&CODEC_FLAG_4MV){
  1264. const int wrap= 2+ s->mb_width*2;
  1265. /* clip / convert to intra 8x8 type MVs */
  1266. for(y=0; y<s->mb_height; y++){
  1267. int xy= (y*2 + 1)*wrap + 1;
  1268. int i= y*s->mb_width;
  1269. int x;
  1270. for(x=0; x<s->mb_width; x++){
  1271. if(s->mb_type[i]&MB_TYPE_INTER4V){
  1272. int block;
  1273. for(block=0; block<4; block++){
  1274. int off= (block& 1) + (block>>1)*wrap;
  1275. int mx= s->motion_val[ xy + off ][0];
  1276. int my= s->motion_val[ xy + off ][1];
  1277. if( fcode_tab[mx + MAX_MV] > f_code
  1278. || fcode_tab[mx + MAX_MV] == 0
  1279. || fcode_tab[my + MAX_MV] > f_code
  1280. || fcode_tab[my + MAX_MV] == 0 ){
  1281. s->mb_type[i] &= ~MB_TYPE_INTER4V;
  1282. s->mb_type[i] |= MB_TYPE_INTRA;
  1283. }
  1284. }
  1285. }
  1286. xy+=2;
  1287. i++;
  1288. }
  1289. }
  1290. }
  1291. }
  1292. void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type)
  1293. {
  1294. int y;
  1295. UINT8 * fcode_tab= s->fcode_tab;
  1296. /* clip / convert to intra 16x16 type MVs */
  1297. for(y=0; y<s->mb_height; y++){
  1298. int x;
  1299. int xy= (y+1)* (s->mb_width+2)+1;
  1300. int i= y*s->mb_width;
  1301. for(x=0; x<s->mb_width; x++){
  1302. if( fcode_tab[mv_table[xy][0] + MAX_MV] > f_code
  1303. || fcode_tab[mv_table[xy][0] + MAX_MV] == 0){
  1304. if(mv_table[xy][0]>0) mv_table[xy][0]= (16<<f_code)-1;
  1305. else mv_table[xy][0]= -(16<<f_code);
  1306. }
  1307. if( fcode_tab[mv_table[xy][1] + MAX_MV] > f_code
  1308. || fcode_tab[mv_table[xy][1] + MAX_MV] == 0){
  1309. if(mv_table[xy][1]>0) mv_table[xy][1]= (16<<f_code)-1;
  1310. else mv_table[xy][1]= -(16<<f_code);
  1311. }
  1312. xy++;
  1313. i++;
  1314. }
  1315. }
  1316. }