You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1105 lines
38KB

  1. /*
  2. * Motion estimation
  3. * Copyright (c) 2002-2004 Michael Niedermayer
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. */
  22. /**
  23. * @file motion_est_template.c
  24. * Motion estimation template.
  25. */
  26. //let's hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
  /**
   * Declares the locals shared by the search routines, copied from the
   * MotionEstContext pointer `c` that must be in scope where the macro is
   * expanded: score_map, xmin/ymin/xmax/ymax, mv_penalty, pred_x and pred_y.
   * Every declaration ends in ';', so the macro is used without a semicolon.
   * The last line has no trailing backslash: a continuation there would splice
   * whatever line follows the macro into its replacement list.
   */
  27. #define LOAD_COMMON\
  28. uint32_t attribute_unused * const score_map= c->score_map;\
  29. const int attribute_unused xmin= c->xmin;\
  30. const int attribute_unused ymin= c->ymin;\
  31. const int attribute_unused xmax= c->xmax;\
  32. const int attribute_unused ymax= c->ymax;\
  33. uint8_t *mv_penalty= c->current_mv_penalty;\
  34. const int pred_x= c->pred_x;\
  35. const int pred_y= c->pred_y;
  /**
   * Scores the half-pel position (2*x+dx, 2*y+dy): calls cmp() with the
   * sub-pel comparators, adds the motion-vector penalty relative to
   * (pred_x, pred_y), and hands the result to COPY3_IF_LT together with
   * dmin, bx and by (COPY3_IF_LT is defined elsewhere; presumably it keeps
   * the candidate when d < dmin -- confirm against its definition).
   * Uses s, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags,
   * mv_penalty, penalty_factor, d, dmin, bx and by from the caller's scope.
   * Expands to a bare { } block, so call sites omit the trailing semicolon.
   */
  36. #define CHECK_HALF_MV(dx, dy, x, y)\
  37. {\
  38. const int hx= 2*(x)+(dx);\
  39. const int hy= 2*(y)+(dy);\
  40. d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
  41. d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
  42. COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
  43. }
  44. #if 0
  /*
   * Disabled (#if 0) exhaustive half-pel search: tests all eight half-pel
   * neighbours of (mx, my) instead of the score-guided subset used by the
   * active implementation in the #else branch.
   * NOTE(review): this variant still refers to names it never declares
   * (c, n, cmpf, chroma_cmpf, hpel_put, chroma_hpel_put, CMP_HPEL, h,
   * ref_index, src_index, flags) and would need more work before it compiles.
   * Fixed here: the stray ')' in the declarator and the range assert, which
   * used '||' and was therefore always true.
   */
  45. static int hpel_motion_search(MpegEncContext * s,
  46. int *mx_ptr, int *my_ptr, int dmin,
  47. uint8_t *ref_data[3],
  48. int size)
  49. {
  50. const int xx = 16 * s->mb_x + 8*(n&1);
  51. const int yy = 16 * s->mb_y + 8*(n>>1);
  52. const int mx = *mx_ptr;
  53. const int my = *my_ptr;
  54. const int penalty_factor= c->sub_penalty_factor;
  55. LOAD_COMMON
  56. // INIT;
  57. //FIXME factorize
  58. me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
  59. if(s->no_rounding /*FIXME b_type*/){
  60. hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
  61. chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
  62. }else{
  63. hpel_put=& s->dsp.put_pixels_tab[size];
  64. chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
  65. }
  66. cmpf= s->dsp.me_cmp[size];
  67. chroma_cmpf= s->dsp.me_cmp[size+1];
  68. cmp_sub= s->dsp.me_sub_cmp[size];
  69. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  70. if(c->skip){ //FIXME somehow move up (benchmark)
  71. *mx_ptr = 0;
  72. *my_ptr = 0;
  73. return dmin;
  74. }
  75. if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
  76. CMP_HPEL(dmin, 0, 0, mx, my, size);
  77. if(mx || my)
  78. dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
  79. }
  80. if (mx > xmin && mx < xmax &&
  81. my > ymin && my < ymax) {
  82. int bx=2*mx, by=2*my;
  83. int d= dmin;
  84. CHECK_HALF_MV(1, 1, mx-1, my-1)
  85. CHECK_HALF_MV(0, 1, mx , my-1)
  86. CHECK_HALF_MV(1, 1, mx , my-1)
  87. CHECK_HALF_MV(1, 0, mx-1, my )
  88. CHECK_HALF_MV(1, 0, mx , my )
  89. CHECK_HALF_MV(1, 1, mx-1, my )
  90. CHECK_HALF_MV(0, 1, mx , my )
  91. CHECK_HALF_MV(1, 1, mx , my )
  92. assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2); //all four bounds must hold
  93. *mx_ptr = bx;
  94. *my_ptr = by;
  95. }else{
  96. *mx_ptr =2*mx;
  97. *my_ptr =2*my;
  98. }
  99. return dmin;
  100. }
  101. #else
  /**
   * Half-pel refinement of the full-pel vector (*mx_ptr, *my_ptr).
   *
   * - If c->skip is set, the vector is zeroed and dmin is returned unchanged.
   * - If the sub-pel comparator differs from the full-pel one, dmin is first
   *   recomputed at the full-pel position with the sub-pel comparator.
   * - If (mx, my) lies strictly inside [xmin, xmax] x [ymin, ymax], the stored
   *   scores of the top/left/right/bottom neighbours (score_map plus their
   *   vector penalty) decide which half-pel positions are tested.
   *
   * On return *mx_ptr / *my_ptr hold the chosen vector in half-pel units and
   * the function returns the best score found.
   *
   * The map index and keys use multiplication by a power of two rather than
   * '<<', because my may be negative and left-shifting a negative int is
   * undefined behaviour in C.
   */
  102. static int hpel_motion_search(MpegEncContext * s,
  103. int *mx_ptr, int *my_ptr, int dmin,
  104. int src_index, int ref_index,
  105. int size, int h)
  106. {
  107. MotionEstContext * const c= &s->me;
  108. const int mx = *mx_ptr;
  109. const int my = *my_ptr;
  110. const int penalty_factor= c->sub_penalty_factor;
  111. me_cmp_func cmp_sub, chroma_cmp_sub;
  112. int bx=2*mx, by=2*my;
  113. LOAD_COMMON
  114. int flags= c->sub_flags;
  115. //FIXME factorize
  116. cmp_sub= s->dsp.me_sub_cmp[size];
  117. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  118. if(c->skip){ //FIXME move out of hpel?
  119. *mx_ptr = 0;
  120. *my_ptr = 0;
  121. return dmin;
  122. }
  123. if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
  124. dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
  125. if(mx || my || size>0)
  126. dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
  127. }
  128. if (mx > xmin && mx < xmax &&
  129. my > ymin && my < ymax) {
  130. int d= dmin;
  131. const int index= my*(1<<ME_MAP_SHIFT) + mx;
  132. const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
  133. + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
  134. const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
  135. + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
  136. const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
  137. + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
  138. const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
  139. + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
  140. #if 1
  141. int key;
  142. int map_generation= c->map_generation;
  143. #ifndef NDEBUG
  144. uint32_t *map= c->map;
  145. #endif
  146. key= (my-1)*(1<<ME_MAP_MV_BITS) + (mx) + map_generation;
  147. assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
  148. key= (my+1)*(1<<ME_MAP_MV_BITS) + (mx) + map_generation;
  149. assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
  150. key= (my)*(1<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
  151. assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
  152. key= (my)*(1<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
  153. assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
  154. #endif
  155. if(t<=b){
  156. CHECK_HALF_MV(0, 1, mx ,my-1)
  157. if(l<=r){
  158. CHECK_HALF_MV(1, 1, mx-1, my-1)
  159. if(t+r<=b+l){
  160. CHECK_HALF_MV(1, 1, mx , my-1)
  161. }else{
  162. CHECK_HALF_MV(1, 1, mx-1, my )
  163. }
  164. CHECK_HALF_MV(1, 0, mx-1, my )
  165. }else{
  166. CHECK_HALF_MV(1, 1, mx , my-1)
  167. if(t+l<=b+r){
  168. CHECK_HALF_MV(1, 1, mx-1, my-1)
  169. }else{
  170. CHECK_HALF_MV(1, 1, mx , my )
  171. }
  172. CHECK_HALF_MV(1, 0, mx , my )
  173. }
  174. }else{
  175. if(l<=r){
  176. if(t+l<=b+r){
  177. CHECK_HALF_MV(1, 1, mx-1, my-1)
  178. }else{
  179. CHECK_HALF_MV(1, 1, mx , my )
  180. }
  181. CHECK_HALF_MV(1, 0, mx-1, my)
  182. CHECK_HALF_MV(1, 1, mx-1, my)
  183. }else{
  184. if(t+r<=b+l){
  185. CHECK_HALF_MV(1, 1, mx , my-1)
  186. }else{
  187. CHECK_HALF_MV(1, 1, mx-1, my)
  188. }
  189. CHECK_HALF_MV(1, 0, mx , my)
  190. CHECK_HALF_MV(1, 1, mx , my)
  191. }
  192. CHECK_HALF_MV(0, 1, mx , my)
  193. }
  194. assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
  195. }
  196. *mx_ptr = bx;
  197. *my_ptr = by;
  198. return dmin;
  199. }
  200. #endif
  /**
   * "No sub-pel search" variant: only doubles the vector components so they
   * are expressed in the same units the half-pel search produces, and
   * returns dmin unchanged. The remaining parameters are unused; they keep
   * the signature parallel to the other sub-pel search functions.
   * Doubling uses '*= 2' instead of '<<= 1' because the components may be
   * negative and left-shifting a negative int is undefined behaviour.
   */
  201. static int no_sub_motion_search(MpegEncContext * s,
  202. int *mx_ptr, int *my_ptr, int dmin,
  203. int src_index, int ref_index,
  204. int size, int h)
  205. {
  206. (*mx_ptr)*=2;
  207. (*my_ptr)*=2;
  208. return dmin;
  209. }
  /**
   * Scores the vector (mx, my) with the macroblock comparators
   * (s->dsp.mb_cmp) and the macroblock flags/penalty factor from s->me.
   *
   * mx/my are split into an integer part (>> (qpel+1)) and a fractional part
   * (& mask) before being passed to cmp(). mask is 1+2*qpel, i.e. 1 or 3
   * provided FLAG_QPEL is the bit value 1 -- TODO confirm against its
   * definition.
   *
   * @param add_rate if non-zero, the motion-vector penalty relative to
   *                 (pred_x, pred_y) is added, except for the zero vector
   *                 when size is 0.
   * @return the comparison score, plus the penalty when applicable.
   */
  210. inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
  211. int ref_index, int size, int h, int add_rate)
  212. {
  213. // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
  214. MotionEstContext * const c= &s->me;
  215. const int penalty_factor= c->mb_penalty_factor;
  216. const int flags= c->mb_flags;
  217. const int qpel= flags & FLAG_QPEL;
  218. const int mask= 1+2*qpel;
  219. me_cmp_func cmp_sub, chroma_cmp_sub;
  220. int d;
  221. LOAD_COMMON
  222. //FIXME factorize
  223. cmp_sub= s->dsp.mb_cmp[size];
  224. chroma_cmp_sub= s->dsp.mb_cmp[size+1];
  225. // assert(!c->skip);
  226. // assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
  227. d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
  228. //FIXME check cbp before adding penalty for (0,0) vector
  229. if(add_rate && (mx || my || size>0))
  230. d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
  231. return d;
  232. }
  /**
   * Quarter-pel counterpart of CHECK_HALF_MV: scores position
   * (4*x+dx, 4*y+dy) with cmp() using cmpf/chroma_cmpf, adds the
   * motion-vector penalty, and passes the result to COPY3_IF_LT with
   * dmin, bx and by (COPY3_IF_LT is defined elsewhere).
   * Uses s, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags,
   * mv_penalty, pred_x, pred_y, penalty_factor, d, dmin, bx and by from the
   * caller's scope. Expands to a bare { } block; no trailing semicolon needed.
   */
  233. #define CHECK_QUARTER_MV(dx, dy, x, y)\
  234. {\
  235. const int hx= 4*(x)+(dx);\
  236. const int hy= 4*(y)+(dy);\
  237. d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
  238. d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
  239. COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
  240. }
  /**
   * Quarter-pel refinement of the full-pel vector (*mx_ptr, *my_ptr).
   *
   * - If c->skip is set, the vector is zeroed and dmin is returned unchanged.
   * - If the sub-pel comparator differs from the full-pel one, dmin is first
   *   recomputed at the full-pel position with the sub-pel comparator.
   * - If (mx, my) lies strictly inside the search range, a score is estimated
   *   for every offset (nx, ny) in [-3, 3]^2 except (0, 0) from the stored
   *   neighbour scores, the 8 lowest estimates are kept sorted in
   *   best[]/best_pos[], and the first subpel_quality of them are evaluated
   *   for real with CHECK_QUARTER_MV.
   * - Otherwise the vector is only scaled by 4.
   *
   * On return *mx_ptr / *my_ptr are in quarter-pel units; the best score is
   * returned.
   *
   * Changes compared with the previous revision:
   * - the number of evaluated candidates is clamped to 8, the size of
   *   best_pos[], so a larger me_subpel_quality cannot read past the array;
   * - 'my << n' is written as 'my * (1 << n)', since my may be negative and
   *   left-shifting a negative int is undefined behaviour.
   *
   * Inside the range-check block the local 'c' (centre score) shadows the
   * context pointer, which is why s->me.dia_size is used there.
   */
  241. static int qpel_motion_search(MpegEncContext * s,
  242. int *mx_ptr, int *my_ptr, int dmin,
  243. int src_index, int ref_index,
  244. int size, int h)
  245. {
  246. MotionEstContext * const c= &s->me;
  247. const int mx = *mx_ptr;
  248. const int my = *my_ptr;
  249. const int penalty_factor= c->sub_penalty_factor;
  250. const int map_generation= c->map_generation;
  251. const int subpel_quality= c->avctx->me_subpel_quality;
  252. uint32_t *map= c->map;
  253. me_cmp_func cmpf, chroma_cmpf;
  254. me_cmp_func cmp_sub, chroma_cmp_sub;
  255. LOAD_COMMON
  256. int flags= c->sub_flags;
  257. cmpf= s->dsp.me_cmp[size];
  258. chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
  259. //FIXME factorize
  260. cmp_sub= s->dsp.me_sub_cmp[size];
  261. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  262. if(c->skip){ //FIXME somehow move up (benchmark)
  263. *mx_ptr = 0;
  264. *my_ptr = 0;
  265. return dmin;
  266. }
  267. if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
  268. dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
  269. if(mx || my || size>0)
  270. dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
  271. }
  272. if (mx > xmin && mx < xmax &&
  273. my > ymin && my < ymax) {
  274. int bx=4*mx, by=4*my;
  275. int d= dmin;
  276. int i, nx, ny;
  277. const int index= my*(1<<ME_MAP_SHIFT) + mx;
  278. const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
  279. const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
  280. const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
  281. const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
  282. const int c= score_map[(index )&(ME_MAP_SIZE-1)];
  283. int best[8];
  284. int best_pos[8][2];
  285. memset(best, 64, sizeof(int)*8); //every byte 0x40, i.e. each entry starts as a large positive score
  286. #if 1
  287. if(s->me.dia_size>=2){
  288. const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  289. const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  290. const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  291. const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  292. for(ny= -3; ny <= 3; ny++){
  293. for(nx= -3; nx <= 3; nx++){
  294. //FIXME this could overflow (unlikely though)
  295. const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
  296. const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
  297. const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
  298. int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
  299. int i;
  300. if((nx&3)==0 && (ny&3)==0) continue;
  301. score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
  302. // if(nx&1) score-=1024*c->penalty_factor;
  303. // if(ny&1) score-=1024*c->penalty_factor;
  304. for(i=0; i<8; i++){
  305. if(score < best[i]){
  306. memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
  307. memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
  308. best[i]= score;
  309. best_pos[i][0]= nx + 4*mx;
  310. best_pos[i][1]= ny + 4*my;
  311. break;
  312. }
  313. }
  314. }
  315. }
  316. }else{
  317. int tl;
  318. //FIXME this could overflow (unlikely though)
  319. const int cx = 4*(r - l);
  320. const int cx2= r + l - 2*c;
  321. const int cy = 4*(b - t);
  322. const int cy2= b + t - 2*c;
  323. int cxy;
  324. if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == my*(1<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME branch disabled by "&& 0"; NOTE(review): key is built from (mx,my) while the slot is that of (mx-1,my-1)
  325. tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  326. }else{
  327. tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
  328. }
  329. cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
  330. assert(16*cx2 + 4*cx + 32*c == 32*r);
  331. assert(16*cx2 - 4*cx + 32*c == 32*l);
  332. assert(16*cy2 + 4*cy + 32*c == 32*b);
  333. assert(16*cy2 - 4*cy + 32*c == 32*t);
  334. assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
  335. for(ny= -3; ny <= 3; ny++){
  336. for(nx= -3; nx <= 3; nx++){
  337. //FIXME this could overflow (unlikely though)
  338. int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
  339. int i;
  340. if((nx&3)==0 && (ny&3)==0) continue;
  341. score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
  342. // if(nx&1) score-=32*c->penalty_factor;
  343. // if(ny&1) score-=32*c->penalty_factor;
  344. for(i=0; i<8; i++){
  345. if(score < best[i]){
  346. memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
  347. memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
  348. best[i]= score;
  349. best_pos[i][0]= nx + 4*mx;
  350. best_pos[i][1]= ny + 4*my;
  351. break;
  352. }
  353. }
  354. }
  355. }
  356. }
  357. for(i=0; i<FFMIN(subpel_quality, 8); i++){ //best_pos[] holds only 8 candidates
  358. nx= best_pos[i][0];
  359. ny= best_pos[i][1];
  360. CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
  361. }
  362. #if 0
  363. const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  364. const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  365. const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  366. const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  367. // if(l < r && l < t && l < b && l < tl && l < bl && l < tr && l < br && bl < tl){
  368. if(tl<br){
  369. // nx= FFMAX(4*mx - bx, bx - 4*mx);
  370. // ny= FFMAX(4*my - by, by - 4*my);
  371. static int stats[7][7], count;
  372. count++;
  373. stats[4*mx - bx + 3][4*my - by + 3]++;
  374. if(256*256*256*64 % count ==0){
  375. for(i=0; i<49; i++){
  376. if((i%7)==0) printf("\n");
  377. printf("%6d ", stats[0][i]);
  378. }
  379. printf("\n");
  380. }
  381. }
  382. #endif
  383. #else
  384. CHECK_QUARTER_MV(2, 2, mx-1, my-1)
  385. CHECK_QUARTER_MV(0, 2, mx , my-1)
  386. CHECK_QUARTER_MV(2, 2, mx , my-1)
  387. CHECK_QUARTER_MV(2, 0, mx , my )
  388. CHECK_QUARTER_MV(2, 2, mx , my )
  389. CHECK_QUARTER_MV(0, 2, mx , my )
  390. CHECK_QUARTER_MV(2, 2, mx-1, my )
  391. CHECK_QUARTER_MV(2, 0, mx-1, my )
  392. nx= bx;
  393. ny= by;
  394. for(i=0; i<8; i++){
  395. int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
  396. int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
  397. CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
  398. }
  399. #endif
  400. #if 0
  401. //outer ring
  402. CHECK_QUARTER_MV(1, 3, mx-1, my-1)
  403. CHECK_QUARTER_MV(1, 2, mx-1, my-1)
  404. CHECK_QUARTER_MV(1, 1, mx-1, my-1)
  405. CHECK_QUARTER_MV(2, 1, mx-1, my-1)
  406. CHECK_QUARTER_MV(3, 1, mx-1, my-1)
  407. CHECK_QUARTER_MV(0, 1, mx , my-1)
  408. CHECK_QUARTER_MV(1, 1, mx , my-1)
  409. CHECK_QUARTER_MV(2, 1, mx , my-1)
  410. CHECK_QUARTER_MV(3, 1, mx , my-1)
  411. CHECK_QUARTER_MV(3, 2, mx , my-1)
  412. CHECK_QUARTER_MV(3, 3, mx , my-1)
  413. CHECK_QUARTER_MV(3, 0, mx , my )
  414. CHECK_QUARTER_MV(3, 1, mx , my )
  415. CHECK_QUARTER_MV(3, 2, mx , my )
  416. CHECK_QUARTER_MV(3, 3, mx , my )
  417. CHECK_QUARTER_MV(2, 3, mx , my )
  418. CHECK_QUARTER_MV(1, 3, mx , my )
  419. CHECK_QUARTER_MV(0, 3, mx , my )
  420. CHECK_QUARTER_MV(3, 3, mx-1, my )
  421. CHECK_QUARTER_MV(2, 3, mx-1, my )
  422. CHECK_QUARTER_MV(1, 3, mx-1, my )
  423. CHECK_QUARTER_MV(1, 2, mx-1, my )
  424. CHECK_QUARTER_MV(1, 1, mx-1, my )
  425. CHECK_QUARTER_MV(1, 0, mx-1, my )
  426. #endif
  427. assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
  428. *mx_ptr = bx;
  429. *my_ptr = by;
  430. }else{
  431. *mx_ptr =4*mx;
  432. *my_ptr =4*my;
  433. }
  434. return dmin;
  435. }
  /**
   * Evaluates the full-pel candidate (x, y) unless map[] already records it
   * for the current map_generation. For a new candidate the raw cmp() score
   * is stored in score_map[], the motion-vector penalty is added, and the
   * result is passed to COPY3_IF_LT with dmin, best[0] and best[1]
   * (COPY3_IF_LT is defined elsewhere).
   * The asserts require (x, y) to lie inside [xmin, xmax] x [ymin, ymax].
   * Uses s, d, dmin, best, map, score_map, map_generation, shift, size, h,
   * ref_index, src_index, cmpf, chroma_cmpf, flags, mv_penalty, pred_x,
   * pred_y and penalty_factor from the caller's scope.
   * x and y are evaluated several times, so pass side-effect-free expressions.
   * Scaling uses multiplication by a power of two instead of '<<' because the
   * coordinates may be negative and left-shifting a negative int is undefined.
   */
  436. #define CHECK_MV(x,y)\
  437. {\
  438. const int key= ((y)*(1<<ME_MAP_MV_BITS)) + (x) + map_generation;\
  439. const int index= (((y)*(1<<ME_MAP_SHIFT)) + (x))&(ME_MAP_SIZE-1);\
  440. assert((x) >= xmin);\
  441. assert((x) <= xmax);\
  442. assert((y) >= ymin);\
  443. assert((y) <= ymax);\
  444. /*printf("check_mv %d %d\n", x, y);*/\
  445. if(map[index]!=key){\
  446. d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
  447. map[index]= key;\
  448. score_map[index]= d;\
  449. d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
  450. /*printf("score:%d\n", d);*/\
  451. COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
  452. }\
  453. }
  /**
   * Clamps (ax, ay) into [xmin, xmax] x [ymin, ymax] and then runs CHECK_MV
   * on the clamped position.
   * The arguments are copied into block-local x and y first, so each is
   * evaluated once. Because the block declares x, y, x2 and y2, the argument
   * expressions must not themselves mention identifiers with those names.
   */
  454. #define CHECK_CLIPED_MV(ax,ay)\
  455. {\
  456. const int x= ax;\
  457. const int y= ay;\
  458. const int x2= FFMAX(xmin, FFMIN(x, xmax));\
  459. const int y2= FFMAX(ymin, FFMIN(y, ymax));\
  460. CHECK_MV(x2, y2)\
  461. }
  /**
   * Like CHECK_MV, but without the range asserts, and on improvement it also
   * records new_dir in next_dir (besides updating best[0], best[1] and dmin).
   * Candidates already recorded in map[] for the current map_generation are
   * skipped.
   * Uses the same caller-scope names as CHECK_MV plus next_dir.
   * Scaling uses multiplication by a power of two instead of '<<' because the
   * coordinates may be negative and left-shifting a negative int is undefined.
   */
  462. #define CHECK_MV_DIR(x,y,new_dir)\
  463. {\
  464. const int key= ((y)*(1<<ME_MAP_MV_BITS)) + (x) + map_generation;\
  465. const int index= (((y)*(1<<ME_MAP_SHIFT)) + (x))&(ME_MAP_SIZE-1);\
  466. /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
  467. if(map[index]!=key){\
  468. d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
  469. map[index]= key;\
  470. score_map[index]= d;\
  471. d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
  472. /*printf("score:%d\n", d);*/\
  473. if(d<dmin){\
  474. best[0]=x;\
  475. best[1]=y;\
  476. dmin=d;\
  477. next_dir= new_dir;\
  478. }\
  479. }\
  480. }
  /**
   * Debug helper: prints a diagnostic when (x, y) lies outside the search
   * range scaled by 2^S. v is a tag pasted into the format string to identify
   * the call site. Within this file it is referenced only from commented-out
   * lines.
   * The expansion is four unbraced if statements, so do not use it as the
   * body of an if/else without adding braces.
   * The last line has no trailing backslash (a continuation there would pull
   * the following line into the macro), and the bounds are scaled with
   * multiplication because xmin/ymin may be negative.
   */
  481. #define check(x,y,S,v)\
  482. if( (x)<(xmin*(1<<(S))) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
  483. if( (x)>(xmax*(1<<(S))) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
  484. if( (y)<(ymin*(1<<(S))) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
  485. if( (y)>(ymax*(1<<(S))) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);
  /**
   * Declares the additional locals used by the full-pel search routines:
   * map (from the context pointer c), qpel (flags & FLAG_QPEL) and
   * shift (1 + qpel). Requires c and flags in scope.
   * The last line has no trailing backslash, so the macro ends here and does
   * not absorb the line that follows it.
   */
  486. #define LOAD_COMMON2\
  487. uint32_t *map= c->map;\
  488. const int qpel= flags&FLAG_QPEL;\
  489. const int shift= 1+qpel;
  /**
   * Small-diamond search: repeatedly tests the four direct neighbours of the
   * current best position and moves to an improving one until no neighbour
   * improves dmin.
   *
   * The direction that led to the current position is remembered in
   * next_dir/dir so the neighbour the search just came from is not tested
   * again. Neighbours outside [xmin, xmax] x [ymin, ymax] are not tested.
   *
   * @param best in/out: best[0] = x, best[1] = y of the best full-pel position
   * @param dmin score of the starting position
   * @return the best score found
   *
   * The map key/index for the starting point are built with multiplication
   * by a power of two instead of '<<', since best[1] may be negative and
   * left-shifting a negative int is undefined behaviour.
   */
  490. static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
  491. int src_index, int ref_index, int const penalty_factor,
  492. int size, int h, int flags)
  493. {
  494. MotionEstContext * const c= &s->me;
  495. me_cmp_func cmpf, chroma_cmpf;
  496. int next_dir=-1;
  497. LOAD_COMMON
  498. LOAD_COMMON2
  499. int map_generation= c->map_generation;
  500. cmpf= s->dsp.me_cmp[size];
  501. chroma_cmpf= s->dsp.me_cmp[size+1];
  502. { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
  503. const int key= best[1]*(1<<ME_MAP_MV_BITS) + best[0] + map_generation;
  504. const int index= (best[1]*(1<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
  505. if(map[index]!=key){ //this will be executed only very rarely
  506. score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
  507. map[index]= key;
  508. }
  509. }
  510. for(;;){
  511. int d;
  512. const int dir= next_dir;
  513. const int x= best[0];
  514. const int y= best[1];
  515. next_dir=-1;
  516. //printf("%d", dir);
  517. if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
  518. if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
  519. if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
  520. if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
  521. if(next_dir==-1){
  522. return dmin;
  523. }
  524. }
  525. }
  /**
   * Diamond search restricted to power-of-two sizes up to 4.
   *
   * dia_size runs from 1 to 4; sizes that are not a power of two are skipped
   * by the dia_size&(dia_size-1) test, and a size is also skipped when the
   * diamond would leave [xmin, xmax] x [ymin, ymax]. For each remaining size
   * the diamond around the current best position is sampled with dir stepping
   * by 2. Whenever a pass moves the best position, dia_size is reset to 0 so
   * the loop restarts from size 1.
   *
   * @param best in/out: best[0] = x, best[1] = y of the best full-pel position
   * @return the best score found
   */
  526. static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
  527. int src_index, int ref_index, int const penalty_factor,
  528. int size, int h, int flags)
  529. {
  530. MotionEstContext * const c= &s->me;
  531. me_cmp_func cmpf, chroma_cmpf;
  532. int dia_size;
  533. LOAD_COMMON
  534. LOAD_COMMON2
  535. int map_generation= c->map_generation;
  536. cmpf= s->dsp.me_cmp[size];
  537. chroma_cmpf= s->dsp.me_cmp[size+1];
  538. for(dia_size=1; dia_size<=4; dia_size++){
  539. int dir;
  540. const int x= best[0];
  541. const int y= best[1];
  542. if(dia_size&(dia_size-1)) continue;
  543. if( x + dia_size > xmax
  544. || x - dia_size < xmin
  545. || y + dia_size > ymax
  546. || y - dia_size < ymin)
  547. continue;
  548. for(dir= 0; dir<dia_size; dir+=2){
  549. int d;
  550. CHECK_MV(x + dir , y + dia_size - dir);
  551. CHECK_MV(x + dia_size - dir, y - dir );
  552. CHECK_MV(x - dir , y - dia_size + dir);
  553. CHECK_MV(x - dia_size + dir, y + dir );
  554. }
  555. if(x!=best[0] || y!=best[1])
  556. dia_size=0;
  557. #if 0
  558. {
  559. int dx, dy, i;
  560. static int stats[8*8];
  561. dx= ABS(x-best[0]);
  562. dy= ABS(y-best[1]);
  563. if(dy>dx){
  564. dx^=dy; dy^=dx; dx^=dy;
  565. }
  566. stats[dy*8 + dx] ++;
  567. if(256*256*256*64 % (stats[0]+1)==0){
  568. for(i=0; i<64; i++){
  569. if((i&7)==0) printf("\n");
  570. printf("%8d ", stats[i]);
  571. }
  572. printf("\n");
  573. }
  574. }
  575. #endif
  576. }
  577. return dmin;
  578. }
  /**
   * Evaluates candidate (ax, ay) for the SAB search unless map[] already
   * records it for the current map_generation. If its score (cmp() result
   * plus vector penalty) is lower than the last entry of minima[], the
   * candidate is inserted at its sorted position, the tail is shifted down by
   * one (dropping the last entry), and the enclosing loop is restarted via
   * 'i=-1; continue;'.
   * Must therefore be expanded directly inside a loop over i; it also uses
   * minima, minima_count, d and the caller-scope names required by cmp().
   * Scaling uses multiplication by a power of two instead of '<<' because the
   * coordinates may be negative and left-shifting a negative int is undefined.
   */
  579. #define SAB_CHECK_MV(ax,ay)\
  580. {\
  581. const int key= ((ay)*(1<<ME_MAP_MV_BITS)) + (ax) + map_generation;\
  582. const int index= (((ay)*(1<<ME_MAP_SHIFT)) + (ax))&(ME_MAP_SIZE-1);\
  583. /*printf("sab check %d %d\n", ax, ay);*/\
  584. if(map[index]!=key){\
  585. d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
  586. map[index]= key;\
  587. score_map[index]= d;\
  588. d += (mv_penalty[((ax)*(1<<shift))-pred_x] + mv_penalty[((ay)*(1<<shift))-pred_y])*penalty_factor;\
  589. /*printf("score: %d\n", d);*/\
  590. if(d < minima[minima_count-1].height){\
  591. int j=0;\
  592. \
  593. while(d >= minima[j].height) j++;\
  594. \
  595. memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
  596. \
  597. minima[j].checked= 0;\
  598. minima[j].height= d;\
  599. minima[j].x= ax;\
  600. minima[j].y= ay;\
  601. \
  602. i=-1;\
  603. continue;\
  604. }\
  605. }\
  606. }
  /**
   * SAB search: keeps a sorted list of the minima_count best positions and
   * expands each unchecked one to its four neighbours until none is left.
   *
   * 1. Every map[] entry belonging to the current map_generation is decoded
   *    back into (x, y) and collected into minima[] with its stored score
   *    plus the vector penalty (no penalty for the zero vector).
   * 2. The collected entries are sorted with minima_cmp (defined elsewhere);
   *    if fewer than minima_count were found, the rest is filled with
   *    placeholder entries of very large height.
   * 3. Each unchecked entry strictly inside the search range has its four
   *    neighbours tested with SAB_CHECK_MV, which restarts the loop whenever
   *    it inserts a better candidate.
   * 4. minima[0] becomes the result; if it is strictly inside the range, its
   *    four neighbours are run through CHECK_MV so they are in the map.
   *
   * @param best out: best[0] = x, best[1] = y of the best full-pel position
   * @return the best score found
   *
   * Changes compared with the previous revision:
   * - minima_count is clamped to MAX_SAB_SIZE so a large |dia_size| cannot
   *   index past the minima[] array;
   * - the generation mask is written as -(1<<n) instead of (-1)<<n, and the
   *   penalty index uses multiplication instead of '<<', because
   *   left-shifting a negative int is undefined behaviour.
   */
  607. #define MAX_SAB_SIZE ME_MAP_SIZE
  608. static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
  609. int src_index, int ref_index, int const penalty_factor,
  610. int size, int h, int flags)
  611. {
  612. MotionEstContext * const c= &s->me;
  613. me_cmp_func cmpf, chroma_cmpf;
  614. Minima minima[MAX_SAB_SIZE];
  615. const int minima_count= FFMIN(ABS(c->dia_size), MAX_SAB_SIZE); //never index past minima[]
  616. int i, j;
  617. LOAD_COMMON
  618. LOAD_COMMON2
  619. int map_generation= c->map_generation;
  620. cmpf= s->dsp.me_cmp[size];
  621. chroma_cmpf= s->dsp.me_cmp[size+1];
  622. for(j=i=0; i<ME_MAP_SIZE; i++){
  623. uint32_t key= map[i];
  624. key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
  625. if((key&(-(1<<(2*ME_MAP_MV_BITS)))) != map_generation) continue;
  626. assert(j<MAX_SAB_SIZE); //max j = number of predictors
  627. minima[j].height= score_map[i];
  628. minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
  629. minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
  630. minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
  631. minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
  632. minima[j].checked=0;
  633. if(minima[j].x || minima[j].y)
  634. minima[j].height+= (mv_penalty[((minima[j].x)*(1<<shift))-pred_x] + mv_penalty[((minima[j].y)*(1<<shift))-pred_y])*penalty_factor;
  635. j++;
  636. }
  637. qsort(minima, j, sizeof(Minima), minima_cmp);
  638. for(; j<minima_count; j++){
  639. minima[j].height=256*256*256*64;
  640. minima[j].checked=0;
  641. minima[j].x= minima[j].y=0;
  642. }
  643. for(i=0; i<minima_count; i++){
  644. const int x= minima[i].x;
  645. const int y= minima[i].y;
  646. int d;
  647. if(minima[i].checked) continue;
  648. if( x >= xmax || x <= xmin
  649. || y >= ymax || y <= ymin)
  650. continue;
  651. SAB_CHECK_MV(x-1, y)
  652. SAB_CHECK_MV(x+1, y)
  653. SAB_CHECK_MV(x , y-1)
  654. SAB_CHECK_MV(x , y+1)
  655. minima[i].checked= 1;
  656. }
  657. best[0]= minima[0].x;
  658. best[1]= minima[0].y;
  659. dmin= minima[0].height;
  660. if( best[0] < xmax && best[0] > xmin
  661. && best[1] < ymax && best[1] > ymin){
  662. int d;
  663. //ensure that the reference samples for hpel refinement are in the map
  664. CHECK_MV(best[0]-1, best[1])
  665. CHECK_MV(best[0]+1, best[1])
  666. CHECK_MV(best[0], best[1]-1)
  667. CHECK_MV(best[0], best[1]+1)
  668. }
  669. return dmin;
  670. }
  /**
   * Diamond search with a variable maximum size (c->dia_size).
   *
   * For each dia_size from 1 upwards the four edges of the diamond around the
   * current best position are sampled with CHECK_MV. The start/end bounds of
   * each edge loop are computed from xmin/xmax/ymin/ymax so that only points
   * inside the search range are visited. Whenever a pass moves the best
   * position, dia_size is reset to 0 so the loop restarts from size 1.
   *
   * @param best in/out: best[0] = x, best[1] = y of the best full-pel position
   * @return the best score found
   */
  671. static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
  672. int src_index, int ref_index, int const penalty_factor,
  673. int size, int h, int flags)
  674. {
  675. MotionEstContext * const c= &s->me;
  676. me_cmp_func cmpf, chroma_cmpf;
  677. int dia_size;
  678. LOAD_COMMON
  679. LOAD_COMMON2
  680. int map_generation= c->map_generation;
  681. cmpf= s->dsp.me_cmp[size];
  682. chroma_cmpf= s->dsp.me_cmp[size+1];
  683. for(dia_size=1; dia_size<=c->dia_size; dia_size++){
  684. int dir, start, end;
  685. const int x= best[0];
  686. const int y= best[1];
  687. start= FFMAX(0, y + dia_size - ymax);
  688. end = FFMIN(dia_size, xmax - x + 1);
  689. for(dir= start; dir<end; dir++){
  690. int d;
  691. //check(x + dir,y + dia_size - dir,0, a0)
  692. CHECK_MV(x + dir , y + dia_size - dir);
  693. }
  694. start= FFMAX(0, x + dia_size - xmax);
  695. end = FFMIN(dia_size, y - ymin + 1);
  696. for(dir= start; dir<end; dir++){
  697. int d;
  698. //check(x + dia_size - dir, y - dir,0, a1)
  699. CHECK_MV(x + dia_size - dir, y - dir );
  700. }
  701. start= FFMAX(0, -y + dia_size + ymin );
  702. end = FFMIN(dia_size, x - xmin + 1);
  703. for(dir= start; dir<end; dir++){
  704. int d;
  705. //check(x - dir,y - dia_size + dir,0, a2)
  706. CHECK_MV(x - dir , y - dia_size + dir);
  707. }
  708. start= FFMAX(0, -x + dia_size + xmin );
  709. end = FFMIN(dia_size, ymax - y + 1);
  710. for(dir= start; dir<end; dir++){
  711. int d;
  712. //check(x - dia_size + dir, y + dir,0, a3)
  713. CHECK_MV(x - dia_size + dir, y + dir );
  714. }
  715. if(x!=best[0] || y!=best[1])
  716. dia_size=0;
  717. #if 0
  718. {
  719. int dx, dy, i;
  720. static int stats[8*8];
  721. dx= ABS(x-best[0]);
  722. dy= ABS(y-best[1]);
  723. stats[dy*8 + dx] ++;
  724. if(256*256*256*64 % (stats[0]+1)==0){
  725. for(i=0; i<64; i++){
  726. if((i&7)==0) printf("\n");
  727. printf("%6d ", stats[i]);
  728. }
  729. printf("\n");
  730. }
  731. }
  732. #endif
  733. }
  734. return dmin;
  735. }
  /**
   * Dispatches to one of the full-pel search routines according to
   * s->me.dia_size:
   *   below -1   -> sab_diamond_search
   *   exactly -1 -> funny_diamond_search
   *   0 or 1     -> small_diamond_search
   *   2 or more  -> var_diamond_search
   * All arguments are forwarded unchanged; the callee's score is returned.
   */
  736. static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
  737. int src_index, int ref_index, int const penalty_factor,
  738. int size, int h, int flags){
  739. const int dia= s->me.dia_size;
  740. if(dia < -1)
  741. return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  742. if(dia == -1)
  743. return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  744. if(dia >= 2)
  745. return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  746. //only dia == 0 and dia == 1 remain
  747. return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  748. }
  /**
   * Full-pel EPZS search: evaluates a set of predictor vectors and then
   * refines the best one with diamond_search().
   *
   * Steps, in order:
   * 1. Select penalty factor and comparators for the pre-pass (c->pre_pass)
   *    or the normal pass, and start a new map generation.
   * 2. Score the zero vector and record it in map[0]/score_map[0].
   * 3. On the first slice line test P_LEFT and the co-located last_mv;
   *    otherwise return early with the zero vector and c->skip=1 when the
   *    zero score is below the mv0 threshold and P_LEFT, P_TOP and P_TOPRIGHT
   *    are all zero; else test P_MEDIAN and, if dmin is still above h*h*2,
   *    the co-located last_mv, P_LEFT, P_TOP and P_TOPRIGHT.
   * 4. If dmin is above h*h*4, test neighbouring last_mv entries (left/top in
   *    the pre-pass, right/bottom otherwise).
   * 5. If last_predictor_count is set, test last_mv of every macroblock in
   *    the surrounding window whose scaled vector lies in range.
   * 6. Run diamond_search() and store the result in *mx_ptr / *my_ptr.
   *
   * P_LEFT, P_TOP, P_TOPRIGHT and P_MEDIAN are defined elsewhere; they are
   * presumably accessors into P[] -- confirm against their definitions.
   * last_mv entries are scaled by ref_mv_scale with rounding ((+ 1<<15) >> 16).
   *
   * @return the best full-pel score found
   */
  749. static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
  750. int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
  751. int ref_mv_scale, int flags, int size, int h)
  752. {
  753. MotionEstContext * const c= &s->me;
  754. int best[2]={0, 0};
  755. int d, dmin;
  756. int map_generation;
  757. int penalty_factor;
  758. const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
  759. const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
  760. me_cmp_func cmpf, chroma_cmpf;
  761. LOAD_COMMON
  762. LOAD_COMMON2
  763. if(c->pre_pass){
  764. penalty_factor= c->pre_penalty_factor;
  765. cmpf= s->dsp.me_pre_cmp[size];
  766. chroma_cmpf= s->dsp.me_pre_cmp[size+1];
  767. }else{
  768. penalty_factor= c->penalty_factor;
  769. cmpf= s->dsp.me_cmp[size];
  770. chroma_cmpf= s->dsp.me_cmp[size+1];
  771. }
  772. map_generation= update_map_generation(c);
  773. assert(cmpf);
  774. dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
  775. map[0]= map_generation;
  776. score_map[0]= dmin;
  777. /* first line */
  778. if (s->first_slice_line) {
  779. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  780. CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  781. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  782. }else{
  783. if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
  784. && ( P_LEFT[0] |P_LEFT[1]
  785. |P_TOP[0] |P_TOP[1]
  786. |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
  787. *mx_ptr= 0;
  788. *my_ptr= 0;
  789. c->skip=1;
  790. return dmin;
  791. }
  792. CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
  793. if(dmin>h*h*2){
  794. CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  795. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  796. CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
  797. CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
  798. CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
  799. }
  800. }
  801. if(dmin>h*h*4){
  802. if(c->pre_pass){
  803. CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
  804. (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
  805. if(!s->first_slice_line)
  806. CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
  807. (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
  808. }else{
  809. CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
  810. (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
  811. if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
  812. CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
  813. (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
  814. }
  815. }
  816. if(c->avctx->last_predictor_count){
  817. const int count= c->avctx->last_predictor_count;
  818. const int xstart= FFMAX(0, s->mb_x - count);
  819. const int ystart= FFMAX(0, s->mb_y - count);
  820. const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
  821. const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
  822. int mb_y;
  823. for(mb_y=ystart; mb_y<yend; mb_y++){
  824. int mb_x;
  825. for(mb_x=xstart; mb_x<xend; mb_x++){
  826. const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
  827. int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
  828. int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
  829. if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
  830. CHECK_MV(mx,my)
  831. }
  832. }
  833. }
  834. //check(best[0],best[1],0, b0)
  835. dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  836. //check(best[0],best[1],0, b1)
  837. *mx_ptr= best[0];
  838. *my_ptr= best[1];
  839. // printf("%d %d %d \n", best[0], best[1], dmin);
  840. return dmin;
  841. }
  /**
   * Entry point for the full-pel EPZS search.
   *
   * Forwards to epzs_motion_search_internal(). When c->flags is 0, h is 16
   * and size is 0, those three values are passed as literal constants instead
   * of variables; since the internal function is always_inline, this
   * presumably lets the compiler emit a copy specialised for the common case
   * (see the FIXME below). Both branches compute the same result.
   *
   * @return the best full-pel score; the vector is written to *mx_ptr/*my_ptr
   */
  842. //this function is dedicated to the braindamaged gcc
  843. inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
  844. int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
  845. int ref_mv_scale, int size, int h)
  846. {
  847. MotionEstContext * const c= &s->me;
  848. //FIXME convert other functions in the same way if faster
  849. if(c->flags==0 && h==16 && size==0){
  850. return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
  851. // case FLAG_QPEL:
  852. // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
  853. }else{
  854. return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
  855. }
  856. }
/**
 * Predictor-based motion search with size fixed to 1 and h fixed to 8.
 *
 * A list of candidate vectors is tried first (entries of the predictor table
 * shifted right by `shift`, and entries of last_mv scaled by ref_mv_scale);
 * the result is then handed to diamond_search() for refinement.
 *
 * NOTE(review): CHECK_MV, CHECK_CLIPED_MV, LOAD_COMMON2 and the P_* names are
 * macros defined outside this function; presumably the CHECK_* macros score a
 * candidate and keep the minimum in dmin/best, using the locals d,
 * map_generation, cmpf, chroma_cmpf and flags implicitly - confirm before
 * renaming or removing any of those locals.
 *
 * @param s            encoder context; s->me provides flags and penalty_factor
 * @param mx_ptr       receives best[0] after the diamond search
 * @param my_ptr       receives best[1] after the diamond search
 * @param P            predictor table (presumably read through the P_* macros)
 * @param src_index    passed to diamond_search()
 * @param ref_index    passed to diamond_search()
 * @param last_mv      vector table, indexed relative to mb_x + mb_y*mb_stride
 * @param ref_mv_scale multiplier for last_mv entries; the product is rounded
 *                     (1<<15 added) and shifted right by 16
 * @return the value returned by diamond_search()
 */
static int epzs_motion_search4(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int P[10][2],
int src_index, int ref_index, int16_t (*last_mv)[2],
int ref_mv_scale)
{
MotionEstContext * const c= &s->me;
int best[2]={0, 0};
int d, dmin;
int map_generation;
const int penalty_factor= c->penalty_factor;
const int size=1;
const int h=8;
// index of the current macroblock inside last_mv, and the distance between rows
const int ref_mv_stride= s->mb_stride;
const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
me_cmp_func cmpf, chroma_cmpf;
LOAD_COMMON
int flags= c->flags;
LOAD_COMMON2
// comparison functions picked by size; not referenced directly below
// (presumably consumed by the CHECK_* macros)
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
map_generation= update_map_generation(c);
// large starting score, lowered as candidates are checked
dmin = 1000000;
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
/* first line */
if (s->first_slice_line) {
// first slice line: only the left predictor, the last_mv entry at the
// current index and P_MV1 are tried
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
}else{
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
//FIXME try some early stop
// the remaining predictors are only tried while the score is above 128
if(dmin>64*2){
CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
}
}
// score still above 256: also try the last_mv entries one position further
// (index+1) and one row further (index+stride); the latter only when
// mb_y+1 is still below end_mb_y
if(dmin>64*4){
CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
}
// refine around the best candidate found so far
dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
*mx_ptr= best[0];
*my_ptr= best[1];
// printf("%d %d %d \n", best[0], best[1], dmin);
return dmin;
}
//try to merge with above FIXME (needs PSNR test)
/**
 * Predictor-based motion search with size fixed to 0 and h fixed to 8.
 *
 * The statements are the same as in epzs_motion_search4() except for the
 * value of size: candidate vectors from the predictor table and from last_mv
 * are tried first, then diamond_search() refines the result.
 *
 * NOTE(review): CHECK_MV, CHECK_CLIPED_MV, LOAD_COMMON2 and the P_* names are
 * macros defined outside this function; presumably the CHECK_* macros score a
 * candidate and keep the minimum in dmin/best, using the locals d,
 * map_generation, cmpf, chroma_cmpf and flags implicitly - confirm before
 * renaming or removing any of those locals.
 *
 * @param s            encoder context; s->me provides flags and penalty_factor
 * @param mx_ptr       receives best[0] after the diamond search
 * @param my_ptr       receives best[1] after the diamond search
 * @param P            predictor table (presumably read through the P_* macros)
 * @param src_index    passed to diamond_search()
 * @param ref_index    passed to diamond_search()
 * @param last_mv      vector table, indexed relative to mb_x + mb_y*mb_stride
 * @param ref_mv_scale multiplier for last_mv entries; the product is rounded
 *                     (1<<15 added) and shifted right by 16
 * @return the value returned by diamond_search()
 */
static int epzs_motion_search2(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int P[10][2],
int src_index, int ref_index, int16_t (*last_mv)[2],
int ref_mv_scale)
{
MotionEstContext * const c= &s->me;
int best[2]={0, 0};
int d, dmin;
int map_generation;
const int penalty_factor= c->penalty_factor;
const int size=0; //FIXME pass as arg
const int h=8;
// index of the current macroblock inside last_mv, and the distance between rows
const int ref_mv_stride= s->mb_stride;
const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
me_cmp_func cmpf, chroma_cmpf;
LOAD_COMMON
int flags= c->flags;
LOAD_COMMON2
// comparison functions picked by size; not referenced directly below
// (presumably consumed by the CHECK_* macros)
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
map_generation= update_map_generation(c);
// large starting score, lowered as candidates are checked
dmin = 1000000;
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
/* first line */
if (s->first_slice_line) {
// first slice line: only the left predictor, the last_mv entry at the
// current index and P_MV1 are tried
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
}else{
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
//FIXME try some early stop
// the remaining predictors are only tried while the score is above 128
if(dmin>64*2){
CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
}
}
// score still above 256: also try the last_mv entries one position further
// (index+1) and one row further (index+stride); the latter only when
// mb_y+1 is still below end_mb_y
if(dmin>64*4){
CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
}
// refine around the best candidate found so far
dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
*mx_ptr= best[0];
*my_ptr= best[1];
// printf("%d %d %d \n", best[0], best[1], dmin);
return dmin;
}