You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1239 lines
43KB

  1. /*
  2. * Motion estimation
  3. * Copyright (c) 2002-2004 Michael Niedermayer
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. */
  22. /**
  23. * @file motion_est_template.c
  24. * Motion estimation template.
  25. */
  /*
   * LOAD_COMMON: declares local shorthands for fields of the MotionEstContext
   * pointer `c`, which must be in scope where the macro is expanded:
   * score_map, the xmin/ymin/xmax/ymax limits, the current mv_penalty table
   * and the predicted vector pred_x/pred_y.
   * The first five are tagged attribute_unused because not every user
   * references all of them.
   * The last line deliberately has no trailing backslash: a backslash there
   * would splice whatever line follows into the macro's replacement list.
   */
  26. //let's hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
  27. #define LOAD_COMMON\
  28. uint32_t attribute_unused * const score_map= c->score_map;\
  29. const int attribute_unused xmin= c->xmin;\
  30. const int attribute_unused ymin= c->ymin;\
  31. const int attribute_unused xmax= c->xmax;\
  32. const int attribute_unused ymax= c->ymax;\
  33. uint8_t *mv_penalty= c->current_mv_penalty;\
  34. const int pred_x= c->pred_x;\
  35. const int pred_y= c->pred_y;
  /*
   * CHECK_HALF_MV(dx, dy, x, y): score one half-pel candidate.
   * (x, y) is the integer-pel position and (dx, dy) the half-pel offset; the
   * candidate vector in half-pel units is (hx, hy) = (2*x+dx, 2*y+dy).
   * The score is cmp() evaluated with cmp_sub/chroma_cmp_sub plus the
   * mv_penalty cost of (hx, hy) relative to (pred_x, pred_y), scaled by
   * penalty_factor. COPY3_IF_LT (defined elsewhere) then updates dmin, bx and
   * by -- presumably only when d < dmin, as the name suggests.
   * Relies on these names at the expansion site: s, size, h, ref_index,
   * src_index, cmp_sub, chroma_cmp_sub, flags, mv_penalty, pred_x, pred_y,
   * penalty_factor, d, dmin, bx, by.
   */
  36. #define CHECK_HALF_MV(dx, dy, x, y)\
  37. {\
  38. const int hx= 2*(x)+(dx);\
  39. const int hy= 2*(y)+(dy);\
  40. d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
  41. d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
  42. COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
  43. }
  44. #if 0
  /*
   * Older half-pel refinement that tests all eight half-pel neighbours of the
   * integer-pel vector (*mx_ptr, *my_ptr). It is compiled out by the
   * enclosing "#if 0"; the "#else" branch holds the variant actually built.
   *
   * On return *mx_ptr / *my_ptr are in half-pel units (or 0,0 when c->skip is
   * set) and the best score is returned.
   *
   * NOTE(review): as visible here the body still refers to names that this
   * function does not declare (c, n, h, flags, src_index, ref_index,
   * hpel_put, chroma_hpel_put, cmpf, chroma_cmpf, CMP_HPEL), so it would need
   * porting before it could be re-enabled.
   */
  45. static int hpel_motion_search(MpegEncContext * s,
  46. int *mx_ptr, int *my_ptr, int dmin,
  47. uint8_t *ref_data[3],
  48. int size)
  49. {
  50. const int xx = 16 * s->mb_x + 8*(n&1);
  51. const int yy = 16 * s->mb_y + 8*(n>>1);
  52. const int mx = *mx_ptr;
  53. const int my = *my_ptr;
  54. const int penalty_factor= c->sub_penalty_factor;
  55. LOAD_COMMON
  56. // INIT;
  57. //FIXME factorize
  58. me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
  59. if(s->no_rounding /*FIXME b_type*/){
  60. hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
  61. chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
  62. }else{
  63. hpel_put=& s->dsp.put_pixels_tab[size];
  64. chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
  65. }
  66. cmpf= s->dsp.me_cmp[size];
  67. chroma_cmpf= s->dsp.me_cmp[size+1];
  68. cmp_sub= s->dsp.me_sub_cmp[size];
  69. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  70. if(c->skip){ //FIXME somehow move up (benchmark)
  71. *mx_ptr = 0;
  72. *my_ptr = 0;
  73. return dmin;
  74. }
  /* the integer search used a different compare function: rescore the start point */
  75. if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
  76. CMP_HPEL(dmin, 0, 0, mx, my, size);
  77. if(mx || my)
  78. dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
  79. }
  /* only refine when the vector is strictly inside the limits */
  80. if (mx > xmin && mx < xmax &&
  81. my > ymin && my < ymax) {
  82. int bx=2*mx, by=2*my;
  83. int d= dmin;
  84. CHECK_HALF_MV(1, 1, mx-1, my-1)
  85. CHECK_HALF_MV(0, 1, mx , my-1)
  86. CHECK_HALF_MV(1, 1, mx , my-1)
  87. CHECK_HALF_MV(1, 0, mx-1, my )
  88. CHECK_HALF_MV(1, 0, mx , my )
  89. CHECK_HALF_MV(1, 1, mx-1, my )
  90. CHECK_HALF_MV(0, 1, mx , my )
  91. CHECK_HALF_MV(1, 1, mx , my )
  /* all four bounds must hold at once (same check as the enabled variant) */
  92. assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
  93. *mx_ptr = bx;
  94. *my_ptr = by;
  95. }else{
  96. *mx_ptr =2*mx;
  97. *my_ptr =2*my;
  98. }
  99. return dmin;
  100. }
  101. #else
  /**
   * Half-pel refinement of an integer-pel motion vector.
   *
   * On entry *mx_ptr / *my_ptr hold the integer-pel vector. On return they
   * hold the selected vector in half-pel units (2*mx, 2*my when no half-pel
   * candidate scores lower), or (0, 0) when c->skip is set.
   *
   * Instead of testing all eight half-pel neighbours, the scores of the four
   * direct integer neighbours stored in score_map (t, l, r, b, each with its
   * mv penalty added) steer the choice: every path below evaluates exactly
   * four candidates through CHECK_HALF_MV.
   *
   * @param dmin       best score found by the integer-pel search
   * @param src_index  passed through to cmp()
   * @param ref_index  passed through to cmp()
   * @param size, h    select the compare functions / passed through to cmp()
   * @return the best score after refinement
   */
  102. static int hpel_motion_search(MpegEncContext * s,
  103. int *mx_ptr, int *my_ptr, int dmin,
  104. int src_index, int ref_index,
  105. int size, int h)
  106. {
  107. MotionEstContext * const c= &s->me;
  108. const int mx = *mx_ptr;
  109. const int my = *my_ptr;
  110. const int penalty_factor= c->sub_penalty_factor;
  111. me_cmp_func cmp_sub, chroma_cmp_sub;
  112. int bx=2*mx, by=2*my;
  113. LOAD_COMMON
  114. int flags= c->sub_flags;
  115. //FIXME factorize
  116. cmp_sub= s->dsp.me_sub_cmp[size];
  117. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  118. if(c->skip){ //FIXME move out of hpel?
  119. *mx_ptr = 0;
  120. *my_ptr = 0;
  121. return dmin;
  122. }
  /* dmin came from a different compare function: rescore the start point with the sub-pel one */
  123. if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
  124. dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
  125. if(mx || my || size>0)
  126. dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
  127. }
  /* refine only when the vector lies strictly inside the limits */
  128. if (mx > xmin && mx < xmax &&
  129. my > ymin && my < ymax) {
  130. int d= dmin;
  131. const int index= (my<<ME_MAP_SHIFT) + mx;
  /* t/l/r/b: stored scores of the top/left/right/bottom integer neighbours plus their mv penalty */
  132. const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
  133. + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
  134. const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
  135. + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
  136. const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
  137. + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
  138. const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
  139. + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
  140. #if 1
  /* debug-only consistency check: each of the four neighbours must carry the
     key of the current map generation, otherwise the scores read above are stale */
  141. int key;
  142. int map_generation= c->map_generation;
  143. #ifndef NDEBUG
  144. uint32_t *map= c->map;
  145. #endif
  146. key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
  147. assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
  148. key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
  149. assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
  150. key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
  151. assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
  152. key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
  153. assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
  154. #endif
  /* top neighbour not worse than bottom: test the upper half-pel row first */
  155. if(t<=b){
  156. CHECK_HALF_MV(0, 1, mx ,my-1)
  157. if(l<=r){
  158. CHECK_HALF_MV(1, 1, mx-1, my-1)
  159. if(t+r<=b+l){
  160. CHECK_HALF_MV(1, 1, mx , my-1)
  161. }else{
  162. CHECK_HALF_MV(1, 1, mx-1, my )
  163. }
  164. CHECK_HALF_MV(1, 0, mx-1, my )
  165. }else{
  166. CHECK_HALF_MV(1, 1, mx , my-1)
  167. if(t+l<=b+r){
  168. CHECK_HALF_MV(1, 1, mx-1, my-1)
  169. }else{
  170. CHECK_HALF_MV(1, 1, mx , my )
  171. }
  172. CHECK_HALF_MV(1, 0, mx , my )
  173. }
  174. }else{
  /* bottom neighbour is better: test the lower half-pel row */
  175. if(l<=r){
  176. if(t+l<=b+r){
  177. CHECK_HALF_MV(1, 1, mx-1, my-1)
  178. }else{
  179. CHECK_HALF_MV(1, 1, mx , my )
  180. }
  181. CHECK_HALF_MV(1, 0, mx-1, my)
  182. CHECK_HALF_MV(1, 1, mx-1, my)
  183. }else{
  184. if(t+r<=b+l){
  185. CHECK_HALF_MV(1, 1, mx , my-1)
  186. }else{
  187. CHECK_HALF_MV(1, 1, mx-1, my)
  188. }
  189. CHECK_HALF_MV(1, 0, mx , my)
  190. CHECK_HALF_MV(1, 1, mx , my)
  191. }
  192. CHECK_HALF_MV(0, 1, mx , my)
  193. }
  194. assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
  195. }
  196. *mx_ptr = bx;
  197. *my_ptr = by;
  198. return dmin;
  199. }
  200. #endif
  /**
   * Sub-pel "refinement" that performs no search at all.
   *
   * It only doubles *mx_ptr and *my_ptr (hpel_motion_search likewise reports
   * its result as 2*mx / 2*my) and returns dmin unchanged. s, src_index,
   * ref_index, size and h are unused; they exist so the signature matches the
   * other sub-pel search functions in this file.
   *
   * Multiplication is used rather than "<<= 1" because left-shifting a
   * negative int is undefined behaviour in C and vector components may be
   * negative; for non-negative values the result is the same.
   */
  201. static int no_sub_motion_search(MpegEncContext * s,
  202. int *mx_ptr, int *my_ptr, int dmin,
  203. int src_index, int ref_index,
  204. int size, int h)
  205. {
  206. (*mx_ptr) *= 2;
  207. (*my_ptr) *= 2;
  208. return dmin;
  209. }
  /**
   * Score the sub-pel vector (mx, my) with the macroblock-decision compare
   * functions (s->dsp.mb_cmp) and the mb_flags of the motion estimation context.
   *
   * The vector is split into an integer part (mx>>(qpel+1)) and a sub-pel part
   * (mx&mask) before being handed to cmp().
   * NOTE(review): mask= 1+2*qpel and the shift by qpel+1 only give the
   * half-pel/quarter-pel split if FLAG_QPEL has the value 1 -- confirm against
   * its definition.
   *
   * @param add_rate  when non-zero, the mv_penalty cost of (mx, my) relative to
   *                  (pred_x, pred_y), scaled by mb_penalty_factor, is added --
   *                  except for the zero vector with size 0
   * @return the resulting score
   */
  210. inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
  211. int ref_index, int size, int h, int add_rate)
  212. {
  213. // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
  214. MotionEstContext * const c= &s->me;
  215. const int penalty_factor= c->mb_penalty_factor;
  216. const int flags= c->mb_flags;
  217. const int qpel= flags & FLAG_QPEL;
  218. const int mask= 1+2*qpel;
  219. me_cmp_func cmp_sub, chroma_cmp_sub;
  220. int d;
  221. LOAD_COMMON
  222. //FIXME factorize
  223. cmp_sub= s->dsp.mb_cmp[size];
  224. chroma_cmp_sub= s->dsp.mb_cmp[size+1];
  225. // assert(!c->skip);
  226. // assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
  227. d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
  228. //FIXME check cbp before adding penalty for (0,0) vector
  229. if(add_rate && (mx || my || size>0))
  230. d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
  231. return d;
  232. }
  /*
   * CHECK_QUARTER_MV(dx, dy, x, y): score one quarter-pel candidate.
   * (x, y) is the integer-pel position and (dx, dy) the quarter-pel offset; the
   * candidate vector in quarter-pel units is (hx, hy) = (4*x+dx, 4*y+dy).
   * The score is cmp() evaluated with cmpf/chroma_cmpf plus the mv_penalty
   * cost of (hx, hy) relative to (pred_x, pred_y), scaled by penalty_factor.
   * COPY3_IF_LT (defined elsewhere) then updates dmin, bx and by.
   * Relies on these names at the expansion site: s, size, h, ref_index,
   * src_index, cmpf, chroma_cmpf, flags, mv_penalty, pred_x, pred_y,
   * penalty_factor, d, dmin, bx, by.
   */
  233. #define CHECK_QUARTER_MV(dx, dy, x, y)\
  234. {\
  235. const int hx= 4*(x)+(dx);\
  236. const int hy= 4*(y)+(dy);\
  237. d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
  238. d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
  239. COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
  240. }
  /**
   * Quarter-pel refinement of an integer-pel motion vector.
   *
   * On entry *mx_ptr / *my_ptr hold the integer-pel vector. On return they
   * hold the selected vector in quarter-pel units (4*mx, 4*my when nothing
   * scores lower), or (0, 0) when c->skip is set.
   *
   * For a vector strictly inside the limits, the scores already stored in
   * score_map for the centre and its neighbours are used to estimate a score
   * for every quarter-pel offset (nx, ny) in [-3, 3] x [-3, 3] except (0, 0).
   * The eight lowest estimates are kept, sorted, in best[] / best_pos[], and
   * the first me_subpel_quality of them (at most eight, the size of the list)
   * are then really evaluated with CHECK_QUARTER_MV.
   *
   * @param dmin       best score found by the integer-pel search
   * @param src_index  passed through to cmp()
   * @param ref_index  passed through to cmp()
   * @param size, h    select the compare functions / passed through to cmp()
   * @return the best score after refinement
   */
  241. static int qpel_motion_search(MpegEncContext * s,
  242. int *mx_ptr, int *my_ptr, int dmin,
  243. int src_index, int ref_index,
  244. int size, int h)
  245. {
  246. MotionEstContext * const c= &s->me;
  247. const int mx = *mx_ptr;
  248. const int my = *my_ptr;
  249. const int penalty_factor= c->sub_penalty_factor;
  250. const int map_generation= c->map_generation;
  251. const int subpel_quality= c->avctx->me_subpel_quality;
  252. uint32_t *map= c->map;
  253. me_cmp_func cmpf, chroma_cmpf;
  254. me_cmp_func cmp_sub, chroma_cmp_sub;
  255. LOAD_COMMON
  256. int flags= c->sub_flags;
  257. cmpf= s->dsp.me_cmp[size];
  258. chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
  259. //FIXME factorize
  260. cmp_sub= s->dsp.me_sub_cmp[size];
  261. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  262. if(c->skip){ //FIXME somehow move up (benchmark)
  263. *mx_ptr = 0;
  264. *my_ptr = 0;
  265. return dmin;
  266. }
  /* dmin came from a different compare function: rescore the start point with the sub-pel one */
  267. if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
  268. dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
  269. if(mx || my || size>0)
  270. dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
  271. }
  272. if (mx > xmin && mx < xmax &&
  273. my > ymin && my < ymax) {
  274. int bx=4*mx, by=4*my;
  275. int d= dmin;
  276. int i, nx, ny;
  277. const int index= (my<<ME_MAP_SHIFT) + mx;
  278. const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
  279. const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
  280. const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
  281. const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
  /* from here on `c` is the centre score and hides the context pointer,
     which is why the code below goes through s->me instead */
  282. const int c= score_map[(index )&(ME_MAP_SIZE-1)];
  283. int best[8];
  284. int best_pos[8][2];
  /* every byte of best[] becomes 64, i.e. each entry starts as a large positive sentinel */
  285. memset(best, 64, sizeof(int)*8);
  286. #if 1
  287. if(s->me.dia_size>=2){
  /* the diagonal neighbours are taken from score_map as well */
  288. const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  289. const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  290. const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  291. const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  292. for(ny= -3; ny <= 3; ny++){
  293. for(nx= -3; nx <= 3; nx++){
  294. //FIXME this could overflow (unlikely though)
  295. const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
  296. const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
  297. const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
  298. int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
  299. int i;
  /* within [-3, 3] this skips only the centre (0, 0) */
  300. if((nx&3)==0 && (ny&3)==0) continue;
  301. score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
  302. // if(nx&1) score-=1024*c->penalty_factor;
  303. // if(ny&1) score-=1024*c->penalty_factor;
  /* insert into the sorted list of the eight best estimates */
  304. for(i=0; i<8; i++){
  305. if(score < best[i]){
  306. memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
  307. memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
  308. best[i]= score;
  309. best_pos[i][0]= nx + 4*mx;
  310. best_pos[i][1]= ny + 4*my;
  311. break;
  312. }
  313. }
  314. }
  315. }
  316. }else{
  317. int tl;
  318. //FIXME this could overflow (unlikely though)
  319. const int cx = 4*(r - l);
  320. const int cx2= r + l - 2*c;
  321. const int cy = 4*(b - t);
  322. const int cy2= b + t - 2*c;
  323. int cxy;
  /* the "&& 0" disables the map lookup, so tl is always recomputed with cmp() */
  324. if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
  325. tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  326. }else{
  327. tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
  328. }
  329. cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
  /* the fitted surface must reproduce the five known scores (scaled by 32) */
  330. assert(16*cx2 + 4*cx + 32*c == 32*r);
  331. assert(16*cx2 - 4*cx + 32*c == 32*l);
  332. assert(16*cy2 + 4*cy + 32*c == 32*b);
  333. assert(16*cy2 - 4*cy + 32*c == 32*t);
  334. assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
  335. for(ny= -3; ny <= 3; ny++){
  336. for(nx= -3; nx <= 3; nx++){
  337. //FIXME this could overflow (unlikely though)
  338. int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
  339. int i;
  340. if((nx&3)==0 && (ny&3)==0) continue;
  /* estimates in this branch are scaled by 32, so the penalty is scaled too */
  341. score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
  342. // if(nx&1) score-=32*c->penalty_factor;
  343. // if(ny&1) score-=32*c->penalty_factor;
  344. for(i=0; i<8; i++){
  345. if(score < best[i]){
  346. memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
  347. memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
  348. best[i]= score;
  349. best_pos[i][0]= nx + 4*mx;
  350. best_pos[i][1]= ny + 4*my;
  351. break;
  352. }
  353. }
  354. }
  355. }
  356. }
  /* best_pos[] has eight entries; never read past it, whatever me_subpel_quality is set to */
  357. for(i=0; i<subpel_quality && i<8; i++){
  358. nx= best_pos[i][0];
  359. ny= best_pos[i][1];
  360. CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
  361. }
  362. #if 0
  363. const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  364. const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  365. const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  366. const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  367. // if(l < r && l < t && l < b && l < tl && l < bl && l < tr && l < br && bl < tl){
  368. if(tl<br){
  369. // nx= FFMAX(4*mx - bx, bx - 4*mx);
  370. // ny= FFMAX(4*my - by, by - 4*my);
  371. static int stats[7][7], count;
  372. count++;
  373. stats[4*mx - bx + 3][4*my - by + 3]++;
  374. if(256*256*256*64 % count ==0){
  375. for(i=0; i<49; i++){
  376. if((i%7)==0) printf("\n");
  377. printf("%6d ", stats[0][i]);
  378. }
  379. printf("\n");
  380. }
  381. }
  382. #endif
  383. #else
  384. CHECK_QUARTER_MV(2, 2, mx-1, my-1)
  385. CHECK_QUARTER_MV(0, 2, mx , my-1)
  386. CHECK_QUARTER_MV(2, 2, mx , my-1)
  387. CHECK_QUARTER_MV(2, 0, mx , my )
  388. CHECK_QUARTER_MV(2, 2, mx , my )
  389. CHECK_QUARTER_MV(0, 2, mx , my )
  390. CHECK_QUARTER_MV(2, 2, mx-1, my )
  391. CHECK_QUARTER_MV(2, 0, mx-1, my )
  392. nx= bx;
  393. ny= by;
  394. for(i=0; i<8; i++){
  395. int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
  396. int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
  397. CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
  398. }
  399. #endif
  400. #if 0
  401. //outer ring
  402. CHECK_QUARTER_MV(1, 3, mx-1, my-1)
  403. CHECK_QUARTER_MV(1, 2, mx-1, my-1)
  404. CHECK_QUARTER_MV(1, 1, mx-1, my-1)
  405. CHECK_QUARTER_MV(2, 1, mx-1, my-1)
  406. CHECK_QUARTER_MV(3, 1, mx-1, my-1)
  407. CHECK_QUARTER_MV(0, 1, mx , my-1)
  408. CHECK_QUARTER_MV(1, 1, mx , my-1)
  409. CHECK_QUARTER_MV(2, 1, mx , my-1)
  410. CHECK_QUARTER_MV(3, 1, mx , my-1)
  411. CHECK_QUARTER_MV(3, 2, mx , my-1)
  412. CHECK_QUARTER_MV(3, 3, mx , my-1)
  413. CHECK_QUARTER_MV(3, 0, mx , my )
  414. CHECK_QUARTER_MV(3, 1, mx , my )
  415. CHECK_QUARTER_MV(3, 2, mx , my )
  416. CHECK_QUARTER_MV(3, 3, mx , my )
  417. CHECK_QUARTER_MV(2, 3, mx , my )
  418. CHECK_QUARTER_MV(1, 3, mx , my )
  419. CHECK_QUARTER_MV(0, 3, mx , my )
  420. CHECK_QUARTER_MV(3, 3, mx-1, my )
  421. CHECK_QUARTER_MV(2, 3, mx-1, my )
  422. CHECK_QUARTER_MV(1, 3, mx-1, my )
  423. CHECK_QUARTER_MV(1, 2, mx-1, my )
  424. CHECK_QUARTER_MV(1, 1, mx-1, my )
  425. CHECK_QUARTER_MV(1, 0, mx-1, my )
  426. #endif
  427. assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
  428. *mx_ptr = bx;
  429. *my_ptr = by;
  430. }else{
  431. *mx_ptr =4*mx;
  432. *my_ptr =4*my;
  433. }
  434. return dmin;
  435. }
  /*
   * CHECK_MV(x, y): evaluate the integer-pel position (x, y) unless the map
   * already holds its key.
   * key combines y, x and map_generation; index is the slot of (x, y) in the
   * map / score_map tables. When map[index] differs from key, cmp() is called,
   * key and the raw score are stored in map[index] / score_map[index], the
   * mv_penalty cost of the vector (shifted left by `shift`) is added, and
   * COPY3_IF_LT (defined elsewhere) updates dmin, best[0] and best[1].
   * The asserts require (x, y) to lie within [xmin, xmax] x [ymin, ymax].
   * Relies on these names at the expansion site: map, score_map,
   * map_generation, xmin, xmax, ymin, ymax, s, size, h, ref_index, src_index,
   * cmpf, chroma_cmpf, flags, shift, mv_penalty, pred_x, pred_y,
   * penalty_factor, d, dmin, best.
   * NOTE(review): (y)<<ME_MAP_MV_BITS, (y)<<ME_MAP_SHIFT and (x)<<shift
   * left-shift the coordinates; if they can be negative this is undefined
   * behaviour in ISO C -- confirm against the callers.
   */
  436. #define CHECK_MV(x,y)\
  437. {\
  438. const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
  439. const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
  440. assert((x) >= xmin);\
  441. assert((x) <= xmax);\
  442. assert((y) >= ymin);\
  443. assert((y) <= ymax);\
  444. /*printf("check_mv %d %d\n", x, y);*/\
  445. if(map[index]!=key){\
  446. d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
  447. map[index]= key;\
  448. score_map[index]= d;\
  449. d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
  450. /*printf("score:%d\n", d);*/\
  451. COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
  452. }\
  453. }
  /*
   * CHECK_CLIPPED_MV(ax, ay): clamp (ax, ay) into [xmin, xmax] x [ymin, ymax]
   * and hand the clamped position to CHECK_MV.
   * Each argument is evaluated once, into the locals Lx / Ly.
   */
  454. #define CHECK_CLIPPED_MV(ax,ay)\
  455. {\
  456. const int Lx= ax;\
  457. const int Ly= ay;\
  458. const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
  459. const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
  460. CHECK_MV(Lx2, Ly2)\
  461. }
  /*
   * CHECK_MV_DIR(x, y, new_dir): like CHECK_MV, but without the range asserts
   * and with direction tracking: when the candidate's score is lower than
   * dmin, best[0], best[1] and dmin are updated and next_dir is set to new_dir.
   * Positions whose key is already in the map are skipped.
   * Relies on the same caller names as CHECK_MV, plus next_dir.
   */
  462. #define CHECK_MV_DIR(x,y,new_dir)\
  463. {\
  464. const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
  465. const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
  466. /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
  467. if(map[index]!=key){\
  468. d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
  469. map[index]= key;\
  470. score_map[index]= d;\
  471. d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
  472. /*printf("score:%d\n", d);*/\
  473. if(d<dmin){\
  474. best[0]=x;\
  475. best[1]=y;\
  476. dmin=d;\
  477. next_dir= new_dir;\
  478. }\
  479. }\
  480. }
  /*
   * check(x, y, S, v): debugging aid that prints a line whenever (x, y) lies
   * outside the limits xmin/xmax/ymin/ymax shifted left by S. The message
   * contains the violated limit, x, y, s->mb_x and s->mb_y, followed by the
   * limit's name and the stringified tag v.
   * Expands to four consecutive if statements (not a single statement).
   * The last line deliberately has no trailing backslash: a backslash there
   * would splice whatever line follows into the macro's replacement list.
   */
  481. #define check(x,y,S,v)\
  482. if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
  483. if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
  484. if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
  485. if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);
  /*
   * LOAD_COMMON2: declares the locals used by the CHECK_MV family:
   * map (from c->map), qpel (flags & FLAG_QPEL) and shift (1 + qpel).
   * `c` and `flags` must be in scope where the macro is expanded.
   * The last line deliberately has no trailing backslash: a backslash there
   * would splice whatever line follows into the macro's replacement list.
   */
  486. #define LOAD_COMMON2\
  487. uint32_t *map= c->map;\
  488. const int qpel= flags&FLAG_QPEL;\
  489. const int shift= 1+qpel;
  /**
   * Iterative search over the four direct neighbours of the current best
   * integer-pel position.
   *
   * Each round tests the left, top, right and bottom neighbour of best[]
   * (subject to the xmin/ymin/xmax/ymax limits) through CHECK_MV_DIR and moves
   * to whichever improves dmin. The neighbour lying opposite to the direction
   * of the previous move is not tested again. The search stops when a round
   * brings no improvement.
   *
   * @param best  in: start position, out: best position found (best[0]=x, best[1]=y)
   * @param dmin  score of the start position
   * @return the best score found
   */
  490. static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
  491. int src_index, int ref_index, int const penalty_factor,
  492. int size, int h, int flags)
  493. {
  494. MotionEstContext * const c= &s->me;
  495. me_cmp_func cmpf, chroma_cmpf;
  496. int next_dir=-1;
  497. LOAD_COMMON
  498. LOAD_COMMON2
  499. int map_generation= c->map_generation;
  500. cmpf= s->dsp.me_cmp[size];
  501. chroma_cmpf= s->dsp.me_cmp[size+1];
  502. { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
  503. const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
  504. const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
  505. if(map[index]!=key){ //this will be executed only very rarely
  506. score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
  507. map[index]= key;
  508. }
  509. }
  510. for(;;){
  511. int d;
  /* dir: direction of the move made in the previous round (-1 = none) */
  512. const int dir= next_dir;
  513. const int x= best[0];
  514. const int y= best[1];
  515. next_dir=-1;
  516. //printf("%d", dir);
  /* directions: 0 = left, 1 = up, 2 = right, 3 = down; skip the one leading back */
  517. if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
  518. if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
  519. if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
  520. if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
  /* no neighbour improved the score: done */
  521. if(next_dir==-1){
  522. return dmin;
  523. }
  524. }
  525. }
  /**
   * Diamond search with diamond sizes 1, 2 and 4.
   *
   * For each size, every second point of the diamond around best[] is tested
   * with CHECK_MV. A size is skipped when the diamond would reach outside the
   * xmin/ymin/xmax/ymax limits. Whenever a better position is found the size
   * sequence restarts from 1.
   *
   * @param best  in: start position, out: best position found
   * @param dmin  score of the start position
   * @return the best score found
   */
  526. static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
  527. int src_index, int ref_index, int const penalty_factor,
  528. int size, int h, int flags)
  529. {
  530. MotionEstContext * const c= &s->me;
  531. me_cmp_func cmpf, chroma_cmpf;
  532. int dia_size;
  533. LOAD_COMMON
  534. LOAD_COMMON2
  535. int map_generation= c->map_generation;
  536. cmpf= s->dsp.me_cmp[size];
  537. chroma_cmpf= s->dsp.me_cmp[size+1];
  538. for(dia_size=1; dia_size<=4; dia_size++){
  539. int dir;
  540. const int x= best[0];
  541. const int y= best[1];
  /* only powers of two: skips dia_size 3 */
  542. if(dia_size&(dia_size-1)) continue;
  543. if( x + dia_size > xmax
  544. || x - dia_size < xmin
  545. || y + dia_size > ymax
  546. || y - dia_size < ymin)
  547. continue;
  548. for(dir= 0; dir<dia_size; dir+=2){
  549. int d;
  550. CHECK_MV(x + dir , y + dia_size - dir);
  551. CHECK_MV(x + dia_size - dir, y - dir );
  552. CHECK_MV(x - dir , y - dia_size + dir);
  553. CHECK_MV(x - dia_size + dir, y + dir );
  554. }
  /* best moved: restart (the loop increment brings dia_size back to 1) */
  555. if(x!=best[0] || y!=best[1])
  556. dia_size=0;
  557. #if 0
  558. {
  559. int dx, dy, i;
  560. static int stats[8*8];
  561. dx= FFABS(x-best[0]);
  562. dy= FFABS(y-best[1]);
  563. if(dy>dx){
  564. dx^=dy; dy^=dx; dx^=dy;
  565. }
  566. stats[dy*8 + dx] ++;
  567. if(256*256*256*64 % (stats[0]+1)==0){
  568. for(i=0; i<64; i++){
  569. if((i&7)==0) printf("\n");
  570. printf("%8d ", stats[i]);
  571. }
  572. printf("\n");
  573. }
  574. }
  575. #endif
  576. }
  577. return dmin;
  578. }
  /**
   * Search on a six-point pattern of shrinking size.
   *
   * For the current dia_size the points (x-dia_size, y), (x+dia_size, y),
   * (x+dia_size/2, y+-dia_size) and, for dia_size > 1,
   * (x+(-dia_size>>1), y+-dia_size) are tested (clipped to the limits) and the
   * pattern is re-centred until best[] stops moving. Then dia_size shrinks:
   * by one per step if the initial dia_size was not a power of two, otherwise
   * by halving. The search ends when dia_size reaches 0.
   *
   * @param best      in: start position, out: best position found
   * @param dmin      score of the start position
   * @param dia_size  initial pattern size
   * @return the best score found
   */
  579. static int hex_search(MpegEncContext * s, int *best, int dmin,
  580. int src_index, int ref_index, int const penalty_factor,
  581. int size, int h, int flags, int dia_size)
  582. {
  583. MotionEstContext * const c= &s->me;
  584. me_cmp_func cmpf, chroma_cmpf;
  585. LOAD_COMMON
  586. LOAD_COMMON2
  587. int map_generation= c->map_generation;
  588. int x,y,d;
  /* non-zero when the initial dia_size is not a power of two */
  589. const int dec= dia_size & (dia_size-1);
  590. cmpf= s->dsp.me_cmp[size];
  591. chroma_cmpf= s->dsp.me_cmp[size+1];
  592. for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
  593. do{
  594. x= best[0];
  595. y= best[1];
  596. CHECK_CLIPPED_MV(x -dia_size , y);
  597. CHECK_CLIPPED_MV(x+ dia_size , y);
  598. CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
  599. CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
  /* for dia_size 1 these would repeat points already covered above (NOTE(review): presumably the reason for the guard) */
  600. if(dia_size>1){
  601. CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
  602. CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
  603. }
  604. }while(best[0] != x || best[1] != y);
  605. }
  606. return dmin;
  607. }
  /**
   * Large-to-small search on an eight-point pattern.
   *
   * The initial size is the low 8 bits of c->dia_size. For each size the eight
   * offsets of the hex[] table, scaled by the size, are tested around best[]
   * (clipped to the limits) until best[] stops moving. The size then shrinks
   * (by one per step if the initial size was not a power of two, otherwise by
   * halving). Finally the four direct neighbours of the result are tested.
   *
   * @param best  in: start position, out: best position found
   * @param dmin  score of the start position
   * @return the best score found
   */
  608. static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
  609. int src_index, int ref_index, int const penalty_factor,
  610. int size, int h, int flags)
  611. {
  612. MotionEstContext * const c= &s->me;
  613. me_cmp_func cmpf, chroma_cmpf;
  614. LOAD_COMMON
  615. LOAD_COMMON2
  616. int map_generation= c->map_generation;
  617. int x,y,i,d;
  618. int dia_size= c->dia_size&0xFF;
  619. const int dec= dia_size & (dia_size-1);
  620. static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
  621. { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
  622. cmpf= s->dsp.me_cmp[size];
  623. chroma_cmpf= s->dsp.me_cmp[size+1];
  624. for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
  625. do{
  626. x= best[0];
  627. y= best[1];
  628. for(i=0; i<8; i++){
  629. CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
  630. }
  631. }while(best[0] != x || best[1] != y);
  632. }
  /* final step: the four direct neighbours of the result */
  633. x= best[0];
  634. y= best[1];
  635. CHECK_CLIPPED_MV(x+1, y);
  636. CHECK_CLIPPED_MV(x, y+1);
  637. CHECK_CLIPPED_MV(x-1, y);
  638. CHECK_CLIPPED_MV(x, y-1);
  639. return dmin;
  640. }
  /**
   * Multi-stage search; the range is c->dia_size with bit 0 and all bits
   * above bit 7 masked off (c->dia_size & 0xFE).
   *
   * Stages, in order:
   *  1. every second position on the horizontal line through the start point,
   *     within dia_size-1 of it, and every second position on the vertical
   *     line, within dia_size/2-1 of it (both clipped to the limits);
   *  2. the full 5x5 window around the best position found so far;
   *  3. the 16 offsets of the hex[] table scaled by j = 1 .. dia_size/4,
   *     centred on the position that stage 2 was centred on;
   *  4. hex_search() with size 2, whose result is returned.
   *
   * @param best  in: start position, out: best position found
   * @param dmin  score of the start position
   * @return the best score found
   */
  641. static int umh_search(MpegEncContext * s, int *best, int dmin,
  642. int src_index, int ref_index, int const penalty_factor,
  643. int size, int h, int flags)
  644. {
  645. MotionEstContext * const c= &s->me;
  646. me_cmp_func cmpf, chroma_cmpf;
  647. LOAD_COMMON
  648. LOAD_COMMON2
  649. int map_generation= c->map_generation;
  650. int x,y,x2,y2, i, j, d;
  651. const int dia_size= c->dia_size&0xFE;
  652. static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
  653. { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
  654. {-2, 3}, { 0, 4}, { 2, 3},
  655. {-2,-3}, { 0,-4}, { 2,-3},};
  656. cmpf= s->dsp.me_cmp[size];
  657. chroma_cmpf= s->dsp.me_cmp[size+1];
  658. x= best[0];
  659. y= best[1];
  /* stage 1: sparse horizontal and vertical line scans */
  660. for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
  661. CHECK_MV(x2, y);
  662. }
  663. for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
  664. CHECK_MV(x, y2);
  665. }
  /* stage 2: full 5x5 window around the best position so far */
  666. x= best[0];
  667. y= best[1];
  668. for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
  669. for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
  670. CHECK_MV(x2, y2);
  671. }
  672. }
  /* stage 3: scaled 16-point pattern; x, y are not refreshed after stage 2 */
  673. //FIXME prevent the CLIP stuff
  674. for(j=1; j<=dia_size/4; j++){
  675. for(i=0; i<16; i++){
  676. CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
  677. }
  678. }
  /* stage 4: final refinement */
  679. return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
  680. }
  /*
   * SAB_CHECK_MV(ax, ay): evaluate position (ax, ay) for sab_diamond_search.
   * Positions whose key is already in the map are skipped. Otherwise the
   * score is computed and stored as in CHECK_MV; if it is lower than the
   * height of the last entry of minima[], the new point is inserted at its
   * sorted place (dropping the last entry), marked unchecked, and the
   * enclosing loop is restarted by setting i to -1 and executing `continue`.
   * Must therefore be expanded directly inside a loop over i.
   * Relies on the same caller names as CHECK_MV, plus minima, minima_count, i.
   */
  681. #define SAB_CHECK_MV(ax,ay)\
  682. {\
  683. const int key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
  684. const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
  685. /*printf("sab check %d %d\n", ax, ay);*/\
  686. if(map[index]!=key){\
  687. d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
  688. map[index]= key;\
  689. score_map[index]= d;\
  690. d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
  691. /*printf("score: %d\n", d);*/\
  692. if(d < minima[minima_count-1].height){\
  693. int j=0;\
  694. \
  695. while(d >= minima[j].height) j++;\
  696. \
  697. memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
  698. \
  699. minima[j].checked= 0;\
  700. minima[j].height= d;\
  701. minima[j].x= ax;\
  702. minima[j].y= ay;\
  703. \
  704. i=-1;\
  705. continue;\
  706. }\
  707. }\
  708. }
  /* capacity of the minima[] list used by sab_diamond_search */
  709. #define MAX_SAB_SIZE ME_MAP_SIZE
  /**
   * Search that tracks a sorted list of the best positions seen so far
   * instead of a single one.
   *
   * The list is seeded from every map entry of the current generation whose
   * decoded position lies within the limits, sorted with minima_cmp (defined
   * elsewhere) and padded up to minima_count = |c->dia_size| entries with
   * dummy points of height 2^30 at (0, 0). Then, for each unchecked entry
   * strictly inside the limits, the four direct neighbours are evaluated with
   * SAB_CHECK_MV, which inserts improvements into the list and restarts the
   * scan. The first list entry is the result.
   *
   * NOTE(review): nothing in this function bounds minima_count by
   * MAX_SAB_SIZE; presumably the caller's setup guarantees it -- confirm.
   *
   * @param best  out: best position found
   * @param dmin  ignored on input; replaced by the height of the best entry
   * @return the best score found
   */
  710. static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
  711. int src_index, int ref_index, int const penalty_factor,
  712. int size, int h, int flags)
  713. {
  714. MotionEstContext * const c= &s->me;
  715. me_cmp_func cmpf, chroma_cmpf;
  716. Minima minima[MAX_SAB_SIZE];
  717. const int minima_count= FFABS(c->dia_size);
  718. int i, j;
  719. LOAD_COMMON
  720. LOAD_COMMON2
  721. int map_generation= c->map_generation;
  722. cmpf= s->dsp.me_cmp[size];
  723. chroma_cmpf= s->dsp.me_cmp[size+1];
  724. /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
  725. become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
  726. */
  727. for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
  728. uint32_t key= map[i];
  /* bias both coordinate fields so that they decode as non-negative numbers */
  729. key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
  /* keep only entries of the current generation; the mask is written as
     -(1<<n) because left-shifting -1 is undefined behaviour in C */
  730. if((key&(-(1<<(2*ME_MAP_MV_BITS)))) != map_generation) continue;
  731. minima[j].height= score_map[i];
  732. minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
  733. minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
  734. minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
  735. minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
  736. // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
  737. if( minima[j].x > xmax || minima[j].x < xmin
  738. || minima[j].y > ymax || minima[j].y < ymin)
  739. continue;
  740. minima[j].checked=0;
  741. if(minima[j].x || minima[j].y)
  742. minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
  743. j++;
  744. }
  745. qsort(minima, j, sizeof(Minima), minima_cmp);
  /* pad the list with dummy entries that any real candidate beats */
  746. for(; j<minima_count; j++){
  747. minima[j].height=256*256*256*64;
  748. minima[j].checked=0;
  749. minima[j].x= minima[j].y=0;
  750. }
  751. for(i=0; i<minima_count; i++){
  752. const int x= minima[i].x;
  753. const int y= minima[i].y;
  754. int d;
  755. if(minima[i].checked) continue;
  756. if( x >= xmax || x <= xmin
  757. || y >= ymax || y <= ymin)
  758. continue;
  /* each of these may insert a new entry and restart the loop from i = 0 */
  759. SAB_CHECK_MV(x-1, y)
  760. SAB_CHECK_MV(x+1, y)
  761. SAB_CHECK_MV(x , y-1)
  762. SAB_CHECK_MV(x , y+1)
  763. minima[i].checked= 1;
  764. }
  765. best[0]= minima[0].x;
  766. best[1]= minima[0].y;
  767. dmin= minima[0].height;
  768. if( best[0] < xmax && best[0] > xmin
  769. && best[1] < ymax && best[1] > ymin){
  770. int d;
  771. //ensure that the reference samples for hpel refinement are in the map
  772. CHECK_MV(best[0]-1, best[1])
  773. CHECK_MV(best[0]+1, best[1])
  774. CHECK_MV(best[0], best[1]-1)
  775. CHECK_MV(best[0], best[1]+1)
  776. }
  777. return dmin;
  778. }
  /**
   * Diamond search with a growing diamond, up to size c->dia_size.
   *
   * For each dia_size starting at 1, all points on the four edges of the
   * diamond around best[] are tested with CHECK_MV; the start/end of each edge
   * walk is trimmed so that no tested point leaves the xmin/ymin/xmax/ymax
   * limits. Whenever a better position is found the size restarts from 1.
   *
   * @param best  in: start position, out: best position found
   * @param dmin  score of the start position
   * @return the best score found
   */
  779. static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
  780. int src_index, int ref_index, int const penalty_factor,
  781. int size, int h, int flags)
  782. {
  783. MotionEstContext * const c= &s->me;
  784. me_cmp_func cmpf, chroma_cmpf;
  785. int dia_size;
  786. LOAD_COMMON
  787. LOAD_COMMON2
  788. int map_generation= c->map_generation;
  789. cmpf= s->dsp.me_cmp[size];
  790. chroma_cmpf= s->dsp.me_cmp[size+1];
  791. for(dia_size=1; dia_size<=c->dia_size; dia_size++){
  792. int dir, start, end;
  793. const int x= best[0];
  794. const int y= best[1];
  /* edge from (x, y+dia_size) towards (x+dia_size, y) */
  795. start= FFMAX(0, y + dia_size - ymax);
  796. end = FFMIN(dia_size, xmax - x + 1);
  797. for(dir= start; dir<end; dir++){
  798. int d;
  799. //check(x + dir,y + dia_size - dir,0, a0)
  800. CHECK_MV(x + dir , y + dia_size - dir);
  801. }
  /* edge from (x+dia_size, y) towards (x, y-dia_size) */
  802. start= FFMAX(0, x + dia_size - xmax);
  803. end = FFMIN(dia_size, y - ymin + 1);
  804. for(dir= start; dir<end; dir++){
  805. int d;
  806. //check(x + dia_size - dir, y - dir,0, a1)
  807. CHECK_MV(x + dia_size - dir, y - dir );
  808. }
  /* edge from (x, y-dia_size) towards (x-dia_size, y) */
  809. start= FFMAX(0, -y + dia_size + ymin );
  810. end = FFMIN(dia_size, x - xmin + 1);
  811. for(dir= start; dir<end; dir++){
  812. int d;
  813. //check(x - dir,y - dia_size + dir,0, a2)
  814. CHECK_MV(x - dir , y - dia_size + dir);
  815. }
  /* edge from (x-dia_size, y) towards (x, y+dia_size) */
  816. start= FFMAX(0, -x + dia_size + xmin );
  817. end = FFMIN(dia_size, ymax - y + 1);
  818. for(dir= start; dir<end; dir++){
  819. int d;
  820. //check(x - dia_size + dir, y + dir,0, a3)
  821. CHECK_MV(x - dia_size + dir, y + dir );
  822. }
  /* best moved: restart (the loop increment brings dia_size back to 1) */
  823. if(x!=best[0] || y!=best[1])
  824. dia_size=0;
  825. #if 0
  826. {
  827. int dx, dy, i;
  828. static int stats[8*8];
  829. dx= FFABS(x-best[0]);
  830. dy= FFABS(y-best[1]);
  831. stats[dy*8 + dx] ++;
  832. if(256*256*256*64 % (stats[0]+1)==0){
  833. for(i=0; i<64; i++){
  834. if((i&7)==0) printf("\n");
  835. printf("%6d ", stats[i]);
  836. }
  837. printf("\n");
  838. }
  839. }
  840. #endif
  841. }
  842. return dmin;
  843. }
  /**
   * Dispatch to the integer-pel search pattern selected by s->me.dia_size:
   *   dia_size <  -1     -> sab_diamond_search
   *   dia_size == -1     -> funny_diamond_search
   *   dia_size is 0 or 1 -> small_diamond_search
   *   dia_size >  768    -> umh_search
   *   dia_size >  512    -> hex_search (size = low 8 bits of dia_size)
   *   dia_size >  256    -> l2s_dia_search
   *   anything else      -> var_diamond_search
   * All arguments are forwarded unchanged; the selected function's result is returned.
   */
  844. static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
  845. int src_index, int ref_index, int const penalty_factor,
  846. int size, int h, int flags){
  847. const int dia= s->me.dia_size;
  848. if(dia < -1)
  849. return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  850. if(dia == -1)
  851. return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  852. if(dia < 2)
  853. return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  854. if(dia > 768)
  855. return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  856. if(dia > 512)
  857. return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, dia&0xFF);
  858. if(dia > 256)
  859. return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  860. /* remaining range: 2 .. 256 */
  861. return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  862. }
  /**
   * Predictor-based motion search for one block.
   *
   * Scores the zero vector, then a list of candidate vectors (spatial
   * predictors from P and scaled vectors from last_mv), and finally hands the
   * best candidate to diamond_search() for refinement.
   *
   * The candidate tests are done by the CHECK_MV / CHECK_CLIPPED_MV macros and
   * the predictors are named through the P_* macros; all of these, as well as
   * LOAD_COMMON2 (which presumably provides `map` and `shift`), are defined
   * outside this part of the file, so their exact effect is not described here.
   * They use the locals d, dmin, best, map_generation, cmpf, chroma_cmpf etc. by
   * name, which is why those must not be renamed.
   *
   * @param s            encoder context; s->me holds the motion estimation state
   * @param mx_ptr       receives best[0] (or 0 on the early skip exit)
   * @param my_ptr       receives best[1] (or 0 on the early skip exit)
   * @param P            predictor table, read through the P_* macros
   * @param src_index    forwarded to cmp() and diamond_search()
   * @param ref_index    forwarded to cmp() and diamond_search()
   * @param last_mv      array of [2] vectors indexed by mb_x + mb_y*s->mb_stride
   * @param ref_mv_scale factor applied to last_mv entries as
   *                     (v*ref_mv_scale + (1<<15))>>16, i.e. rounded to nearest
   * @param flags        forwarded to cmp() and diamond_search()
   * @param size         selects the comparison functions (index size for cmpf,
   *                     size+1 for chroma_cmpf)
   * @param h            forwarded to cmp() / diamond_search(); also used in the
   *                     h*h based thresholds below
   * @return the final dmin (score of the chosen vector)
   */
  863. static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
  864. int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
  865. int ref_mv_scale, int flags, int size, int h)
  866. {
  867. MotionEstContext * const c= &s->me;
  868. int best[2]={0, 0};
  869. int d, dmin;
  870. int map_generation;
  871. int penalty_factor;
  872. const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
  873. const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
  874. me_cmp_func cmpf, chroma_cmpf;
  // LOAD_COMMON declares score_map, xmin/ymin/xmax/ymax, mv_penalty, pred_x, pred_y from c
  875. LOAD_COMMON
  876. LOAD_COMMON2
  // the pre-pass has its own penalty factor and comparison function table
  877. if(c->pre_pass){
  878. penalty_factor= c->pre_penalty_factor;
  879. cmpf= s->dsp.me_pre_cmp[size];
  880. chroma_cmpf= s->dsp.me_pre_cmp[size+1];
  881. }else{
  882. penalty_factor= c->penalty_factor;
  883. cmpf= s->dsp.me_cmp[size];
  884. chroma_cmpf= s->dsp.me_cmp[size+1];
  885. }
  886. map_generation= update_map_generation(c);
  887. assert(cmpf);
  // score of the zero vector; it is recorded in slot 0 of map / score_map
  888. dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
  889. map[0]= map_generation;
  890. score_map[0]= dmin;
  891. //FIXME precalc first term below?
  // add the vector cost of (0,0) for non-direct B-frames or when CODEC_FLAG_MV0 is set
  892. if((s->pict_type == B_TYPE && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
  893. dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
  894. /* first line */
  // on the first slice line only the left predictor and the previous vector of this MB are tried
  895. if (s->first_slice_line) {
  896. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  897. CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  898. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  899. }else{
  // early exit: zero vector is below the mv0_threshold based limit and the
  // left, top and top-right predictors are all zero -> report (0,0) and set c->skip
  900. if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
  901. && ( P_LEFT[0] |P_LEFT[1]
  902. |P_TOP[0] |P_TOP[1]
  903. |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
  904. *mx_ptr= 0;
  905. *my_ptr= 0;
  906. c->skip=1;
  907. return dmin;
  908. }
  // median predictor, its four direct neighbours, the previous vector of this MB,
  // then the left / top / top-right predictors
  909. CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
  910. CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
  911. CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
  912. CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
  913. CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
  914. CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  915. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  916. CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
  917. CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
  918. CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
  919. }
  // only if the best score is still above h*h*4: try previous vectors of adjacent MBs.
  // pre-pass: entries at mb_x-1 and (unless first slice line) mb_y-1;
  // main pass: entries at mb_x+1 and (if a row below exists in this slice) mb_y+1
  920. if(dmin>h*h*4){
  921. if(c->pre_pass){
  922. CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
  923. (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
  924. if(!s->first_slice_line)
  925. CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
  926. (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
  927. }else{
  928. CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
  929. (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
  930. if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
  931. CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
  932. (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
  933. }
  934. }
  // optional: try the previous vectors of all MBs within +-last_predictor_count
  // of the current one (window clamped to the picture size in MBs)
  935. if(c->avctx->last_predictor_count){
  936. const int count= c->avctx->last_predictor_count;
  937. const int xstart= FFMAX(0, s->mb_x - count);
  938. const int ystart= FFMAX(0, s->mb_y - count);
  939. const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
  940. const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
  941. int mb_y;
  942. for(mb_y=ystart; mb_y<yend; mb_y++){
  943. int mb_x;
  944. for(mb_x=xstart; mb_x<xend; mb_x++){
  // NOTE(review): this index adds 1 to both coordinates, unlike ref_mv_xy above - confirm intended
  945. const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
  946. int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
  947. int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
  // out-of-range candidates are dropped here rather than clipped
  948. if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
  949. CHECK_MV(mx,my)
  950. }
  951. }
  952. }
  953. //check(best[0],best[1],0, b0)
  // refine around the best candidate found so far
  954. dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  955. //check(best[0],best[1],0, b1)
  956. *mx_ptr= best[0];
  957. *my_ptr= best[1];
  958. // printf("%d %d %d \n", best[0], best[1], dmin);
  959. return dmin;
  960. }
  961. //this function is dedicated to the braindamaged gcc
  962. inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
  963. int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
  964. int ref_mv_scale, int size, int h)
  965. {
  966. MotionEstContext * const c= &s->me;
  967. //FIXME convert other functions in the same way if faster
  968. if(c->flags==0 && h==16 && size==0){
  969. return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
  970. // case FLAG_QPEL:
  971. // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
  972. }else{
  973. return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
  974. }
  975. }
  /**
   * Predictor-based motion search with fixed size=1 and h=8.
   *
   * Unlike epzs_motion_search_internal() this variant does not score the zero
   * vector first (dmin starts at 1000000), has no early skip exit, always uses
   * c->penalty_factor and s->dsp.me_cmp, and additionally tries the P_MV1
   * predictor. The best candidate is then refined by diamond_search().
   *
   * CHECK_MV / CHECK_CLIPPED_MV, LOAD_COMMON2 and the P_* names are macros
   * defined outside this part of the file; they refer to the locals declared
   * here (d, dmin, best, map_generation, cmpf, chroma_cmpf, flags, ...) by name.
   *
   * @param s            encoder context; s->me holds the motion estimation state
   * @param mx_ptr       receives best[0]
   * @param my_ptr       receives best[1]
   * @param P            predictor table, read through the P_* macros
   * @param src_index    forwarded to diamond_search()
   * @param ref_index    forwarded to diamond_search()
   * @param last_mv      array of [2] vectors indexed by mb_x + mb_y*s->mb_stride
   * @param ref_mv_scale factor applied to last_mv entries as
   *                     (v*ref_mv_scale + (1<<15))>>16
   * @return the final dmin
   */
  976. static int epzs_motion_search4(MpegEncContext * s,
  977. int *mx_ptr, int *my_ptr, int P[10][2],
  978. int src_index, int ref_index, int16_t (*last_mv)[2],
  979. int ref_mv_scale)
  980. {
  981. MotionEstContext * const c= &s->me;
  982. int best[2]={0, 0};
  983. int d, dmin;
  984. int map_generation;
  985. const int penalty_factor= c->penalty_factor;
  986. const int size=1;
  987. const int h=8;
  988. const int ref_mv_stride= s->mb_stride;
  989. const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
  990. me_cmp_func cmpf, chroma_cmpf;
  // LOAD_COMMON declares score_map, xmin/ymin/xmax/ymax, mv_penalty, pred_x, pred_y from c
  991. LOAD_COMMON
  // flags is declared before LOAD_COMMON2 - presumably that macro reads it (TODO confirm)
  992. int flags= c->flags;
  993. LOAD_COMMON2
  994. cmpf= s->dsp.me_cmp[size];
  995. chroma_cmpf= s->dsp.me_cmp[size+1];
  996. map_generation= update_map_generation(c);
  // start from a large score so that the first checked candidate is accepted
  997. dmin = 1000000;
  998. //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
  999. /* first line */
  // first slice line: left predictor, previous vector of this MB, P_MV1
  1000. if (s->first_slice_line) {
  1001. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  1002. CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  1003. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  1004. CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
  1005. }else{
  // otherwise: P_MV1, median, left, top, top-right, previous vector of this MB
  1006. CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
  1007. //FIXME try some early stop
  1008. CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
  1009. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  1010. CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
  1011. CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
  1012. CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  1013. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  1014. }
  // if the best score is still above 64*4, also try the previous vectors stored
  // at mb_x+1 and (if a row below exists in this slice) mb_y+1
  1015. if(dmin>64*4){
  1016. CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
  1017. (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
  1018. if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
  1019. CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
  1020. (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
  1021. }
  // refine around the best candidate found so far
  1022. dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  1023. *mx_ptr= best[0];
  1024. *my_ptr= best[1];
  1025. // printf("%d %d %d \n", best[0], best[1], dmin);
  1026. return dmin;
  1027. }
  1028. //try to merge with above FIXME (needs PSNR test)
  /**
   * Predictor-based motion search with fixed size=0 and h=8.
   *
   * The candidate list and control flow are the same as in
   * epzs_motion_search4(); only the constant `size` differs (0 here, 1 there).
   * dmin starts at 1000000 instead of the zero-vector score, there is no early
   * skip exit, and the best candidate is refined by diamond_search().
   *
   * CHECK_MV / CHECK_CLIPPED_MV, LOAD_COMMON2 and the P_* names are macros
   * defined outside this part of the file; they refer to the locals declared
   * here (d, dmin, best, map_generation, cmpf, chroma_cmpf, flags, ...) by name.
   *
   * @param s            encoder context; s->me holds the motion estimation state
   * @param mx_ptr       receives best[0]
   * @param my_ptr       receives best[1]
   * @param P            predictor table, read through the P_* macros
   * @param src_index    forwarded to diamond_search()
   * @param ref_index    forwarded to diamond_search()
   * @param last_mv      array of [2] vectors indexed by mb_x + mb_y*s->mb_stride
   * @param ref_mv_scale factor applied to last_mv entries as
   *                     (v*ref_mv_scale + (1<<15))>>16
   * @return the final dmin
   */
  1029. static int epzs_motion_search2(MpegEncContext * s,
  1030. int *mx_ptr, int *my_ptr, int P[10][2],
  1031. int src_index, int ref_index, int16_t (*last_mv)[2],
  1032. int ref_mv_scale)
  1033. {
  1034. MotionEstContext * const c= &s->me;
  1035. int best[2]={0, 0};
  1036. int d, dmin;
  1037. int map_generation;
  1038. const int penalty_factor= c->penalty_factor;
  1039. const int size=0; //FIXME pass as arg
  1040. const int h=8;
  1041. const int ref_mv_stride= s->mb_stride;
  1042. const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
  1043. me_cmp_func cmpf, chroma_cmpf;
  // LOAD_COMMON declares score_map, xmin/ymin/xmax/ymax, mv_penalty, pred_x, pred_y from c
  1044. LOAD_COMMON
  // flags is declared before LOAD_COMMON2 - presumably that macro reads it (TODO confirm)
  1045. int flags= c->flags;
  1046. LOAD_COMMON2
  1047. cmpf= s->dsp.me_cmp[size];
  1048. chroma_cmpf= s->dsp.me_cmp[size+1];
  1049. map_generation= update_map_generation(c);
  // start from a large score so that the first checked candidate is accepted
  1050. dmin = 1000000;
  1051. //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
  1052. /* first line */
  // first slice line: left predictor, previous vector of this MB, P_MV1
  1053. if (s->first_slice_line) {
  1054. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  1055. CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  1056. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  1057. CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
  1058. }else{
  // otherwise: P_MV1, median, left, top, top-right, previous vector of this MB
  1059. CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
  1060. //FIXME try some early stop
  1061. CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
  1062. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  1063. CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
  1064. CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
  1065. CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  1066. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  1067. }
  // if the best score is still above 64*4, also try the previous vectors stored
  // at mb_x+1 and (if a row below exists in this slice) mb_y+1
  1068. if(dmin>64*4){
  1069. CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
  1070. (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
  1071. if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
  1072. CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
  1073. (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
  1074. }
  // refine around the best candidate found so far
  1075. dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  1076. *mx_ptr= best[0];
  1077. *my_ptr= best[1];
  1078. // printf("%d %d %d \n", best[0], best[1], dmin);
  1079. return dmin;
  1080. }