You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1229 lines
42KB

  1. /*
  2. * Motion estimation
  3. * Copyright (c) 2002-2004 Michael Niedermayer
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. */
  22. /**
  23. * @file motion_est_template.c
  24. * Motion estimation template.
  25. */
  /*
   * LOAD_COMMON declares local shortcuts for the MotionEstContext fields that
   * the search routines and the CHECK_* macros in this file refer to by bare
   * name (score_map, xmin/ymin/xmax/ymax, mv_penalty, pred_x, pred_y).
   * Expand it among the declarations of a function that already has a
   * `MotionEstContext *c` in scope.
   *
   * Not every user touches score_map or the range limits, so those are tagged
   * attribute_unused; let's hope gcc removes the unused variables
   * (gcc 3.2.2 seems to do it).
   *
   * The last line intentionally has NO line continuation: the macro must end
   * here no matter what the next line of the file is.
   */
  #define LOAD_COMMON\
      uint32_t attribute_unused * const score_map= c->score_map;\
      const int attribute_unused xmin= c->xmin;\
      const int attribute_unused ymin= c->ymin;\
      const int attribute_unused xmax= c->xmax;\
      const int attribute_unused ymax= c->ymax;\
      uint8_t *mv_penalty= c->current_mv_penalty;\
      const int pred_x= c->pred_x;\
      const int pred_y= c->pred_y;
  /*
   * CHECK_HALF_MV(dx, dy, x, y)
   *
   * Scores one half-pel candidate: full-pel position (x, y) plus sub-pel
   * offset (dx, dy), i.e. the half-pel vector (2*x+dx, 2*y+dy).
   * The score is the cmp() result plus the motion vector penalty relative to
   * (pred_x, pred_y), scaled by penalty_factor. The result is left in d and
   * handed to COPY3_IF_LT together with dmin/bx/by (COPY3_IF_LT is defined
   * elsewhere; presumably it keeps the candidate when d < dmin).
   *
   * Needs in scope: s, d, dmin, bx, by, size, h, ref_index, src_index,
   * cmp_sub, chroma_cmp_sub, flags, mv_penalty, pred_x, pred_y,
   * penalty_factor.
   */
  #define CHECK_HALF_MV(dx, dy, x, y)\
  {\
      const int half_x= 2*(x)+(dx);\
      const int half_y= 2*(y)+(dy);\
      const int cmp_score= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
      d= cmp_score + (mv_penalty[half_x - pred_x] + mv_penalty[half_y - pred_y])*penalty_factor;\
      COPY3_IF_LT(dmin, d, bx, half_x, by, half_y)\
  }
  44. #if 0
  45. static int hpel_motion_search)(MpegEncContext * s,
  46. int *mx_ptr, int *my_ptr, int dmin,
  47. uint8_t *ref_data[3],
  48. int size)
  49. {
  50. const int xx = 16 * s->mb_x + 8*(n&1);
  51. const int yy = 16 * s->mb_y + 8*(n>>1);
  52. const int mx = *mx_ptr;
  53. const int my = *my_ptr;
  54. const int penalty_factor= c->sub_penalty_factor;
  55. LOAD_COMMON
  56. // INIT;
  57. //FIXME factorize
  58. me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
  59. if(s->no_rounding /*FIXME b_type*/){
  60. hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
  61. chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
  62. }else{
  63. hpel_put=& s->dsp.put_pixels_tab[size];
  64. chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
  65. }
  66. cmpf= s->dsp.me_cmp[size];
  67. chroma_cmpf= s->dsp.me_cmp[size+1];
  68. cmp_sub= s->dsp.me_sub_cmp[size];
  69. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  70. if(c->skip){ //FIXME somehow move up (benchmark)
  71. *mx_ptr = 0;
  72. *my_ptr = 0;
  73. return dmin;
  74. }
  75. if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
  76. CMP_HPEL(dmin, 0, 0, mx, my, size);
  77. if(mx || my)
  78. dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
  79. }
  80. if (mx > xmin && mx < xmax &&
  81. my > ymin && my < ymax) {
  82. int bx=2*mx, by=2*my;
  83. int d= dmin;
  84. CHECK_HALF_MV(1, 1, mx-1, my-1)
  85. CHECK_HALF_MV(0, 1, mx , my-1)
  86. CHECK_HALF_MV(1, 1, mx , my-1)
  87. CHECK_HALF_MV(1, 0, mx-1, my )
  88. CHECK_HALF_MV(1, 0, mx , my )
  89. CHECK_HALF_MV(1, 1, mx-1, my )
  90. CHECK_HALF_MV(0, 1, mx , my )
  91. CHECK_HALF_MV(1, 1, mx , my )
  92. assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2);
  93. *mx_ptr = bx;
  94. *my_ptr = by;
  95. }else{
  96. *mx_ptr =2*mx;
  97. *my_ptr =2*my;
  98. }
  99. return dmin;
  100. }
  101. #else
  102. static int hpel_motion_search(MpegEncContext * s,
  103. int *mx_ptr, int *my_ptr, int dmin,
  104. int src_index, int ref_index,
  105. int size, int h)
  106. {
  107. MotionEstContext * const c= &s->me;
  108. const int mx = *mx_ptr;
  109. const int my = *my_ptr;
  110. const int penalty_factor= c->sub_penalty_factor;
  111. me_cmp_func cmp_sub, chroma_cmp_sub;
  112. int bx=2*mx, by=2*my;
  113. LOAD_COMMON
  114. int flags= c->sub_flags;
  115. //FIXME factorize
  116. cmp_sub= s->dsp.me_sub_cmp[size];
  117. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  118. if(c->skip){ //FIXME move out of hpel?
  119. *mx_ptr = 0;
  120. *my_ptr = 0;
  121. return dmin;
  122. }
  123. if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
  124. dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
  125. if(mx || my || size>0)
  126. dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
  127. }
  128. if (mx > xmin && mx < xmax &&
  129. my > ymin && my < ymax) {
  130. int d= dmin;
  131. const int index= (my<<ME_MAP_SHIFT) + mx;
  132. const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
  133. + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
  134. const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
  135. + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
  136. const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
  137. + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
  138. const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
  139. + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
  140. #if 1
  141. int key;
  142. int map_generation= c->map_generation;
  143. #ifndef NDEBUG
  144. uint32_t *map= c->map;
  145. #endif
  146. key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
  147. assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
  148. key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
  149. assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
  150. key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
  151. assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
  152. key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
  153. assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
  154. #endif
  155. if(t<=b){
  156. CHECK_HALF_MV(0, 1, mx ,my-1)
  157. if(l<=r){
  158. CHECK_HALF_MV(1, 1, mx-1, my-1)
  159. if(t+r<=b+l){
  160. CHECK_HALF_MV(1, 1, mx , my-1)
  161. }else{
  162. CHECK_HALF_MV(1, 1, mx-1, my )
  163. }
  164. CHECK_HALF_MV(1, 0, mx-1, my )
  165. }else{
  166. CHECK_HALF_MV(1, 1, mx , my-1)
  167. if(t+l<=b+r){
  168. CHECK_HALF_MV(1, 1, mx-1, my-1)
  169. }else{
  170. CHECK_HALF_MV(1, 1, mx , my )
  171. }
  172. CHECK_HALF_MV(1, 0, mx , my )
  173. }
  174. }else{
  175. if(l<=r){
  176. if(t+l<=b+r){
  177. CHECK_HALF_MV(1, 1, mx-1, my-1)
  178. }else{
  179. CHECK_HALF_MV(1, 1, mx , my )
  180. }
  181. CHECK_HALF_MV(1, 0, mx-1, my)
  182. CHECK_HALF_MV(1, 1, mx-1, my)
  183. }else{
  184. if(t+r<=b+l){
  185. CHECK_HALF_MV(1, 1, mx , my-1)
  186. }else{
  187. CHECK_HALF_MV(1, 1, mx-1, my)
  188. }
  189. CHECK_HALF_MV(1, 0, mx , my)
  190. CHECK_HALF_MV(1, 1, mx , my)
  191. }
  192. CHECK_HALF_MV(0, 1, mx , my)
  193. }
  194. assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
  195. }
  196. *mx_ptr = bx;
  197. *my_ptr = by;
  198. return dmin;
  199. }
  200. #endif
  201. static int no_sub_motion_search(MpegEncContext * s,
  202. int *mx_ptr, int *my_ptr, int dmin,
  203. int src_index, int ref_index,
  204. int size, int h)
  205. {
  206. (*mx_ptr)<<=1;
  207. (*my_ptr)<<=1;
  208. return dmin;
  209. }
  210. inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
  211. int ref_index, int size, int h, int add_rate)
  212. {
  213. // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
  214. MotionEstContext * const c= &s->me;
  215. const int penalty_factor= c->mb_penalty_factor;
  216. const int flags= c->mb_flags;
  217. const int qpel= flags & FLAG_QPEL;
  218. const int mask= 1+2*qpel;
  219. me_cmp_func cmp_sub, chroma_cmp_sub;
  220. int d;
  221. LOAD_COMMON
  222. //FIXME factorize
  223. cmp_sub= s->dsp.mb_cmp[size];
  224. chroma_cmp_sub= s->dsp.mb_cmp[size+1];
  225. // assert(!c->skip);
  226. // assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
  227. d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
  228. //FIXME check cbp before adding penalty for (0,0) vector
  229. if(add_rate && (mx || my || size>0))
  230. d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
  231. return d;
  232. }
  /*
   * CHECK_QUARTER_MV(dx, dy, x, y)
   *
   * Quarter-pel counterpart of CHECK_HALF_MV: scores full-pel position (x, y)
   * plus sub-pel offset (dx, dy), i.e. the quarter-pel vector
   * (4*x+dx, 4*y+dy), using cmpf/chroma_cmpf. The score plus the scaled
   * motion vector penalty is left in d and passed to COPY3_IF_LT with
   * dmin/bx/by (COPY3_IF_LT is defined elsewhere; presumably it keeps the
   * candidate when d < dmin).
   *
   * Needs in scope: s, d, dmin, bx, by, size, h, ref_index, src_index, cmpf,
   * chroma_cmpf, flags, mv_penalty, pred_x, pred_y, penalty_factor.
   */
  #define CHECK_QUARTER_MV(dx, dy, x, y)\
  {\
      const int quarter_x= 4*(x)+(dx);\
      const int quarter_y= 4*(y)+(dy);\
      const int cmp_score= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
      d= cmp_score + (mv_penalty[quarter_x - pred_x] + mv_penalty[quarter_y - pred_y])*penalty_factor;\
      COPY3_IF_LT(dmin, d, bx, quarter_x, by, quarter_y)\
  }
  241. static int qpel_motion_search(MpegEncContext * s,
  242. int *mx_ptr, int *my_ptr, int dmin,
  243. int src_index, int ref_index,
  244. int size, int h)
  245. {
  246. MotionEstContext * const c= &s->me;
  247. const int mx = *mx_ptr;
  248. const int my = *my_ptr;
  249. const int penalty_factor= c->sub_penalty_factor;
  250. const int map_generation= c->map_generation;
  251. const int subpel_quality= c->avctx->me_subpel_quality;
  252. uint32_t *map= c->map;
  253. me_cmp_func cmpf, chroma_cmpf;
  254. me_cmp_func cmp_sub, chroma_cmp_sub;
  255. LOAD_COMMON
  256. int flags= c->sub_flags;
  257. cmpf= s->dsp.me_cmp[size];
  258. chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
  259. //FIXME factorize
  260. cmp_sub= s->dsp.me_sub_cmp[size];
  261. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  262. if(c->skip){ //FIXME somehow move up (benchmark)
  263. *mx_ptr = 0;
  264. *my_ptr = 0;
  265. return dmin;
  266. }
  267. if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
  268. dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
  269. if(mx || my || size>0)
  270. dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
  271. }
  272. if (mx > xmin && mx < xmax &&
  273. my > ymin && my < ymax) {
  274. int bx=4*mx, by=4*my;
  275. int d= dmin;
  276. int i, nx, ny;
  277. const int index= (my<<ME_MAP_SHIFT) + mx;
  278. const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
  279. const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
  280. const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
  281. const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
  282. const int c= score_map[(index )&(ME_MAP_SIZE-1)];
  283. int best[8];
  284. int best_pos[8][2];
  285. memset(best, 64, sizeof(int)*8);
  286. #if 1
  287. if(s->me.dia_size>=2){
  288. const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  289. const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  290. const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  291. const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  292. for(ny= -3; ny <= 3; ny++){
  293. for(nx= -3; nx <= 3; nx++){
  294. //FIXME this could overflow (unlikely though)
  295. const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
  296. const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
  297. const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
  298. int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
  299. int i;
  300. if((nx&3)==0 && (ny&3)==0) continue;
  301. score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
  302. // if(nx&1) score-=1024*c->penalty_factor;
  303. // if(ny&1) score-=1024*c->penalty_factor;
  304. for(i=0; i<8; i++){
  305. if(score < best[i]){
  306. memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
  307. memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
  308. best[i]= score;
  309. best_pos[i][0]= nx + 4*mx;
  310. best_pos[i][1]= ny + 4*my;
  311. break;
  312. }
  313. }
  314. }
  315. }
  316. }else{
  317. int tl;
  318. //FIXME this could overflow (unlikely though)
  319. const int cx = 4*(r - l);
  320. const int cx2= r + l - 2*c;
  321. const int cy = 4*(b - t);
  322. const int cy2= b + t - 2*c;
  323. int cxy;
  324. if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
  325. tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  326. }else{
  327. tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
  328. }
  329. cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
  330. assert(16*cx2 + 4*cx + 32*c == 32*r);
  331. assert(16*cx2 - 4*cx + 32*c == 32*l);
  332. assert(16*cy2 + 4*cy + 32*c == 32*b);
  333. assert(16*cy2 - 4*cy + 32*c == 32*t);
  334. assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
  335. for(ny= -3; ny <= 3; ny++){
  336. for(nx= -3; nx <= 3; nx++){
  337. //FIXME this could overflow (unlikely though)
  338. int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
  339. int i;
  340. if((nx&3)==0 && (ny&3)==0) continue;
  341. score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
  342. // if(nx&1) score-=32*c->penalty_factor;
  343. // if(ny&1) score-=32*c->penalty_factor;
  344. for(i=0; i<8; i++){
  345. if(score < best[i]){
  346. memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
  347. memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
  348. best[i]= score;
  349. best_pos[i][0]= nx + 4*mx;
  350. best_pos[i][1]= ny + 4*my;
  351. break;
  352. }
  353. }
  354. }
  355. }
  356. }
  357. for(i=0; i<subpel_quality; i++){
  358. nx= best_pos[i][0];
  359. ny= best_pos[i][1];
  360. CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
  361. }
  362. #if 0
  363. const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  364. const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  365. const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  366. const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  367. // if(l < r && l < t && l < b && l < tl && l < bl && l < tr && l < br && bl < tl){
  368. if(tl<br){
  369. // nx= FFMAX(4*mx - bx, bx - 4*mx);
  370. // ny= FFMAX(4*my - by, by - 4*my);
  371. static int stats[7][7], count;
  372. count++;
  373. stats[4*mx - bx + 3][4*my - by + 3]++;
  374. if(256*256*256*64 % count ==0){
  375. for(i=0; i<49; i++){
  376. if((i%7)==0) printf("\n");
  377. printf("%6d ", stats[0][i]);
  378. }
  379. printf("\n");
  380. }
  381. }
  382. #endif
  383. #else
  384. CHECK_QUARTER_MV(2, 2, mx-1, my-1)
  385. CHECK_QUARTER_MV(0, 2, mx , my-1)
  386. CHECK_QUARTER_MV(2, 2, mx , my-1)
  387. CHECK_QUARTER_MV(2, 0, mx , my )
  388. CHECK_QUARTER_MV(2, 2, mx , my )
  389. CHECK_QUARTER_MV(0, 2, mx , my )
  390. CHECK_QUARTER_MV(2, 2, mx-1, my )
  391. CHECK_QUARTER_MV(2, 0, mx-1, my )
  392. nx= bx;
  393. ny= by;
  394. for(i=0; i<8; i++){
  395. int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
  396. int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
  397. CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
  398. }
  399. #endif
  400. #if 0
  401. //outer ring
  402. CHECK_QUARTER_MV(1, 3, mx-1, my-1)
  403. CHECK_QUARTER_MV(1, 2, mx-1, my-1)
  404. CHECK_QUARTER_MV(1, 1, mx-1, my-1)
  405. CHECK_QUARTER_MV(2, 1, mx-1, my-1)
  406. CHECK_QUARTER_MV(3, 1, mx-1, my-1)
  407. CHECK_QUARTER_MV(0, 1, mx , my-1)
  408. CHECK_QUARTER_MV(1, 1, mx , my-1)
  409. CHECK_QUARTER_MV(2, 1, mx , my-1)
  410. CHECK_QUARTER_MV(3, 1, mx , my-1)
  411. CHECK_QUARTER_MV(3, 2, mx , my-1)
  412. CHECK_QUARTER_MV(3, 3, mx , my-1)
  413. CHECK_QUARTER_MV(3, 0, mx , my )
  414. CHECK_QUARTER_MV(3, 1, mx , my )
  415. CHECK_QUARTER_MV(3, 2, mx , my )
  416. CHECK_QUARTER_MV(3, 3, mx , my )
  417. CHECK_QUARTER_MV(2, 3, mx , my )
  418. CHECK_QUARTER_MV(1, 3, mx , my )
  419. CHECK_QUARTER_MV(0, 3, mx , my )
  420. CHECK_QUARTER_MV(3, 3, mx-1, my )
  421. CHECK_QUARTER_MV(2, 3, mx-1, my )
  422. CHECK_QUARTER_MV(1, 3, mx-1, my )
  423. CHECK_QUARTER_MV(1, 2, mx-1, my )
  424. CHECK_QUARTER_MV(1, 1, mx-1, my )
  425. CHECK_QUARTER_MV(1, 0, mx-1, my )
  426. #endif
  427. assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
  428. *mx_ptr = bx;
  429. *my_ptr = by;
  430. }else{
  431. *mx_ptr =4*mx;
  432. *my_ptr =4*my;
  433. }
  434. return dmin;
  435. }
  /*
   * CHECK_MV(x, y)
   *
   * Scores the full-pel position (x, y) unless the map already holds an entry
   * for it from the current map generation. A fresh position is evaluated
   * with cmp(), recorded in map/score_map (score without penalty), and then
   * offered to COPY3_IF_LT with the penalised score d and dmin/best[]
   * (COPY3_IF_LT is defined elsewhere; presumably it keeps the candidate when
   * d < dmin). The position must lie inside [xmin,xmax] x [ymin,ymax]
   * (asserted).
   *
   * Needs in scope: s, d, dmin, best, map, score_map, map_generation, shift,
   * size, h, ref_index, src_index, cmpf, chroma_cmpf, flags, mv_penalty,
   * pred_x, pred_y, penalty_factor, xmin, xmax, ymin, ymax.
   */
  #define CHECK_MV(x,y)\
  {\
      const int mv_key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
      const int mv_slot= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
      assert((x) >= xmin);\
      assert((x) <= xmax);\
      assert((y) >= ymin);\
      assert((y) <= ymax);\
      if(map[mv_slot]!=mv_key){\
          score_map[mv_slot]= d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
          map[mv_slot]= mv_key;\
          d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
          COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
      }\
  }
  /*
   * CHECK_CLIPED_MV(ax, ay)
   *
   * Clamps (ax, ay) into [xmin,xmax] x [ymin,ymax] and runs CHECK_MV on the
   * clamped position. Each argument is evaluated exactly once.
   */
  #define CHECK_CLIPED_MV(ax,ay)\
  {\
      const int raw_x= ax;\
      const int raw_y= ay;\
      const int clip_x= FFMAX(xmin, FFMIN(raw_x, xmax));\
      const int clip_y= FFMAX(ymin, FFMIN(raw_y, ymax));\
      CHECK_MV(clip_x, clip_y)\
  }
  /*
   * CHECK_MV_DIR(x, y, new_dir)
   *
   * Like CHECK_MV but without range asserts, and it additionally records the
   * direction of an improving step: when the penalised score of (x, y) beats
   * dmin, best[], dmin and next_dir (= new_dir) are updated. Positions already
   * in the map for the current generation are skipped.
   *
   * Needs in scope: everything CHECK_MV needs except the range limits, plus
   * next_dir.
   */
  #define CHECK_MV_DIR(x,y,new_dir)\
  {\
      const int mv_key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
      const int mv_slot= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
      if(map[mv_slot]!=mv_key){\
          score_map[mv_slot]= d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
          map[mv_slot]= mv_key;\
          d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
          if(d<dmin){\
              dmin= d;\
              next_dir= new_dir;\
              best[0]= (x);\
              best[1]= (y);\
          }\
      }\
  }
  /*
   * check(x, y, S, v): debugging aid. Prints a diagnostic for every limit that
   * the position (x, y) violates, with the limits scaled by a left shift of S.
   * v is pasted into the format string as a tag identifying the call site.
   *
   * The body is wrapped in braces so the four tests stay together when the
   * macro is used under a caller's `if`, and the last line carries no line
   * continuation so the macro cannot absorb the following line of the file.
   *
   * Needs in scope: s, xmin, xmax, ymin, ymax.
   */
  #define check(x,y,S,v)\
  {\
      if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
      if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
      if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
      if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
  }
  /*
   * LOAD_COMMON2 declares the remaining locals the CHECK_MV family relies on:
   *   map   - c->map
   *   qpel  - flags & FLAG_QPEL
   *   shift - 1 + qpel, the left shift the CHECK_* macros apply to a full-pel
   *           coordinate before indexing mv_penalty
   * Needs `c` and `flags` in scope.
   *
   * The last line intentionally has NO line continuation: the macro must end
   * here no matter what the next line of the file is.
   */
  #define LOAD_COMMON2\
      uint32_t *map= c->map;\
      const int qpel= flags&FLAG_QPEL;\
      const int shift= 1+qpel;
  490. static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
  491. int src_index, int ref_index, int const penalty_factor,
  492. int size, int h, int flags)
  493. {
  494. MotionEstContext * const c= &s->me;
  495. me_cmp_func cmpf, chroma_cmpf;
  496. int next_dir=-1;
  497. LOAD_COMMON
  498. LOAD_COMMON2
  499. int map_generation= c->map_generation;
  500. cmpf= s->dsp.me_cmp[size];
  501. chroma_cmpf= s->dsp.me_cmp[size+1];
  502. { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
  503. const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
  504. const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
  505. if(map[index]!=key){ //this will be executed only very rarey
  506. score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
  507. map[index]= key;
  508. }
  509. }
  510. for(;;){
  511. int d;
  512. const int dir= next_dir;
  513. const int x= best[0];
  514. const int y= best[1];
  515. next_dir=-1;
  516. //printf("%d", dir);
  517. if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
  518. if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
  519. if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
  520. if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
  521. if(next_dir==-1){
  522. return dmin;
  523. }
  524. }
  525. }
  526. static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
  527. int src_index, int ref_index, int const penalty_factor,
  528. int size, int h, int flags)
  529. {
  530. MotionEstContext * const c= &s->me;
  531. me_cmp_func cmpf, chroma_cmpf;
  532. int dia_size;
  533. LOAD_COMMON
  534. LOAD_COMMON2
  535. int map_generation= c->map_generation;
  536. cmpf= s->dsp.me_cmp[size];
  537. chroma_cmpf= s->dsp.me_cmp[size+1];
  538. for(dia_size=1; dia_size<=4; dia_size++){
  539. int dir;
  540. const int x= best[0];
  541. const int y= best[1];
  542. if(dia_size&(dia_size-1)) continue;
  543. if( x + dia_size > xmax
  544. || x - dia_size < xmin
  545. || y + dia_size > ymax
  546. || y - dia_size < ymin)
  547. continue;
  548. for(dir= 0; dir<dia_size; dir+=2){
  549. int d;
  550. CHECK_MV(x + dir , y + dia_size - dir);
  551. CHECK_MV(x + dia_size - dir, y - dir );
  552. CHECK_MV(x - dir , y - dia_size + dir);
  553. CHECK_MV(x - dia_size + dir, y + dir );
  554. }
  555. if(x!=best[0] || y!=best[1])
  556. dia_size=0;
  557. #if 0
  558. {
  559. int dx, dy, i;
  560. static int stats[8*8];
  561. dx= FFABS(x-best[0]);
  562. dy= FFABS(y-best[1]);
  563. if(dy>dx){
  564. dx^=dy; dy^=dx; dx^=dy;
  565. }
  566. stats[dy*8 + dx] ++;
  567. if(256*256*256*64 % (stats[0]+1)==0){
  568. for(i=0; i<64; i++){
  569. if((i&7)==0) printf("\n");
  570. printf("%8d ", stats[i]);
  571. }
  572. printf("\n");
  573. }
  574. }
  575. #endif
  576. }
  577. return dmin;
  578. }
  579. static int hex_search(MpegEncContext * s, int *best, int dmin,
  580. int src_index, int ref_index, int const penalty_factor,
  581. int size, int h, int flags, int dia_size)
  582. {
  583. MotionEstContext * const c= &s->me;
  584. me_cmp_func cmpf, chroma_cmpf;
  585. LOAD_COMMON
  586. LOAD_COMMON2
  587. int map_generation= c->map_generation;
  588. int x,y,i,d;
  589. static const int hex[6][2]={{-2, 0}, { 2,0}, {-1,-2}, {1,-2}, {-1,2},{1,2}};
  590. cmpf= s->dsp.me_cmp[size];
  591. chroma_cmpf= s->dsp.me_cmp[size+1];
  592. for(;dia_size; dia_size--){
  593. do{
  594. x= best[0];
  595. y= best[1];
  596. for(i=0; i<6; i++){
  597. CHECK_CLIPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
  598. }
  599. }while(best[0] != x || best[1] != y);
  600. }
  601. do{
  602. x= best[0];
  603. y= best[1];
  604. CHECK_CLIPED_MV(x+1, y);
  605. CHECK_CLIPED_MV(x, y+1);
  606. CHECK_CLIPED_MV(x-1, y);
  607. CHECK_CLIPED_MV(x, y-1);
  608. }while(best[0] != x || best[1] != y);
  609. return dmin;
  610. }
  611. static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
  612. int src_index, int ref_index, int const penalty_factor,
  613. int size, int h, int flags)
  614. {
  615. MotionEstContext * const c= &s->me;
  616. me_cmp_func cmpf, chroma_cmpf;
  617. LOAD_COMMON
  618. LOAD_COMMON2
  619. int map_generation= c->map_generation;
  620. int x,y,i,d, dia_size;
  621. static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
  622. { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
  623. cmpf= s->dsp.me_cmp[size];
  624. chroma_cmpf= s->dsp.me_cmp[size+1];
  625. for(dia_size= c->dia_size&0xFF; dia_size; dia_size--){
  626. do{
  627. x= best[0];
  628. y= best[1];
  629. for(i=0; i<8; i++){
  630. CHECK_CLIPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
  631. }
  632. }while(best[0] != x || best[1] != y);
  633. }
  634. x= best[0];
  635. y= best[1];
  636. CHECK_CLIPED_MV(x+1, y);
  637. CHECK_CLIPED_MV(x, y+1);
  638. CHECK_CLIPED_MV(x-1, y);
  639. CHECK_CLIPED_MV(x, y-1);
  640. return dmin;
  641. }
  642. static int umh_search(MpegEncContext * s, int *best, int dmin,
  643. int src_index, int ref_index, int const penalty_factor,
  644. int size, int h, int flags)
  645. {
  646. MotionEstContext * const c= &s->me;
  647. me_cmp_func cmpf, chroma_cmpf;
  648. LOAD_COMMON
  649. LOAD_COMMON2
  650. int map_generation= c->map_generation;
  651. int x,y,x2,y2, i, j, d;
  652. const int dia_size= c->dia_size&0xFE;
  653. static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
  654. { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
  655. {-2, 3}, { 0, 4}, { 2, 3},
  656. {-2,-3}, { 0,-4}, { 2,-3},};
  657. cmpf= s->dsp.me_cmp[size];
  658. chroma_cmpf= s->dsp.me_cmp[size+1];
  659. x= best[0];
  660. y= best[1];
  661. for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
  662. CHECK_MV(x2, y);
  663. }
  664. for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
  665. CHECK_MV(x, y2);
  666. }
  667. x= best[0];
  668. y= best[1];
  669. for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
  670. for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
  671. CHECK_MV(x2, y2);
  672. }
  673. }
  674. //FIXME prevent the CLIP stuff
  675. for(j=1; j<=dia_size/4; j++){
  676. for(i=0; i<16; i++){
  677. CHECK_CLIPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
  678. }
  679. }
  680. return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 1);
  681. }
  /*
   * SAB_CHECK_MV(ax, ay)
   *
   * Scores the full-pel position (ax, ay) unless the map already holds it for
   * the current generation. If the penalised score beats the worst entry of
   * the sorted minima[] list, the position is inserted at its sorted place
   * (the last entry drops out), marked unchecked, and the enclosing loop is
   * restarted by setting i to -1 and executing `continue`.
   *
   * Must be expanded directly inside a loop over `i`. Needs in scope: minima,
   * minima_count, i, d and everything cmp()/the penalty computation use.
   */
  #define SAB_CHECK_MV(ax,ay)\
  {\
      const int sab_key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
      const int sab_slot= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
      if(map[sab_slot]!=sab_key){\
          score_map[sab_slot]= d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
          map[sab_slot]= sab_key;\
          d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
          if(d < minima[minima_count-1].height){\
              int ins;\
  \
              for(ins=0; minima[ins].height <= d; ins++)\
                  ;\
  \
              memmove(&minima[ins+1], &minima[ins], (minima_count - ins - 1)*sizeof(Minima));\
  \
              minima[ins].x= ax;\
              minima[ins].y= ay;\
              minima[ins].height= d;\
              minima[ins].checked= 0;\
  \
              i=-1;\
              continue;\
          }\
      }\
  }
  710. #define MAX_SAB_SIZE ME_MAP_SIZE
  711. static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
  712. int src_index, int ref_index, int const penalty_factor,
  713. int size, int h, int flags)
  714. {
  715. MotionEstContext * const c= &s->me;
  716. me_cmp_func cmpf, chroma_cmpf;
  717. Minima minima[MAX_SAB_SIZE];
  718. const int minima_count= FFABS(c->dia_size);
  719. int i, j;
  720. LOAD_COMMON
  721. LOAD_COMMON2
  722. int map_generation= c->map_generation;
  723. cmpf= s->dsp.me_cmp[size];
  724. chroma_cmpf= s->dsp.me_cmp[size+1];
  725. for(j=i=0; i<ME_MAP_SIZE; i++){
  726. uint32_t key= map[i];
  727. key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
  728. if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
  729. assert(j<MAX_SAB_SIZE); //max j = number of predictors
  730. minima[j].height= score_map[i];
  731. minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
  732. minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
  733. minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
  734. minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
  735. minima[j].checked=0;
  736. if(minima[j].x || minima[j].y)
  737. minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
  738. j++;
  739. }
  740. qsort(minima, j, sizeof(Minima), minima_cmp);
  741. for(; j<minima_count; j++){
  742. minima[j].height=256*256*256*64;
  743. minima[j].checked=0;
  744. minima[j].x= minima[j].y=0;
  745. }
  746. for(i=0; i<minima_count; i++){
  747. const int x= minima[i].x;
  748. const int y= minima[i].y;
  749. int d;
  750. if(minima[i].checked) continue;
  751. if( x >= xmax || x <= xmin
  752. || y >= ymax || y <= ymin)
  753. continue;
  754. SAB_CHECK_MV(x-1, y)
  755. SAB_CHECK_MV(x+1, y)
  756. SAB_CHECK_MV(x , y-1)
  757. SAB_CHECK_MV(x , y+1)
  758. minima[i].checked= 1;
  759. }
  760. best[0]= minima[0].x;
  761. best[1]= minima[0].y;
  762. dmin= minima[0].height;
  763. if( best[0] < xmax && best[0] > xmin
  764. && best[1] < ymax && best[1] > ymin){
  765. int d;
  766. //ensure that the refernece samples for hpel refinement are in the map
  767. CHECK_MV(best[0]-1, best[1])
  768. CHECK_MV(best[0]+1, best[1])
  769. CHECK_MV(best[0], best[1]-1)
  770. CHECK_MV(best[0], best[1]+1)
  771. }
  772. return dmin;
  773. }
  774. static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
  775. int src_index, int ref_index, int const penalty_factor,
  776. int size, int h, int flags)
  777. {
  778. MotionEstContext * const c= &s->me;
  779. me_cmp_func cmpf, chroma_cmpf;
  780. int dia_size;
  781. LOAD_COMMON
  782. LOAD_COMMON2
  783. int map_generation= c->map_generation;
  784. cmpf= s->dsp.me_cmp[size];
  785. chroma_cmpf= s->dsp.me_cmp[size+1];
  786. for(dia_size=1; dia_size<=c->dia_size; dia_size++){
  787. int dir, start, end;
  788. const int x= best[0];
  789. const int y= best[1];
  790. start= FFMAX(0, y + dia_size - ymax);
  791. end = FFMIN(dia_size, xmax - x + 1);
  792. for(dir= start; dir<end; dir++){
  793. int d;
  794. //check(x + dir,y + dia_size - dir,0, a0)
  795. CHECK_MV(x + dir , y + dia_size - dir);
  796. }
  797. start= FFMAX(0, x + dia_size - xmax);
  798. end = FFMIN(dia_size, y - ymin + 1);
  799. for(dir= start; dir<end; dir++){
  800. int d;
  801. //check(x + dia_size - dir, y - dir,0, a1)
  802. CHECK_MV(x + dia_size - dir, y - dir );
  803. }
  804. start= FFMAX(0, -y + dia_size + ymin );
  805. end = FFMIN(dia_size, x - xmin + 1);
  806. for(dir= start; dir<end; dir++){
  807. int d;
  808. //check(x - dir,y - dia_size + dir,0, a2)
  809. CHECK_MV(x - dir , y - dia_size + dir);
  810. }
  811. start= FFMAX(0, -x + dia_size + xmin );
  812. end = FFMIN(dia_size, ymax - y + 1);
  813. for(dir= start; dir<end; dir++){
  814. int d;
  815. //check(x - dia_size + dir, y + dir,0, a3)
  816. CHECK_MV(x - dia_size + dir, y + dir );
  817. }
  818. if(x!=best[0] || y!=best[1])
  819. dia_size=0;
  820. #if 0
  821. {
  822. int dx, dy, i;
  823. static int stats[8*8];
  824. dx= FFABS(x-best[0]);
  825. dy= FFABS(y-best[1]);
  826. stats[dy*8 + dx] ++;
  827. if(256*256*256*64 % (stats[0]+1)==0){
  828. for(i=0; i<64; i++){
  829. if((i&7)==0) printf("\n");
  830. printf("%6d ", stats[i]);
  831. }
  832. printf("\n");
  833. }
  834. }
  835. #endif
  836. }
  837. return dmin;
  838. }
  839. static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
  840. int src_index, int ref_index, int const penalty_factor,
  841. int size, int h, int flags){
  842. MotionEstContext * const c= &s->me;
  843. if(c->dia_size==-1)
  844. return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  845. else if(c->dia_size<-1)
  846. return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  847. else if(c->dia_size<2)
  848. return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  849. else if(c->dia_size>768)
  850. return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  851. else if(c->dia_size>512)
  852. return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
  853. else if(c->dia_size>256)
  854. return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  855. else
  856. return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  857. }
  858. static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
  859. int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
  860. int ref_mv_scale, int flags, int size, int h)
  861. {
  862. MotionEstContext * const c= &s->me;
  863. int best[2]={0, 0};
  864. int d, dmin;
  865. int map_generation;
  866. int penalty_factor;
  867. const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
  868. const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
  869. me_cmp_func cmpf, chroma_cmpf;
  870. LOAD_COMMON
  871. LOAD_COMMON2
  872. if(c->pre_pass){
  873. penalty_factor= c->pre_penalty_factor;
  874. cmpf= s->dsp.me_pre_cmp[size];
  875. chroma_cmpf= s->dsp.me_pre_cmp[size+1];
  876. }else{
  877. penalty_factor= c->penalty_factor;
  878. cmpf= s->dsp.me_cmp[size];
  879. chroma_cmpf= s->dsp.me_cmp[size+1];
  880. }
  881. map_generation= update_map_generation(c);
  882. assert(cmpf);
  883. dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
  884. map[0]= map_generation;
  885. score_map[0]= dmin;
  886. /* first line */
  887. if (s->first_slice_line) {
  888. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  889. CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  890. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  891. }else{
  892. if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
  893. && ( P_LEFT[0] |P_LEFT[1]
  894. |P_TOP[0] |P_TOP[1]
  895. |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
  896. *mx_ptr= 0;
  897. *my_ptr= 0;
  898. c->skip=1;
  899. return dmin;
  900. }
  901. CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
  902. CHECK_CLIPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
  903. CHECK_CLIPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
  904. CHECK_CLIPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
  905. CHECK_CLIPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
  906. CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  907. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  908. CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
  909. CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
  910. CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
  911. }
  912. if(dmin>h*h*4){
  913. if(c->pre_pass){
  914. CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
  915. (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
  916. if(!s->first_slice_line)
  917. CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
  918. (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
  919. }else{
  920. CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
  921. (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
  922. if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
  923. CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
  924. (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
  925. }
  926. }
  927. if(c->avctx->last_predictor_count){
  928. const int count= c->avctx->last_predictor_count;
  929. const int xstart= FFMAX(0, s->mb_x - count);
  930. const int ystart= FFMAX(0, s->mb_y - count);
  931. const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
  932. const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
  933. int mb_y;
  934. for(mb_y=ystart; mb_y<yend; mb_y++){
  935. int mb_x;
  936. for(mb_x=xstart; mb_x<xend; mb_x++){
  937. const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
  938. int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
  939. int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
  940. if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
  941. CHECK_MV(mx,my)
  942. }
  943. }
  944. }
  945. //check(best[0],best[1],0, b0)
  946. dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  947. //check(best[0],best[1],0, b1)
  948. *mx_ptr= best[0];
  949. *my_ptr= best[1];
  950. // printf("%d %d %d \n", best[0], best[1], dmin);
  951. return dmin;
  952. }
  953. //this function is dedicated to the braindamaged gcc
  954. inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
  955. int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
  956. int ref_mv_scale, int size, int h)
  957. {
  958. MotionEstContext * const c= &s->me;
  959. //FIXME convert other functions in the same way if faster
  960. if(c->flags==0 && h==16 && size==0){
  961. return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
  962. // case FLAG_QPEL:
  963. // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
  964. }else{
  965. return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
  966. }
  967. }
  968. static int epzs_motion_search4(MpegEncContext * s,
  969. int *mx_ptr, int *my_ptr, int P[10][2],
  970. int src_index, int ref_index, int16_t (*last_mv)[2],
  971. int ref_mv_scale)
  972. {
  973. MotionEstContext * const c= &s->me;
  974. int best[2]={0, 0};
  975. int d, dmin;
  976. int map_generation;
  977. const int penalty_factor= c->penalty_factor;
  978. const int size=1;
  979. const int h=8;
  980. const int ref_mv_stride= s->mb_stride;
  981. const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
  982. me_cmp_func cmpf, chroma_cmpf;
  983. LOAD_COMMON
  984. int flags= c->flags;
  985. LOAD_COMMON2
  986. cmpf= s->dsp.me_cmp[size];
  987. chroma_cmpf= s->dsp.me_cmp[size+1];
  988. map_generation= update_map_generation(c);
  989. dmin = 1000000;
  990. //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
  991. /* first line */
  992. if (s->first_slice_line) {
  993. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  994. CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  995. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  996. CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
  997. }else{
  998. CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
  999. //FIXME try some early stop
  1000. CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
  1001. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  1002. CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
  1003. CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
  1004. CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  1005. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  1006. }
  1007. if(dmin>64*4){
  1008. CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
  1009. (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
  1010. if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
  1011. CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
  1012. (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
  1013. }
  1014. dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  1015. *mx_ptr= best[0];
  1016. *my_ptr= best[1];
  1017. // printf("%d %d %d \n", best[0], best[1], dmin);
  1018. return dmin;
  1019. }
  1020. //try to merge with above FIXME (needs PSNR test)
  1021. static int epzs_motion_search2(MpegEncContext * s,
  1022. int *mx_ptr, int *my_ptr, int P[10][2],
  1023. int src_index, int ref_index, int16_t (*last_mv)[2],
  1024. int ref_mv_scale)
  1025. {
  1026. MotionEstContext * const c= &s->me;
  1027. int best[2]={0, 0};
  1028. int d, dmin;
  1029. int map_generation;
  1030. const int penalty_factor= c->penalty_factor;
  1031. const int size=0; //FIXME pass as arg
  1032. const int h=8;
  1033. const int ref_mv_stride= s->mb_stride;
  1034. const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
  1035. me_cmp_func cmpf, chroma_cmpf;
  1036. LOAD_COMMON
  1037. int flags= c->flags;
  1038. LOAD_COMMON2
  1039. cmpf= s->dsp.me_cmp[size];
  1040. chroma_cmpf= s->dsp.me_cmp[size+1];
  1041. map_generation= update_map_generation(c);
  1042. dmin = 1000000;
  1043. //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
  1044. /* first line */
  1045. if (s->first_slice_line) {
  1046. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  1047. CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  1048. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  1049. CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
  1050. }else{
  1051. CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
  1052. //FIXME try some early stop
  1053. CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
  1054. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  1055. CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
  1056. CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
  1057. CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
  1058. (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
  1059. }
  1060. if(dmin>64*4){
  1061. CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
  1062. (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
  1063. if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
  1064. CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
  1065. (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
  1066. }
  1067. dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
  1068. *mx_ptr= best[0];
  1069. *my_ptr= best[1];
  1070. // printf("%d %d %d \n", best[0], best[1], dmin);
  1071. return dmin;
  1072. }